{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.836073797447188, "eval_steps": 500, "global_step": 15000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 5.573825316314587e-05, "grad_norm": 0.5498427152633667, "learning_rate": 3.3333333333333335e-07, "loss": 1.7989, "step": 1 }, { "epoch": 0.00011147650632629174, "grad_norm": 0.6303576827049255, "learning_rate": 6.666666666666667e-07, "loss": 1.996, "step": 2 }, { "epoch": 0.0001672147594894376, "grad_norm": 0.5333236455917358, "learning_rate": 1.0000000000000002e-06, "loss": 1.8613, "step": 3 }, { "epoch": 0.00022295301265258348, "grad_norm": 0.5659189224243164, "learning_rate": 1.3333333333333334e-06, "loss": 1.8904, "step": 4 }, { "epoch": 0.0002786912658157293, "grad_norm": 0.6221416592597961, "learning_rate": 1.6666666666666667e-06, "loss": 2.0151, "step": 5 }, { "epoch": 0.0003344295189788752, "grad_norm": 0.6198977828025818, "learning_rate": 2.0000000000000003e-06, "loss": 1.9774, "step": 6 }, { "epoch": 0.0003901677721420211, "grad_norm": 0.6328762173652649, "learning_rate": 2.3333333333333336e-06, "loss": 1.8994, "step": 7 }, { "epoch": 0.00044590602530516696, "grad_norm": 0.6075513362884521, "learning_rate": 2.666666666666667e-06, "loss": 1.894, "step": 8 }, { "epoch": 0.0005016442784683128, "grad_norm": 0.6397244930267334, "learning_rate": 3e-06, "loss": 2.0865, "step": 9 }, { "epoch": 0.0005573825316314586, "grad_norm": 0.6115519404411316, "learning_rate": 3.3333333333333333e-06, "loss": 1.9688, "step": 10 }, { "epoch": 0.0006131207847946045, "grad_norm": 0.546791672706604, "learning_rate": 3.666666666666667e-06, "loss": 1.8239, "step": 11 }, { "epoch": 0.0006688590379577504, "grad_norm": 0.690762996673584, "learning_rate": 4.000000000000001e-06, "loss": 2.0367, "step": 12 }, { "epoch": 0.0007245972911208963, "grad_norm": 0.7190566062927246, "learning_rate": 4.333333333333334e-06, "loss": 1.9817, "step": 13 }, { "epoch": 0.0007803355442840422, "grad_norm": 0.6093202233314514, "learning_rate": 4.666666666666667e-06, "loss": 2.01, "step": 14 }, { "epoch": 0.000836073797447188, "grad_norm": 0.5230669975280762, "learning_rate": 5e-06, "loss": 1.8419, "step": 15 }, { "epoch": 0.0008918120506103339, "grad_norm": 0.5391668677330017, "learning_rate": 5.333333333333334e-06, "loss": 1.8663, "step": 16 }, { "epoch": 0.0009475503037734797, "grad_norm": 0.6359019875526428, "learning_rate": 5.666666666666667e-06, "loss": 2.2089, "step": 17 }, { "epoch": 0.0010032885569366257, "grad_norm": 0.61967533826828, "learning_rate": 6e-06, "loss": 2.0842, "step": 18 }, { "epoch": 0.0010590268100997716, "grad_norm": 0.491642028093338, "learning_rate": 6.333333333333334e-06, "loss": 1.755, "step": 19 }, { "epoch": 0.0011147650632629172, "grad_norm": 0.7064740657806396, "learning_rate": 6.666666666666667e-06, "loss": 2.2494, "step": 20 }, { "epoch": 0.0011705033164260631, "grad_norm": 0.5671775937080383, "learning_rate": 7.000000000000001e-06, "loss": 2.0236, "step": 21 }, { "epoch": 0.001226241569589209, "grad_norm": 0.5698847770690918, "learning_rate": 7.333333333333334e-06, "loss": 1.8295, "step": 22 }, { "epoch": 0.001281979822752355, "grad_norm": 0.5910470485687256, "learning_rate": 7.666666666666667e-06, "loss": 2.1311, "step": 23 }, { "epoch": 0.0013377180759155008, "grad_norm": 0.567130446434021, "learning_rate": 8.000000000000001e-06, "loss": 1.888, "step": 24 }, { "epoch": 0.0013934563290786467, "grad_norm": 0.5540428757667542, "learning_rate": 8.333333333333334e-06, "loss": 1.6625, "step": 25 }, { "epoch": 0.0014491945822417925, "grad_norm": 0.5729663372039795, "learning_rate": 8.666666666666668e-06, "loss": 2.0062, "step": 26 }, { "epoch": 0.0015049328354049384, "grad_norm": 0.5232088565826416, "learning_rate": 9e-06, "loss": 1.7991, "step": 27 }, { "epoch": 0.0015606710885680843, "grad_norm": 0.5638092160224915, "learning_rate": 9.333333333333334e-06, "loss": 2.0728, "step": 28 }, { "epoch": 0.0016164093417312302, "grad_norm": 0.5504807829856873, "learning_rate": 9.666666666666667e-06, "loss": 1.808, "step": 29 }, { "epoch": 0.001672147594894376, "grad_norm": 0.5935587882995605, "learning_rate": 1e-05, "loss": 1.9738, "step": 30 }, { "epoch": 0.001727885848057522, "grad_norm": 0.6431534886360168, "learning_rate": 1.0333333333333333e-05, "loss": 2.0967, "step": 31 }, { "epoch": 0.0017836241012206678, "grad_norm": 0.5587693452835083, "learning_rate": 1.0666666666666667e-05, "loss": 1.6821, "step": 32 }, { "epoch": 0.0018393623543838135, "grad_norm": 0.5473759174346924, "learning_rate": 1.1000000000000001e-05, "loss": 1.8442, "step": 33 }, { "epoch": 0.0018951006075469594, "grad_norm": 0.6185194849967957, "learning_rate": 1.1333333333333334e-05, "loss": 2.0705, "step": 34 }, { "epoch": 0.0019508388607101053, "grad_norm": 0.5253747701644897, "learning_rate": 1.1666666666666668e-05, "loss": 1.7944, "step": 35 }, { "epoch": 0.0020065771138732514, "grad_norm": 0.5742389559745789, "learning_rate": 1.2e-05, "loss": 2.0, "step": 36 }, { "epoch": 0.0020623153670363973, "grad_norm": 0.6290589570999146, "learning_rate": 1.2333333333333334e-05, "loss": 2.1365, "step": 37 }, { "epoch": 0.002118053620199543, "grad_norm": 0.5194576382637024, "learning_rate": 1.2666666666666668e-05, "loss": 1.8569, "step": 38 }, { "epoch": 0.0021737918733626886, "grad_norm": 0.5665763020515442, "learning_rate": 1.3000000000000001e-05, "loss": 1.9313, "step": 39 }, { "epoch": 0.0022295301265258345, "grad_norm": 0.5268619060516357, "learning_rate": 1.3333333333333333e-05, "loss": 1.8843, "step": 40 }, { "epoch": 0.0022852683796889804, "grad_norm": 0.7840973734855652, "learning_rate": 1.3666666666666666e-05, "loss": 1.929, "step": 41 }, { "epoch": 0.0023410066328521262, "grad_norm": 0.5785960555076599, "learning_rate": 1.4000000000000001e-05, "loss": 2.0276, "step": 42 }, { "epoch": 0.002396744886015272, "grad_norm": 0.5202842354774475, "learning_rate": 1.4333333333333334e-05, "loss": 1.949, "step": 43 }, { "epoch": 0.002452483139178418, "grad_norm": 0.72431480884552, "learning_rate": 1.4666666666666668e-05, "loss": 2.2978, "step": 44 }, { "epoch": 0.002508221392341564, "grad_norm": 0.5558940768241882, "learning_rate": 1.5e-05, "loss": 1.9125, "step": 45 }, { "epoch": 0.00256395964550471, "grad_norm": 0.5687503814697266, "learning_rate": 1.5333333333333334e-05, "loss": 1.8533, "step": 46 }, { "epoch": 0.0026196978986678557, "grad_norm": 0.5703473091125488, "learning_rate": 1.5666666666666667e-05, "loss": 1.9015, "step": 47 }, { "epoch": 0.0026754361518310016, "grad_norm": 0.5496488809585571, "learning_rate": 1.6000000000000003e-05, "loss": 1.682, "step": 48 }, { "epoch": 0.0027311744049941474, "grad_norm": 0.6371431946754456, "learning_rate": 1.6333333333333335e-05, "loss": 2.0425, "step": 49 }, { "epoch": 0.0027869126581572933, "grad_norm": 0.6071433424949646, "learning_rate": 1.6666666666666667e-05, "loss": 1.8745, "step": 50 }, { "epoch": 0.002842650911320439, "grad_norm": 0.5981681942939758, "learning_rate": 1.7000000000000003e-05, "loss": 1.8872, "step": 51 }, { "epoch": 0.002898389164483585, "grad_norm": 0.6591808795928955, "learning_rate": 1.7333333333333336e-05, "loss": 2.0187, "step": 52 }, { "epoch": 0.002954127417646731, "grad_norm": 0.6213610172271729, "learning_rate": 1.7666666666666668e-05, "loss": 2.0231, "step": 53 }, { "epoch": 0.003009865670809877, "grad_norm": 0.6377214789390564, "learning_rate": 1.8e-05, "loss": 1.8641, "step": 54 }, { "epoch": 0.0030656039239730227, "grad_norm": 0.675821840763092, "learning_rate": 1.8333333333333333e-05, "loss": 2.215, "step": 55 }, { "epoch": 0.0031213421771361686, "grad_norm": 0.5989570021629333, "learning_rate": 1.866666666666667e-05, "loss": 1.9232, "step": 56 }, { "epoch": 0.0031770804302993145, "grad_norm": 0.6279881596565247, "learning_rate": 1.9e-05, "loss": 1.8452, "step": 57 }, { "epoch": 0.0032328186834624604, "grad_norm": 0.5670164227485657, "learning_rate": 1.9333333333333333e-05, "loss": 1.5623, "step": 58 }, { "epoch": 0.0032885569366256063, "grad_norm": 0.5822334289550781, "learning_rate": 1.9666666666666666e-05, "loss": 1.7901, "step": 59 }, { "epoch": 0.003344295189788752, "grad_norm": 0.6322411298751831, "learning_rate": 2e-05, "loss": 1.8802, "step": 60 }, { "epoch": 0.003400033442951898, "grad_norm": 0.6066840291023254, "learning_rate": 2.0333333333333334e-05, "loss": 1.8334, "step": 61 }, { "epoch": 0.003455771696115044, "grad_norm": 0.6801030039787292, "learning_rate": 2.0666666666666666e-05, "loss": 2.1029, "step": 62 }, { "epoch": 0.00351150994927819, "grad_norm": 0.6445280909538269, "learning_rate": 2.1e-05, "loss": 2.0333, "step": 63 }, { "epoch": 0.0035672482024413357, "grad_norm": 0.6259938478469849, "learning_rate": 2.1333333333333335e-05, "loss": 1.6012, "step": 64 }, { "epoch": 0.003622986455604481, "grad_norm": 0.6786999702453613, "learning_rate": 2.1666666666666667e-05, "loss": 2.0818, "step": 65 }, { "epoch": 0.003678724708767627, "grad_norm": 0.6728941202163696, "learning_rate": 2.2000000000000003e-05, "loss": 1.9022, "step": 66 }, { "epoch": 0.003734462961930773, "grad_norm": 0.6992253661155701, "learning_rate": 2.2333333333333335e-05, "loss": 1.7435, "step": 67 }, { "epoch": 0.003790201215093919, "grad_norm": 0.6083998084068298, "learning_rate": 2.2666666666666668e-05, "loss": 1.7816, "step": 68 }, { "epoch": 0.0038459394682570647, "grad_norm": 0.6070435643196106, "learning_rate": 2.3000000000000003e-05, "loss": 1.672, "step": 69 }, { "epoch": 0.0039016777214202106, "grad_norm": 0.6032823920249939, "learning_rate": 2.3333333333333336e-05, "loss": 1.771, "step": 70 }, { "epoch": 0.0039574159745833564, "grad_norm": 0.689372181892395, "learning_rate": 2.3666666666666668e-05, "loss": 1.9594, "step": 71 }, { "epoch": 0.004013154227746503, "grad_norm": 0.6333785653114319, "learning_rate": 2.4e-05, "loss": 1.8492, "step": 72 }, { "epoch": 0.004068892480909648, "grad_norm": 0.638140857219696, "learning_rate": 2.4333333333333336e-05, "loss": 1.798, "step": 73 }, { "epoch": 0.0041246307340727945, "grad_norm": 0.6000136137008667, "learning_rate": 2.466666666666667e-05, "loss": 1.6625, "step": 74 }, { "epoch": 0.00418036898723594, "grad_norm": 0.7654765248298645, "learning_rate": 2.5e-05, "loss": 2.1015, "step": 75 }, { "epoch": 0.004236107240399086, "grad_norm": 0.6845409870147705, "learning_rate": 2.5333333333333337e-05, "loss": 1.9176, "step": 76 }, { "epoch": 0.004291845493562232, "grad_norm": 0.6557128429412842, "learning_rate": 2.5666666666666666e-05, "loss": 1.8244, "step": 77 }, { "epoch": 0.004347583746725377, "grad_norm": 0.6574406027793884, "learning_rate": 2.6000000000000002e-05, "loss": 1.851, "step": 78 }, { "epoch": 0.0044033219998885235, "grad_norm": 0.6624826192855835, "learning_rate": 2.633333333333333e-05, "loss": 1.8332, "step": 79 }, { "epoch": 0.004459060253051669, "grad_norm": 0.7041051983833313, "learning_rate": 2.6666666666666667e-05, "loss": 1.8357, "step": 80 }, { "epoch": 0.004514798506214815, "grad_norm": 0.6737162470817566, "learning_rate": 2.7000000000000002e-05, "loss": 1.8162, "step": 81 }, { "epoch": 0.004570536759377961, "grad_norm": 0.6803858280181885, "learning_rate": 2.733333333333333e-05, "loss": 1.9187, "step": 82 }, { "epoch": 0.004626275012541107, "grad_norm": 0.6441910862922668, "learning_rate": 2.7666666666666667e-05, "loss": 1.9235, "step": 83 }, { "epoch": 0.0046820132657042525, "grad_norm": 0.6409979462623596, "learning_rate": 2.8000000000000003e-05, "loss": 1.9148, "step": 84 }, { "epoch": 0.004737751518867399, "grad_norm": 0.722623348236084, "learning_rate": 2.8333333333333335e-05, "loss": 1.9738, "step": 85 }, { "epoch": 0.004793489772030544, "grad_norm": 0.6637834310531616, "learning_rate": 2.8666666666666668e-05, "loss": 1.6872, "step": 86 }, { "epoch": 0.004849228025193691, "grad_norm": 0.7143079042434692, "learning_rate": 2.9e-05, "loss": 1.9944, "step": 87 }, { "epoch": 0.004904966278356836, "grad_norm": 0.7566176652908325, "learning_rate": 2.9333333333333336e-05, "loss": 1.7542, "step": 88 }, { "epoch": 0.004960704531519982, "grad_norm": 0.6472474932670593, "learning_rate": 2.9666666666666672e-05, "loss": 1.9534, "step": 89 }, { "epoch": 0.005016442784683128, "grad_norm": 0.6678224205970764, "learning_rate": 3e-05, "loss": 1.7684, "step": 90 }, { "epoch": 0.005072181037846274, "grad_norm": 0.6665822267532349, "learning_rate": 3.0333333333333337e-05, "loss": 1.9028, "step": 91 }, { "epoch": 0.00512791929100942, "grad_norm": 0.7907567620277405, "learning_rate": 3.066666666666667e-05, "loss": 1.8876, "step": 92 }, { "epoch": 0.005183657544172566, "grad_norm": 0.6738147735595703, "learning_rate": 3.1e-05, "loss": 1.7623, "step": 93 }, { "epoch": 0.005239395797335711, "grad_norm": 0.6898536086082458, "learning_rate": 3.1333333333333334e-05, "loss": 1.7103, "step": 94 }, { "epoch": 0.005295134050498858, "grad_norm": 0.6961106061935425, "learning_rate": 3.1666666666666666e-05, "loss": 1.537, "step": 95 }, { "epoch": 0.005350872303662003, "grad_norm": 0.6331319808959961, "learning_rate": 3.2000000000000005e-05, "loss": 1.6681, "step": 96 }, { "epoch": 0.005406610556825149, "grad_norm": 0.7678634524345398, "learning_rate": 3.233333333333333e-05, "loss": 2.1339, "step": 97 }, { "epoch": 0.005462348809988295, "grad_norm": 0.7012338638305664, "learning_rate": 3.266666666666667e-05, "loss": 1.7591, "step": 98 }, { "epoch": 0.005518087063151441, "grad_norm": 0.7289243340492249, "learning_rate": 3.3e-05, "loss": 1.901, "step": 99 }, { "epoch": 0.005573825316314587, "grad_norm": 0.6416298747062683, "learning_rate": 3.3333333333333335e-05, "loss": 1.5989, "step": 100 }, { "epoch": 0.005629563569477733, "grad_norm": 0.6193853616714478, "learning_rate": 3.366666666666667e-05, "loss": 1.7429, "step": 101 }, { "epoch": 0.005685301822640878, "grad_norm": 0.7283613681793213, "learning_rate": 3.4000000000000007e-05, "loss": 1.9885, "step": 102 }, { "epoch": 0.005741040075804025, "grad_norm": 0.6713369488716125, "learning_rate": 3.433333333333333e-05, "loss": 1.8521, "step": 103 }, { "epoch": 0.00579677832896717, "grad_norm": 0.6700227856636047, "learning_rate": 3.466666666666667e-05, "loss": 1.8404, "step": 104 }, { "epoch": 0.005852516582130316, "grad_norm": 0.6885061860084534, "learning_rate": 3.5e-05, "loss": 1.8081, "step": 105 }, { "epoch": 0.005908254835293462, "grad_norm": 0.6814194917678833, "learning_rate": 3.5333333333333336e-05, "loss": 1.8672, "step": 106 }, { "epoch": 0.005963993088456607, "grad_norm": 0.6492342948913574, "learning_rate": 3.566666666666667e-05, "loss": 1.7029, "step": 107 }, { "epoch": 0.006019731341619754, "grad_norm": 0.5920109748840332, "learning_rate": 3.6e-05, "loss": 1.5455, "step": 108 }, { "epoch": 0.006075469594782899, "grad_norm": 0.6685107946395874, "learning_rate": 3.633333333333333e-05, "loss": 1.9576, "step": 109 }, { "epoch": 0.0061312078479460455, "grad_norm": 0.6917557716369629, "learning_rate": 3.6666666666666666e-05, "loss": 1.9341, "step": 110 }, { "epoch": 0.006186946101109191, "grad_norm": 0.730872631072998, "learning_rate": 3.7e-05, "loss": 1.9828, "step": 111 }, { "epoch": 0.006242684354272337, "grad_norm": 0.7139527797698975, "learning_rate": 3.733333333333334e-05, "loss": 2.0277, "step": 112 }, { "epoch": 0.006298422607435483, "grad_norm": 0.6276320219039917, "learning_rate": 3.766666666666667e-05, "loss": 1.7702, "step": 113 }, { "epoch": 0.006354160860598629, "grad_norm": 0.6891281008720398, "learning_rate": 3.8e-05, "loss": 1.9062, "step": 114 }, { "epoch": 0.0064098991137617745, "grad_norm": 0.7155683636665344, "learning_rate": 3.8333333333333334e-05, "loss": 1.8527, "step": 115 }, { "epoch": 0.006465637366924921, "grad_norm": 0.6917515397071838, "learning_rate": 3.866666666666667e-05, "loss": 1.8439, "step": 116 }, { "epoch": 0.006521375620088066, "grad_norm": 0.7216237783432007, "learning_rate": 3.9000000000000006e-05, "loss": 2.0114, "step": 117 }, { "epoch": 0.0065771138732512125, "grad_norm": 0.6636412739753723, "learning_rate": 3.933333333333333e-05, "loss": 1.6951, "step": 118 }, { "epoch": 0.006632852126414358, "grad_norm": 0.7715172171592712, "learning_rate": 3.966666666666667e-05, "loss": 1.9907, "step": 119 }, { "epoch": 0.006688590379577504, "grad_norm": 0.6481485366821289, "learning_rate": 4e-05, "loss": 1.7934, "step": 120 }, { "epoch": 0.00674432863274065, "grad_norm": 0.6104344725608826, "learning_rate": 4.0333333333333336e-05, "loss": 1.6549, "step": 121 }, { "epoch": 0.006800066885903796, "grad_norm": 0.706912100315094, "learning_rate": 4.066666666666667e-05, "loss": 1.9666, "step": 122 }, { "epoch": 0.0068558051390669415, "grad_norm": 0.7835676670074463, "learning_rate": 4.1e-05, "loss": 2.024, "step": 123 }, { "epoch": 0.006911543392230088, "grad_norm": 0.6462398171424866, "learning_rate": 4.133333333333333e-05, "loss": 1.6993, "step": 124 }, { "epoch": 0.006967281645393233, "grad_norm": 0.7756698727607727, "learning_rate": 4.166666666666667e-05, "loss": 2.0135, "step": 125 }, { "epoch": 0.00702301989855638, "grad_norm": 0.6666940450668335, "learning_rate": 4.2e-05, "loss": 1.9444, "step": 126 }, { "epoch": 0.007078758151719525, "grad_norm": 0.6363375782966614, "learning_rate": 4.233333333333334e-05, "loss": 1.6977, "step": 127 }, { "epoch": 0.007134496404882671, "grad_norm": 0.6881687045097351, "learning_rate": 4.266666666666667e-05, "loss": 1.7938, "step": 128 }, { "epoch": 0.007190234658045817, "grad_norm": 0.7950214147567749, "learning_rate": 4.3e-05, "loss": 2.1036, "step": 129 }, { "epoch": 0.007245972911208962, "grad_norm": 0.6743674874305725, "learning_rate": 4.3333333333333334e-05, "loss": 2.0052, "step": 130 }, { "epoch": 0.007301711164372109, "grad_norm": 0.7302188277244568, "learning_rate": 4.3666666666666666e-05, "loss": 1.7815, "step": 131 }, { "epoch": 0.007357449417535254, "grad_norm": 0.691747784614563, "learning_rate": 4.4000000000000006e-05, "loss": 1.7225, "step": 132 }, { "epoch": 0.0074131876706984, "grad_norm": 0.6021103262901306, "learning_rate": 4.433333333333334e-05, "loss": 1.5821, "step": 133 }, { "epoch": 0.007468925923861546, "grad_norm": 0.7083866000175476, "learning_rate": 4.466666666666667e-05, "loss": 1.7831, "step": 134 }, { "epoch": 0.007524664177024692, "grad_norm": 0.6396238207817078, "learning_rate": 4.5e-05, "loss": 1.7933, "step": 135 }, { "epoch": 0.007580402430187838, "grad_norm": 0.6446027159690857, "learning_rate": 4.5333333333333335e-05, "loss": 1.697, "step": 136 }, { "epoch": 0.007636140683350984, "grad_norm": 0.6570568084716797, "learning_rate": 4.566666666666667e-05, "loss": 1.8226, "step": 137 }, { "epoch": 0.007691878936514129, "grad_norm": 0.7829813361167908, "learning_rate": 4.600000000000001e-05, "loss": 1.9071, "step": 138 }, { "epoch": 0.007747617189677276, "grad_norm": 0.6894962787628174, "learning_rate": 4.633333333333333e-05, "loss": 1.8796, "step": 139 }, { "epoch": 0.007803355442840421, "grad_norm": 0.6631702184677124, "learning_rate": 4.666666666666667e-05, "loss": 1.7765, "step": 140 }, { "epoch": 0.007859093696003567, "grad_norm": 0.7325467467308044, "learning_rate": 4.7e-05, "loss": 1.9653, "step": 141 }, { "epoch": 0.007914831949166713, "grad_norm": 0.7264820337295532, "learning_rate": 4.7333333333333336e-05, "loss": 1.9019, "step": 142 }, { "epoch": 0.00797057020232986, "grad_norm": 0.6573049426078796, "learning_rate": 4.766666666666667e-05, "loss": 1.8028, "step": 143 }, { "epoch": 0.008026308455493006, "grad_norm": 0.6475189328193665, "learning_rate": 4.8e-05, "loss": 1.8229, "step": 144 }, { "epoch": 0.00808204670865615, "grad_norm": 0.6277217864990234, "learning_rate": 4.8333333333333334e-05, "loss": 1.8648, "step": 145 }, { "epoch": 0.008137784961819296, "grad_norm": 0.6631461381912231, "learning_rate": 4.866666666666667e-05, "loss": 1.7499, "step": 146 }, { "epoch": 0.008193523214982443, "grad_norm": 0.8212792873382568, "learning_rate": 4.9e-05, "loss": 1.9345, "step": 147 }, { "epoch": 0.008249261468145589, "grad_norm": 0.6783550977706909, "learning_rate": 4.933333333333334e-05, "loss": 2.0028, "step": 148 }, { "epoch": 0.008304999721308734, "grad_norm": 0.7066723704338074, "learning_rate": 4.966666666666667e-05, "loss": 2.0291, "step": 149 }, { "epoch": 0.00836073797447188, "grad_norm": 0.772089958190918, "learning_rate": 5e-05, "loss": 2.0909, "step": 150 }, { "epoch": 0.008416476227635026, "grad_norm": 0.6396070718765259, "learning_rate": 5.0333333333333335e-05, "loss": 1.75, "step": 151 }, { "epoch": 0.008472214480798173, "grad_norm": 0.6549371480941772, "learning_rate": 5.0666666666666674e-05, "loss": 1.8499, "step": 152 }, { "epoch": 0.008527952733961317, "grad_norm": 0.7041524648666382, "learning_rate": 5.1000000000000006e-05, "loss": 1.9604, "step": 153 }, { "epoch": 0.008583690987124463, "grad_norm": 0.6144838929176331, "learning_rate": 5.133333333333333e-05, "loss": 1.813, "step": 154 }, { "epoch": 0.00863942924028761, "grad_norm": 0.5433954000473022, "learning_rate": 5.166666666666667e-05, "loss": 1.7692, "step": 155 }, { "epoch": 0.008695167493450754, "grad_norm": 0.6341120600700378, "learning_rate": 5.2000000000000004e-05, "loss": 1.8756, "step": 156 }, { "epoch": 0.0087509057466139, "grad_norm": 0.6475428938865662, "learning_rate": 5.2333333333333336e-05, "loss": 2.0465, "step": 157 }, { "epoch": 0.008806643999777047, "grad_norm": 0.6457498669624329, "learning_rate": 5.266666666666666e-05, "loss": 1.9387, "step": 158 }, { "epoch": 0.008862382252940193, "grad_norm": 0.562533974647522, "learning_rate": 5.300000000000001e-05, "loss": 1.7746, "step": 159 }, { "epoch": 0.008918120506103338, "grad_norm": 0.6415228247642517, "learning_rate": 5.333333333333333e-05, "loss": 1.7729, "step": 160 }, { "epoch": 0.008973858759266484, "grad_norm": 0.6404130458831787, "learning_rate": 5.3666666666666666e-05, "loss": 1.7488, "step": 161 }, { "epoch": 0.00902959701242963, "grad_norm": 0.6626627445220947, "learning_rate": 5.4000000000000005e-05, "loss": 1.8962, "step": 162 }, { "epoch": 0.009085335265592777, "grad_norm": 0.6191387176513672, "learning_rate": 5.433333333333334e-05, "loss": 1.8141, "step": 163 }, { "epoch": 0.009141073518755921, "grad_norm": 0.5454838871955872, "learning_rate": 5.466666666666666e-05, "loss": 1.5107, "step": 164 }, { "epoch": 0.009196811771919068, "grad_norm": 0.6767019033432007, "learning_rate": 5.500000000000001e-05, "loss": 2.1324, "step": 165 }, { "epoch": 0.009252550025082214, "grad_norm": 0.6267591714859009, "learning_rate": 5.5333333333333334e-05, "loss": 1.7378, "step": 166 }, { "epoch": 0.00930828827824536, "grad_norm": 0.5743867754936218, "learning_rate": 5.566666666666667e-05, "loss": 1.7654, "step": 167 }, { "epoch": 0.009364026531408505, "grad_norm": 0.5550642013549805, "learning_rate": 5.6000000000000006e-05, "loss": 1.8091, "step": 168 }, { "epoch": 0.009419764784571651, "grad_norm": 0.5943305492401123, "learning_rate": 5.633333333333334e-05, "loss": 1.6823, "step": 169 }, { "epoch": 0.009475503037734798, "grad_norm": 0.6027736663818359, "learning_rate": 5.666666666666667e-05, "loss": 1.7736, "step": 170 }, { "epoch": 0.009531241290897944, "grad_norm": 0.6379444003105164, "learning_rate": 5.6999999999999996e-05, "loss": 2.0331, "step": 171 }, { "epoch": 0.009586979544061089, "grad_norm": 0.6117588877677917, "learning_rate": 5.7333333333333336e-05, "loss": 1.8546, "step": 172 }, { "epoch": 0.009642717797224235, "grad_norm": 0.6109329462051392, "learning_rate": 5.766666666666667e-05, "loss": 2.0427, "step": 173 }, { "epoch": 0.009698456050387381, "grad_norm": 0.5530399084091187, "learning_rate": 5.8e-05, "loss": 1.7323, "step": 174 }, { "epoch": 0.009754194303550527, "grad_norm": 0.7092908024787903, "learning_rate": 5.833333333333334e-05, "loss": 2.2, "step": 175 }, { "epoch": 0.009809932556713672, "grad_norm": 0.5897237658500671, "learning_rate": 5.866666666666667e-05, "loss": 1.5879, "step": 176 }, { "epoch": 0.009865670809876818, "grad_norm": 0.5485551357269287, "learning_rate": 5.9e-05, "loss": 1.6043, "step": 177 }, { "epoch": 0.009921409063039965, "grad_norm": 0.5792586803436279, "learning_rate": 5.9333333333333343e-05, "loss": 1.8772, "step": 178 }, { "epoch": 0.009977147316203111, "grad_norm": 0.6716285943984985, "learning_rate": 5.966666666666667e-05, "loss": 1.7887, "step": 179 }, { "epoch": 0.010032885569366256, "grad_norm": 0.5866957902908325, "learning_rate": 6e-05, "loss": 1.7228, "step": 180 }, { "epoch": 0.010088623822529402, "grad_norm": 0.6197178363800049, "learning_rate": 6.033333333333334e-05, "loss": 1.7767, "step": 181 }, { "epoch": 0.010144362075692548, "grad_norm": 0.6811436414718628, "learning_rate": 6.066666666666667e-05, "loss": 2.002, "step": 182 }, { "epoch": 0.010200100328855693, "grad_norm": 0.6519239544868469, "learning_rate": 6.1e-05, "loss": 1.7755, "step": 183 }, { "epoch": 0.01025583858201884, "grad_norm": 0.5758973360061646, "learning_rate": 6.133333333333334e-05, "loss": 1.7244, "step": 184 }, { "epoch": 0.010311576835181985, "grad_norm": 0.5882923007011414, "learning_rate": 6.166666666666667e-05, "loss": 1.8041, "step": 185 }, { "epoch": 0.010367315088345132, "grad_norm": 0.5509873032569885, "learning_rate": 6.2e-05, "loss": 1.7813, "step": 186 }, { "epoch": 0.010423053341508276, "grad_norm": 0.5870537757873535, "learning_rate": 6.233333333333334e-05, "loss": 1.9419, "step": 187 }, { "epoch": 0.010478791594671423, "grad_norm": 0.5315700173377991, "learning_rate": 6.266666666666667e-05, "loss": 1.6804, "step": 188 }, { "epoch": 0.010534529847834569, "grad_norm": 0.5694735646247864, "learning_rate": 6.3e-05, "loss": 1.8406, "step": 189 }, { "epoch": 0.010590268100997715, "grad_norm": 0.5579227209091187, "learning_rate": 6.333333333333333e-05, "loss": 1.9451, "step": 190 }, { "epoch": 0.01064600635416086, "grad_norm": 0.5777730941772461, "learning_rate": 6.366666666666668e-05, "loss": 1.7783, "step": 191 }, { "epoch": 0.010701744607324006, "grad_norm": 0.5626804828643799, "learning_rate": 6.400000000000001e-05, "loss": 1.8944, "step": 192 }, { "epoch": 0.010757482860487153, "grad_norm": 0.5726325511932373, "learning_rate": 6.433333333333333e-05, "loss": 1.8799, "step": 193 }, { "epoch": 0.010813221113650299, "grad_norm": 0.6156812906265259, "learning_rate": 6.466666666666666e-05, "loss": 1.8651, "step": 194 }, { "epoch": 0.010868959366813443, "grad_norm": 0.545893669128418, "learning_rate": 6.500000000000001e-05, "loss": 1.6938, "step": 195 }, { "epoch": 0.01092469761997659, "grad_norm": 0.5374442934989929, "learning_rate": 6.533333333333334e-05, "loss": 1.756, "step": 196 }, { "epoch": 0.010980435873139736, "grad_norm": 0.5943235754966736, "learning_rate": 6.566666666666666e-05, "loss": 1.8388, "step": 197 }, { "epoch": 0.011036174126302882, "grad_norm": 0.7199476361274719, "learning_rate": 6.6e-05, "loss": 2.0311, "step": 198 }, { "epoch": 0.011091912379466027, "grad_norm": 0.65143883228302, "learning_rate": 6.633333333333334e-05, "loss": 2.0285, "step": 199 }, { "epoch": 0.011147650632629173, "grad_norm": 0.5984755754470825, "learning_rate": 6.666666666666667e-05, "loss": 1.7062, "step": 200 }, { "epoch": 0.01120338888579232, "grad_norm": 0.5733404755592346, "learning_rate": 6.7e-05, "loss": 1.916, "step": 201 }, { "epoch": 0.011259127138955466, "grad_norm": 0.5946204662322998, "learning_rate": 6.733333333333333e-05, "loss": 1.9394, "step": 202 }, { "epoch": 0.01131486539211861, "grad_norm": 0.677741527557373, "learning_rate": 6.766666666666667e-05, "loss": 2.248, "step": 203 }, { "epoch": 0.011370603645281757, "grad_norm": 0.5983121991157532, "learning_rate": 6.800000000000001e-05, "loss": 1.835, "step": 204 }, { "epoch": 0.011426341898444903, "grad_norm": 0.5219351053237915, "learning_rate": 6.833333333333333e-05, "loss": 1.7373, "step": 205 }, { "epoch": 0.01148208015160805, "grad_norm": 0.657131552696228, "learning_rate": 6.866666666666666e-05, "loss": 2.1801, "step": 206 }, { "epoch": 0.011537818404771194, "grad_norm": 0.6068251132965088, "learning_rate": 6.9e-05, "loss": 1.7873, "step": 207 }, { "epoch": 0.01159355665793434, "grad_norm": 0.5744972825050354, "learning_rate": 6.933333333333334e-05, "loss": 1.9491, "step": 208 }, { "epoch": 0.011649294911097487, "grad_norm": 0.5395380854606628, "learning_rate": 6.966666666666668e-05, "loss": 1.7532, "step": 209 }, { "epoch": 0.011705033164260631, "grad_norm": 0.5843316912651062, "learning_rate": 7e-05, "loss": 1.7694, "step": 210 }, { "epoch": 0.011760771417423778, "grad_norm": 0.6699615716934204, "learning_rate": 7.033333333333334e-05, "loss": 2.2063, "step": 211 }, { "epoch": 0.011816509670586924, "grad_norm": 0.5723788738250732, "learning_rate": 7.066666666666667e-05, "loss": 1.8842, "step": 212 }, { "epoch": 0.01187224792375007, "grad_norm": 0.5478008985519409, "learning_rate": 7.1e-05, "loss": 1.7411, "step": 213 }, { "epoch": 0.011927986176913215, "grad_norm": 0.567477285861969, "learning_rate": 7.133333333333334e-05, "loss": 1.8457, "step": 214 }, { "epoch": 0.011983724430076361, "grad_norm": 0.5568417310714722, "learning_rate": 7.166666666666667e-05, "loss": 1.8425, "step": 215 }, { "epoch": 0.012039462683239507, "grad_norm": 0.552416205406189, "learning_rate": 7.2e-05, "loss": 1.9535, "step": 216 }, { "epoch": 0.012095200936402654, "grad_norm": 0.6089819073677063, "learning_rate": 7.233333333333335e-05, "loss": 1.8465, "step": 217 }, { "epoch": 0.012150939189565798, "grad_norm": 0.6218812465667725, "learning_rate": 7.266666666666667e-05, "loss": 2.1711, "step": 218 }, { "epoch": 0.012206677442728945, "grad_norm": 0.5704020261764526, "learning_rate": 7.3e-05, "loss": 1.7793, "step": 219 }, { "epoch": 0.012262415695892091, "grad_norm": 0.5598061084747314, "learning_rate": 7.333333333333333e-05, "loss": 1.9454, "step": 220 }, { "epoch": 0.012318153949055237, "grad_norm": 0.5439260601997375, "learning_rate": 7.366666666666668e-05, "loss": 1.8544, "step": 221 }, { "epoch": 0.012373892202218382, "grad_norm": 0.5953371524810791, "learning_rate": 7.4e-05, "loss": 1.8335, "step": 222 }, { "epoch": 0.012429630455381528, "grad_norm": 0.5699326395988464, "learning_rate": 7.433333333333333e-05, "loss": 1.6647, "step": 223 }, { "epoch": 0.012485368708544674, "grad_norm": 0.5833302140235901, "learning_rate": 7.466666666666667e-05, "loss": 1.9092, "step": 224 }, { "epoch": 0.01254110696170782, "grad_norm": 0.5663686394691467, "learning_rate": 7.500000000000001e-05, "loss": 1.7344, "step": 225 }, { "epoch": 0.012596845214870965, "grad_norm": 0.5459832549095154, "learning_rate": 7.533333333333334e-05, "loss": 1.6805, "step": 226 }, { "epoch": 0.012652583468034112, "grad_norm": 0.6193357110023499, "learning_rate": 7.566666666666667e-05, "loss": 1.6711, "step": 227 }, { "epoch": 0.012708321721197258, "grad_norm": 0.6414167284965515, "learning_rate": 7.6e-05, "loss": 1.9194, "step": 228 }, { "epoch": 0.012764059974360404, "grad_norm": 0.541812539100647, "learning_rate": 7.633333333333334e-05, "loss": 1.9374, "step": 229 }, { "epoch": 0.012819798227523549, "grad_norm": 0.5368767976760864, "learning_rate": 7.666666666666667e-05, "loss": 1.605, "step": 230 }, { "epoch": 0.012875536480686695, "grad_norm": 0.622112512588501, "learning_rate": 7.7e-05, "loss": 1.804, "step": 231 }, { "epoch": 0.012931274733849842, "grad_norm": 0.5820221900939941, "learning_rate": 7.733333333333333e-05, "loss": 1.796, "step": 232 }, { "epoch": 0.012987012987012988, "grad_norm": 0.5530866980552673, "learning_rate": 7.766666666666667e-05, "loss": 1.704, "step": 233 }, { "epoch": 0.013042751240176132, "grad_norm": 0.5967001914978027, "learning_rate": 7.800000000000001e-05, "loss": 2.0598, "step": 234 }, { "epoch": 0.013098489493339279, "grad_norm": 0.5761673450469971, "learning_rate": 7.833333333333333e-05, "loss": 1.9391, "step": 235 }, { "epoch": 0.013154227746502425, "grad_norm": 0.582139253616333, "learning_rate": 7.866666666666666e-05, "loss": 1.851, "step": 236 }, { "epoch": 0.01320996599966557, "grad_norm": 0.6047868132591248, "learning_rate": 7.900000000000001e-05, "loss": 1.9757, "step": 237 }, { "epoch": 0.013265704252828716, "grad_norm": 0.6394466757774353, "learning_rate": 7.933333333333334e-05, "loss": 2.2063, "step": 238 }, { "epoch": 0.013321442505991862, "grad_norm": 0.6129965782165527, "learning_rate": 7.966666666666666e-05, "loss": 1.8813, "step": 239 }, { "epoch": 0.013377180759155009, "grad_norm": 0.5982023477554321, "learning_rate": 8e-05, "loss": 1.928, "step": 240 }, { "epoch": 0.013432919012318153, "grad_norm": 0.515180230140686, "learning_rate": 8.033333333333334e-05, "loss": 1.5582, "step": 241 }, { "epoch": 0.0134886572654813, "grad_norm": 0.669916033744812, "learning_rate": 8.066666666666667e-05, "loss": 2.1044, "step": 242 }, { "epoch": 0.013544395518644446, "grad_norm": 0.5825132131576538, "learning_rate": 8.1e-05, "loss": 1.7521, "step": 243 }, { "epoch": 0.013600133771807592, "grad_norm": 0.6118985414505005, "learning_rate": 8.133333333333334e-05, "loss": 1.9605, "step": 244 }, { "epoch": 0.013655872024970737, "grad_norm": 0.5747547745704651, "learning_rate": 8.166666666666667e-05, "loss": 1.8198, "step": 245 }, { "epoch": 0.013711610278133883, "grad_norm": 0.609553337097168, "learning_rate": 8.2e-05, "loss": 2.0001, "step": 246 }, { "epoch": 0.01376734853129703, "grad_norm": 0.5751491189002991, "learning_rate": 8.233333333333333e-05, "loss": 1.9317, "step": 247 }, { "epoch": 0.013823086784460176, "grad_norm": 0.599029541015625, "learning_rate": 8.266666666666667e-05, "loss": 1.7716, "step": 248 }, { "epoch": 0.01387882503762332, "grad_norm": 0.5347121953964233, "learning_rate": 8.3e-05, "loss": 1.82, "step": 249 }, { "epoch": 0.013934563290786467, "grad_norm": 0.5724605917930603, "learning_rate": 8.333333333333334e-05, "loss": 1.8309, "step": 250 }, { "epoch": 0.013990301543949613, "grad_norm": 0.531136691570282, "learning_rate": 8.366666666666668e-05, "loss": 1.682, "step": 251 }, { "epoch": 0.01404603979711276, "grad_norm": 0.5464481115341187, "learning_rate": 8.4e-05, "loss": 2.001, "step": 252 }, { "epoch": 0.014101778050275904, "grad_norm": 0.5945254564285278, "learning_rate": 8.433333333333334e-05, "loss": 1.7766, "step": 253 }, { "epoch": 0.01415751630343905, "grad_norm": 0.5452976226806641, "learning_rate": 8.466666666666667e-05, "loss": 1.6948, "step": 254 }, { "epoch": 0.014213254556602196, "grad_norm": 0.5722144842147827, "learning_rate": 8.5e-05, "loss": 1.8978, "step": 255 }, { "epoch": 0.014268992809765343, "grad_norm": 0.5629029870033264, "learning_rate": 8.533333333333334e-05, "loss": 1.7381, "step": 256 }, { "epoch": 0.014324731062928487, "grad_norm": 0.584661066532135, "learning_rate": 8.566666666666667e-05, "loss": 1.7016, "step": 257 }, { "epoch": 0.014380469316091634, "grad_norm": 0.544104814529419, "learning_rate": 8.6e-05, "loss": 1.8649, "step": 258 }, { "epoch": 0.01443620756925478, "grad_norm": 0.5734279751777649, "learning_rate": 8.633333333333334e-05, "loss": 1.7844, "step": 259 }, { "epoch": 0.014491945822417925, "grad_norm": 0.5523878335952759, "learning_rate": 8.666666666666667e-05, "loss": 2.0572, "step": 260 }, { "epoch": 0.014547684075581071, "grad_norm": 0.5634390115737915, "learning_rate": 8.7e-05, "loss": 1.8073, "step": 261 }, { "epoch": 0.014603422328744217, "grad_norm": 0.5875604152679443, "learning_rate": 8.733333333333333e-05, "loss": 1.9706, "step": 262 }, { "epoch": 0.014659160581907364, "grad_norm": 0.534288227558136, "learning_rate": 8.766666666666668e-05, "loss": 1.7742, "step": 263 }, { "epoch": 0.014714898835070508, "grad_norm": 0.5286023020744324, "learning_rate": 8.800000000000001e-05, "loss": 1.6763, "step": 264 }, { "epoch": 0.014770637088233654, "grad_norm": 0.5768111944198608, "learning_rate": 8.833333333333333e-05, "loss": 1.5731, "step": 265 }, { "epoch": 0.0148263753413968, "grad_norm": 0.552629292011261, "learning_rate": 8.866666666666668e-05, "loss": 1.9837, "step": 266 }, { "epoch": 0.014882113594559947, "grad_norm": 0.5081507563591003, "learning_rate": 8.900000000000001e-05, "loss": 1.8844, "step": 267 }, { "epoch": 0.014937851847723092, "grad_norm": 0.563845694065094, "learning_rate": 8.933333333333334e-05, "loss": 1.9141, "step": 268 }, { "epoch": 0.014993590100886238, "grad_norm": 0.5855246186256409, "learning_rate": 8.966666666666666e-05, "loss": 2.1101, "step": 269 }, { "epoch": 0.015049328354049384, "grad_norm": 0.5010532736778259, "learning_rate": 9e-05, "loss": 1.8388, "step": 270 }, { "epoch": 0.01510506660721253, "grad_norm": 0.5565475225448608, "learning_rate": 9.033333333333334e-05, "loss": 1.8648, "step": 271 }, { "epoch": 0.015160804860375675, "grad_norm": 0.5293692350387573, "learning_rate": 9.066666666666667e-05, "loss": 1.7059, "step": 272 }, { "epoch": 0.015216543113538821, "grad_norm": 0.5180760025978088, "learning_rate": 9.1e-05, "loss": 1.8659, "step": 273 }, { "epoch": 0.015272281366701968, "grad_norm": 0.5416427254676819, "learning_rate": 9.133333333333334e-05, "loss": 1.6187, "step": 274 }, { "epoch": 0.015328019619865114, "grad_norm": 0.603060781955719, "learning_rate": 9.166666666666667e-05, "loss": 1.8554, "step": 275 }, { "epoch": 0.015383757873028259, "grad_norm": 0.5260182023048401, "learning_rate": 9.200000000000001e-05, "loss": 1.8108, "step": 276 }, { "epoch": 0.015439496126191405, "grad_norm": 0.5307485461235046, "learning_rate": 9.233333333333333e-05, "loss": 1.7369, "step": 277 }, { "epoch": 0.015495234379354551, "grad_norm": 0.5671928524971008, "learning_rate": 9.266666666666666e-05, "loss": 1.7879, "step": 278 }, { "epoch": 0.015550972632517698, "grad_norm": 0.5482888221740723, "learning_rate": 9.300000000000001e-05, "loss": 1.8687, "step": 279 }, { "epoch": 0.015606710885680842, "grad_norm": 0.5492271184921265, "learning_rate": 9.333333333333334e-05, "loss": 2.0486, "step": 280 }, { "epoch": 0.01566244913884399, "grad_norm": 0.5533493757247925, "learning_rate": 9.366666666666668e-05, "loss": 1.8764, "step": 281 }, { "epoch": 0.015718187392007133, "grad_norm": 0.5373388528823853, "learning_rate": 9.4e-05, "loss": 1.8098, "step": 282 }, { "epoch": 0.01577392564517028, "grad_norm": 0.5737355351448059, "learning_rate": 9.433333333333334e-05, "loss": 1.8023, "step": 283 }, { "epoch": 0.015829663898333426, "grad_norm": 0.6059421896934509, "learning_rate": 9.466666666666667e-05, "loss": 1.9003, "step": 284 }, { "epoch": 0.015885402151496572, "grad_norm": 0.545070230960846, "learning_rate": 9.5e-05, "loss": 1.6793, "step": 285 }, { "epoch": 0.01594114040465972, "grad_norm": 0.5391154885292053, "learning_rate": 9.533333333333334e-05, "loss": 1.7691, "step": 286 }, { "epoch": 0.015996878657822865, "grad_norm": 0.5233768820762634, "learning_rate": 9.566666666666667e-05, "loss": 1.8312, "step": 287 }, { "epoch": 0.01605261691098601, "grad_norm": 0.5520955920219421, "learning_rate": 9.6e-05, "loss": 1.9652, "step": 288 }, { "epoch": 0.016108355164149154, "grad_norm": 0.5521306991577148, "learning_rate": 9.633333333333335e-05, "loss": 1.8264, "step": 289 }, { "epoch": 0.0161640934173123, "grad_norm": 0.5325077176094055, "learning_rate": 9.666666666666667e-05, "loss": 1.9074, "step": 290 }, { "epoch": 0.016219831670475447, "grad_norm": 0.5402048230171204, "learning_rate": 9.7e-05, "loss": 1.9993, "step": 291 }, { "epoch": 0.016275569923638593, "grad_norm": 0.5164310336112976, "learning_rate": 9.733333333333335e-05, "loss": 1.6385, "step": 292 }, { "epoch": 0.01633130817680174, "grad_norm": 0.5265329480171204, "learning_rate": 9.766666666666668e-05, "loss": 1.8513, "step": 293 }, { "epoch": 0.016387046429964885, "grad_norm": 0.5051769614219666, "learning_rate": 9.8e-05, "loss": 1.7628, "step": 294 }, { "epoch": 0.016442784683128032, "grad_norm": 0.5061401128768921, "learning_rate": 9.833333333333333e-05, "loss": 1.8406, "step": 295 }, { "epoch": 0.016498522936291178, "grad_norm": 0.6622328162193298, "learning_rate": 9.866666666666668e-05, "loss": 1.9504, "step": 296 }, { "epoch": 0.01655426118945432, "grad_norm": 0.5525157451629639, "learning_rate": 9.900000000000001e-05, "loss": 1.9845, "step": 297 }, { "epoch": 0.016609999442617467, "grad_norm": 0.5412437319755554, "learning_rate": 9.933333333333334e-05, "loss": 1.8234, "step": 298 }, { "epoch": 0.016665737695780614, "grad_norm": 0.53217613697052, "learning_rate": 9.966666666666667e-05, "loss": 1.6132, "step": 299 }, { "epoch": 0.01672147594894376, "grad_norm": 0.6531130075454712, "learning_rate": 0.0001, "loss": 2.0395, "step": 300 }, { "epoch": 0.016777214202106906, "grad_norm": 0.49301308393478394, "learning_rate": 9.999999920714576e-05, "loss": 1.6945, "step": 301 }, { "epoch": 0.016832952455270053, "grad_norm": 0.49394482374191284, "learning_rate": 9.999999682858307e-05, "loss": 1.6877, "step": 302 }, { "epoch": 0.0168886907084332, "grad_norm": 0.504688024520874, "learning_rate": 9.9999992864312e-05, "loss": 1.6779, "step": 303 }, { "epoch": 0.016944428961596345, "grad_norm": 0.5286409258842468, "learning_rate": 9.999998731433267e-05, "loss": 1.64, "step": 304 }, { "epoch": 0.017000167214759488, "grad_norm": 0.4911554157733917, "learning_rate": 9.999998017864527e-05, "loss": 1.66, "step": 305 }, { "epoch": 0.017055905467922634, "grad_norm": 0.4851885735988617, "learning_rate": 9.999997145725001e-05, "loss": 1.8884, "step": 306 }, { "epoch": 0.01711164372108578, "grad_norm": 0.521120011806488, "learning_rate": 9.999996115014719e-05, "loss": 1.6844, "step": 307 }, { "epoch": 0.017167381974248927, "grad_norm": 0.5494885444641113, "learning_rate": 9.99999492573371e-05, "loss": 1.7733, "step": 308 }, { "epoch": 0.017223120227412073, "grad_norm": 0.4475904703140259, "learning_rate": 9.999993577882016e-05, "loss": 1.6295, "step": 309 }, { "epoch": 0.01727885848057522, "grad_norm": 0.4610547721385956, "learning_rate": 9.999992071459676e-05, "loss": 1.6118, "step": 310 }, { "epoch": 0.017334596733738366, "grad_norm": 0.49445369839668274, "learning_rate": 9.999990406466741e-05, "loss": 1.594, "step": 311 }, { "epoch": 0.01739033498690151, "grad_norm": 0.5013507008552551, "learning_rate": 9.999988582903262e-05, "loss": 1.6829, "step": 312 }, { "epoch": 0.017446073240064655, "grad_norm": 0.5492314100265503, "learning_rate": 9.999986600769295e-05, "loss": 1.662, "step": 313 }, { "epoch": 0.0175018114932278, "grad_norm": 0.49456071853637695, "learning_rate": 9.999984460064908e-05, "loss": 1.7087, "step": 314 }, { "epoch": 0.017557549746390948, "grad_norm": 0.587954580783844, "learning_rate": 9.999982160790164e-05, "loss": 1.8628, "step": 315 }, { "epoch": 0.017613287999554094, "grad_norm": 0.6061418652534485, "learning_rate": 9.999979702945138e-05, "loss": 2.143, "step": 316 }, { "epoch": 0.01766902625271724, "grad_norm": 0.52556973695755, "learning_rate": 9.999977086529909e-05, "loss": 1.6862, "step": 317 }, { "epoch": 0.017724764505880387, "grad_norm": 0.5804201364517212, "learning_rate": 9.999974311544556e-05, "loss": 1.8495, "step": 318 }, { "epoch": 0.017780502759043533, "grad_norm": 0.5533789396286011, "learning_rate": 9.999971377989172e-05, "loss": 1.9501, "step": 319 }, { "epoch": 0.017836241012206676, "grad_norm": 0.5596528649330139, "learning_rate": 9.999968285863848e-05, "loss": 1.981, "step": 320 }, { "epoch": 0.017891979265369822, "grad_norm": 0.538735568523407, "learning_rate": 9.99996503516868e-05, "loss": 1.9126, "step": 321 }, { "epoch": 0.01794771751853297, "grad_norm": 0.48604801297187805, "learning_rate": 9.999961625903774e-05, "loss": 1.7568, "step": 322 }, { "epoch": 0.018003455771696115, "grad_norm": 0.5091099143028259, "learning_rate": 9.999958058069237e-05, "loss": 1.9625, "step": 323 }, { "epoch": 0.01805919402485926, "grad_norm": 0.4944256842136383, "learning_rate": 9.999954331665182e-05, "loss": 1.6326, "step": 324 }, { "epoch": 0.018114932278022407, "grad_norm": 0.5379263162612915, "learning_rate": 9.999950446691728e-05, "loss": 1.8484, "step": 325 }, { "epoch": 0.018170670531185554, "grad_norm": 0.5548909306526184, "learning_rate": 9.999946403148997e-05, "loss": 1.8855, "step": 326 }, { "epoch": 0.0182264087843487, "grad_norm": 0.5878908634185791, "learning_rate": 9.999942201037118e-05, "loss": 1.8222, "step": 327 }, { "epoch": 0.018282147037511843, "grad_norm": 0.48953092098236084, "learning_rate": 9.999937840356224e-05, "loss": 1.4395, "step": 328 }, { "epoch": 0.01833788529067499, "grad_norm": 0.503923237323761, "learning_rate": 9.999933321106452e-05, "loss": 1.7122, "step": 329 }, { "epoch": 0.018393623543838136, "grad_norm": 0.5150753855705261, "learning_rate": 9.999928643287948e-05, "loss": 1.8863, "step": 330 }, { "epoch": 0.018449361797001282, "grad_norm": 0.5160688757896423, "learning_rate": 9.999923806900859e-05, "loss": 1.8184, "step": 331 }, { "epoch": 0.018505100050164428, "grad_norm": 0.5423057079315186, "learning_rate": 9.99991881194534e-05, "loss": 1.843, "step": 332 }, { "epoch": 0.018560838303327575, "grad_norm": 0.5026907324790955, "learning_rate": 9.999913658421544e-05, "loss": 1.7728, "step": 333 }, { "epoch": 0.01861657655649072, "grad_norm": 0.5391967296600342, "learning_rate": 9.999908346329642e-05, "loss": 1.9225, "step": 334 }, { "epoch": 0.018672314809653867, "grad_norm": 0.5050860047340393, "learning_rate": 9.999902875669797e-05, "loss": 1.7579, "step": 335 }, { "epoch": 0.01872805306281701, "grad_norm": 0.48109737038612366, "learning_rate": 9.999897246442184e-05, "loss": 1.8859, "step": 336 }, { "epoch": 0.018783791315980156, "grad_norm": 0.5002635717391968, "learning_rate": 9.999891458646983e-05, "loss": 1.6809, "step": 337 }, { "epoch": 0.018839529569143303, "grad_norm": 0.5138371586799622, "learning_rate": 9.999885512284375e-05, "loss": 1.7961, "step": 338 }, { "epoch": 0.01889526782230645, "grad_norm": 0.47246232628822327, "learning_rate": 9.999879407354551e-05, "loss": 1.6943, "step": 339 }, { "epoch": 0.018951006075469595, "grad_norm": 0.47807106375694275, "learning_rate": 9.999873143857704e-05, "loss": 1.7652, "step": 340 }, { "epoch": 0.01900674432863274, "grad_norm": 0.4725436270236969, "learning_rate": 9.99986672179403e-05, "loss": 1.7483, "step": 341 }, { "epoch": 0.019062482581795888, "grad_norm": 0.5131480693817139, "learning_rate": 9.999860141163736e-05, "loss": 1.8883, "step": 342 }, { "epoch": 0.01911822083495903, "grad_norm": 0.6150394678115845, "learning_rate": 9.99985340196703e-05, "loss": 2.1536, "step": 343 }, { "epoch": 0.019173959088122177, "grad_norm": 0.5729528069496155, "learning_rate": 9.999846504204124e-05, "loss": 1.9443, "step": 344 }, { "epoch": 0.019229697341285323, "grad_norm": 0.4936676323413849, "learning_rate": 9.999839447875238e-05, "loss": 1.7273, "step": 345 }, { "epoch": 0.01928543559444847, "grad_norm": 0.5480337738990784, "learning_rate": 9.999832232980597e-05, "loss": 1.8024, "step": 346 }, { "epoch": 0.019341173847611616, "grad_norm": 0.4883441925048828, "learning_rate": 9.999824859520428e-05, "loss": 1.6531, "step": 347 }, { "epoch": 0.019396912100774762, "grad_norm": 0.6438686847686768, "learning_rate": 9.999817327494967e-05, "loss": 2.1477, "step": 348 }, { "epoch": 0.01945265035393791, "grad_norm": 0.540684700012207, "learning_rate": 9.999809636904449e-05, "loss": 2.0333, "step": 349 }, { "epoch": 0.019508388607101055, "grad_norm": 0.5322266221046448, "learning_rate": 9.999801787749121e-05, "loss": 1.7542, "step": 350 }, { "epoch": 0.019564126860264198, "grad_norm": 0.5497377514839172, "learning_rate": 9.999793780029232e-05, "loss": 1.9207, "step": 351 }, { "epoch": 0.019619865113427344, "grad_norm": 0.5375553369522095, "learning_rate": 9.999785613745035e-05, "loss": 1.8293, "step": 352 }, { "epoch": 0.01967560336659049, "grad_norm": 0.5242462754249573, "learning_rate": 9.999777288896787e-05, "loss": 1.8176, "step": 353 }, { "epoch": 0.019731341619753637, "grad_norm": 0.5194500088691711, "learning_rate": 9.999768805484757e-05, "loss": 1.961, "step": 354 }, { "epoch": 0.019787079872916783, "grad_norm": 0.4952162504196167, "learning_rate": 9.999760163509209e-05, "loss": 1.6902, "step": 355 }, { "epoch": 0.01984281812607993, "grad_norm": 0.4688204824924469, "learning_rate": 9.99975136297042e-05, "loss": 1.352, "step": 356 }, { "epoch": 0.019898556379243076, "grad_norm": 0.5171904563903809, "learning_rate": 9.999742403868668e-05, "loss": 1.952, "step": 357 }, { "epoch": 0.019954294632406222, "grad_norm": 0.542300283908844, "learning_rate": 9.999733286204238e-05, "loss": 1.8768, "step": 358 }, { "epoch": 0.020010032885569365, "grad_norm": 0.5278236865997314, "learning_rate": 9.99972400997742e-05, "loss": 1.8014, "step": 359 }, { "epoch": 0.02006577113873251, "grad_norm": 0.587790846824646, "learning_rate": 9.999714575188505e-05, "loss": 1.9884, "step": 360 }, { "epoch": 0.020121509391895658, "grad_norm": 0.5114203095436096, "learning_rate": 9.999704981837794e-05, "loss": 1.9038, "step": 361 }, { "epoch": 0.020177247645058804, "grad_norm": 0.538783609867096, "learning_rate": 9.999695229925591e-05, "loss": 1.9049, "step": 362 }, { "epoch": 0.02023298589822195, "grad_norm": 0.5289005637168884, "learning_rate": 9.999685319452208e-05, "loss": 1.7111, "step": 363 }, { "epoch": 0.020288724151385096, "grad_norm": 0.5257157683372498, "learning_rate": 9.999675250417954e-05, "loss": 1.6416, "step": 364 }, { "epoch": 0.020344462404548243, "grad_norm": 0.480473130941391, "learning_rate": 9.999665022823152e-05, "loss": 1.7197, "step": 365 }, { "epoch": 0.020400200657711386, "grad_norm": 0.5564152598381042, "learning_rate": 9.999654636668125e-05, "loss": 1.8762, "step": 366 }, { "epoch": 0.020455938910874532, "grad_norm": 0.6517108082771301, "learning_rate": 9.999644091953204e-05, "loss": 2.4684, "step": 367 }, { "epoch": 0.02051167716403768, "grad_norm": 0.5357886552810669, "learning_rate": 9.999633388678723e-05, "loss": 1.8079, "step": 368 }, { "epoch": 0.020567415417200825, "grad_norm": 0.498740553855896, "learning_rate": 9.999622526845021e-05, "loss": 1.6885, "step": 369 }, { "epoch": 0.02062315367036397, "grad_norm": 0.49749207496643066, "learning_rate": 9.999611506452439e-05, "loss": 1.8686, "step": 370 }, { "epoch": 0.020678891923527117, "grad_norm": 0.5339593291282654, "learning_rate": 9.999600327501333e-05, "loss": 1.8592, "step": 371 }, { "epoch": 0.020734630176690264, "grad_norm": 0.5533782839775085, "learning_rate": 9.999588989992052e-05, "loss": 1.8752, "step": 372 }, { "epoch": 0.02079036842985341, "grad_norm": 0.459504634141922, "learning_rate": 9.99957749392496e-05, "loss": 1.7596, "step": 373 }, { "epoch": 0.020846106683016553, "grad_norm": 0.4722179174423218, "learning_rate": 9.999565839300419e-05, "loss": 1.7573, "step": 374 }, { "epoch": 0.0209018449361797, "grad_norm": 0.49677354097366333, "learning_rate": 9.999554026118798e-05, "loss": 1.9692, "step": 375 }, { "epoch": 0.020957583189342845, "grad_norm": 0.49444639682769775, "learning_rate": 9.999542054380473e-05, "loss": 1.8881, "step": 376 }, { "epoch": 0.02101332144250599, "grad_norm": 0.4882863461971283, "learning_rate": 9.999529924085824e-05, "loss": 1.8369, "step": 377 }, { "epoch": 0.021069059695669138, "grad_norm": 0.475211501121521, "learning_rate": 9.999517635235237e-05, "loss": 1.3352, "step": 378 }, { "epoch": 0.021124797948832284, "grad_norm": 0.5699715614318848, "learning_rate": 9.999505187829096e-05, "loss": 1.763, "step": 379 }, { "epoch": 0.02118053620199543, "grad_norm": 0.5538257360458374, "learning_rate": 9.9994925818678e-05, "loss": 1.7431, "step": 380 }, { "epoch": 0.021236274455158577, "grad_norm": 0.48163720965385437, "learning_rate": 9.99947981735175e-05, "loss": 1.7356, "step": 381 }, { "epoch": 0.02129201270832172, "grad_norm": 0.5482640266418457, "learning_rate": 9.99946689428135e-05, "loss": 1.861, "step": 382 }, { "epoch": 0.021347750961484866, "grad_norm": 0.5083199739456177, "learning_rate": 9.999453812657007e-05, "loss": 1.9594, "step": 383 }, { "epoch": 0.021403489214648012, "grad_norm": 0.513034999370575, "learning_rate": 9.99944057247914e-05, "loss": 2.0073, "step": 384 }, { "epoch": 0.02145922746781116, "grad_norm": 0.5045239329338074, "learning_rate": 9.999427173748164e-05, "loss": 1.6862, "step": 385 }, { "epoch": 0.021514965720974305, "grad_norm": 0.5097934603691101, "learning_rate": 9.999413616464508e-05, "loss": 1.8631, "step": 386 }, { "epoch": 0.02157070397413745, "grad_norm": 0.522888720035553, "learning_rate": 9.999399900628601e-05, "loss": 1.8636, "step": 387 }, { "epoch": 0.021626442227300598, "grad_norm": 0.49189141392707825, "learning_rate": 9.999386026240878e-05, "loss": 1.7465, "step": 388 }, { "epoch": 0.021682180480463744, "grad_norm": 0.5114362239837646, "learning_rate": 9.999371993301779e-05, "loss": 1.6336, "step": 389 }, { "epoch": 0.021737918733626887, "grad_norm": 0.4647996723651886, "learning_rate": 9.999357801811748e-05, "loss": 1.6755, "step": 390 }, { "epoch": 0.021793656986790033, "grad_norm": 0.5380472540855408, "learning_rate": 9.999343451771234e-05, "loss": 1.9477, "step": 391 }, { "epoch": 0.02184939523995318, "grad_norm": 0.4583854377269745, "learning_rate": 9.999328943180697e-05, "loss": 1.7902, "step": 392 }, { "epoch": 0.021905133493116326, "grad_norm": 0.45304641127586365, "learning_rate": 9.999314276040592e-05, "loss": 1.6744, "step": 393 }, { "epoch": 0.021960871746279472, "grad_norm": 0.49699023365974426, "learning_rate": 9.999299450351387e-05, "loss": 1.8258, "step": 394 }, { "epoch": 0.02201660999944262, "grad_norm": 0.49681130051612854, "learning_rate": 9.999284466113552e-05, "loss": 1.8488, "step": 395 }, { "epoch": 0.022072348252605765, "grad_norm": 0.5959085822105408, "learning_rate": 9.999269323327561e-05, "loss": 2.1775, "step": 396 }, { "epoch": 0.022128086505768908, "grad_norm": 0.5063357949256897, "learning_rate": 9.999254021993895e-05, "loss": 1.6503, "step": 397 }, { "epoch": 0.022183824758932054, "grad_norm": 0.5273301005363464, "learning_rate": 9.999238562113038e-05, "loss": 1.8169, "step": 398 }, { "epoch": 0.0222395630120952, "grad_norm": 0.5033614635467529, "learning_rate": 9.999222943685482e-05, "loss": 1.647, "step": 399 }, { "epoch": 0.022295301265258347, "grad_norm": 0.5118756890296936, "learning_rate": 9.999207166711723e-05, "loss": 1.6712, "step": 400 }, { "epoch": 0.022351039518421493, "grad_norm": 0.5338667035102844, "learning_rate": 9.999191231192258e-05, "loss": 1.8125, "step": 401 }, { "epoch": 0.02240677777158464, "grad_norm": 0.5460575819015503, "learning_rate": 9.999175137127596e-05, "loss": 1.8486, "step": 402 }, { "epoch": 0.022462516024747785, "grad_norm": 0.4892098009586334, "learning_rate": 9.999158884518245e-05, "loss": 1.6692, "step": 403 }, { "epoch": 0.022518254277910932, "grad_norm": 0.4894774258136749, "learning_rate": 9.999142473364722e-05, "loss": 1.5916, "step": 404 }, { "epoch": 0.022573992531074075, "grad_norm": 0.4909743070602417, "learning_rate": 9.999125903667545e-05, "loss": 1.646, "step": 405 }, { "epoch": 0.02262973078423722, "grad_norm": 0.48369649052619934, "learning_rate": 9.999109175427243e-05, "loss": 1.6874, "step": 406 }, { "epoch": 0.022685469037400367, "grad_norm": 0.4719717502593994, "learning_rate": 9.999092288644345e-05, "loss": 1.9116, "step": 407 }, { "epoch": 0.022741207290563514, "grad_norm": 0.4719882309436798, "learning_rate": 9.999075243319386e-05, "loss": 1.4898, "step": 408 }, { "epoch": 0.02279694554372666, "grad_norm": 0.5169988870620728, "learning_rate": 9.999058039452906e-05, "loss": 1.7671, "step": 409 }, { "epoch": 0.022852683796889806, "grad_norm": 0.4469069540500641, "learning_rate": 9.999040677045453e-05, "loss": 1.7068, "step": 410 }, { "epoch": 0.022908422050052953, "grad_norm": 0.508651077747345, "learning_rate": 9.999023156097575e-05, "loss": 1.912, "step": 411 }, { "epoch": 0.0229641603032161, "grad_norm": 0.48365309834480286, "learning_rate": 9.99900547660983e-05, "loss": 1.7907, "step": 412 }, { "epoch": 0.02301989855637924, "grad_norm": 0.5189946889877319, "learning_rate": 9.998987638582775e-05, "loss": 1.8333, "step": 413 }, { "epoch": 0.023075636809542388, "grad_norm": 0.5238891839981079, "learning_rate": 9.99896964201698e-05, "loss": 2.0069, "step": 414 }, { "epoch": 0.023131375062705534, "grad_norm": 0.5390001535415649, "learning_rate": 9.998951486913015e-05, "loss": 1.8571, "step": 415 }, { "epoch": 0.02318711331586868, "grad_norm": 0.5339745283126831, "learning_rate": 9.998933173271453e-05, "loss": 1.6536, "step": 416 }, { "epoch": 0.023242851569031827, "grad_norm": 0.48661404848098755, "learning_rate": 9.998914701092877e-05, "loss": 1.8969, "step": 417 }, { "epoch": 0.023298589822194973, "grad_norm": 0.5701104402542114, "learning_rate": 9.998896070377873e-05, "loss": 1.9305, "step": 418 }, { "epoch": 0.02335432807535812, "grad_norm": 0.5289365649223328, "learning_rate": 9.99887728112703e-05, "loss": 1.9801, "step": 419 }, { "epoch": 0.023410066328521262, "grad_norm": 0.4870493412017822, "learning_rate": 9.998858333340945e-05, "loss": 1.879, "step": 420 }, { "epoch": 0.02346580458168441, "grad_norm": 0.46179860830307007, "learning_rate": 9.998839227020221e-05, "loss": 1.6029, "step": 421 }, { "epoch": 0.023521542834847555, "grad_norm": 0.5245276689529419, "learning_rate": 9.998819962165462e-05, "loss": 1.9165, "step": 422 }, { "epoch": 0.0235772810880107, "grad_norm": 0.4952642321586609, "learning_rate": 9.998800538777278e-05, "loss": 1.6276, "step": 423 }, { "epoch": 0.023633019341173848, "grad_norm": 0.48968929052352905, "learning_rate": 9.998780956856285e-05, "loss": 1.5287, "step": 424 }, { "epoch": 0.023688757594336994, "grad_norm": 0.4968630373477936, "learning_rate": 9.998761216403106e-05, "loss": 1.8008, "step": 425 }, { "epoch": 0.02374449584750014, "grad_norm": 0.5983918309211731, "learning_rate": 9.998741317418366e-05, "loss": 2.0055, "step": 426 }, { "epoch": 0.023800234100663287, "grad_norm": 0.49322110414505005, "learning_rate": 9.998721259902694e-05, "loss": 1.6324, "step": 427 }, { "epoch": 0.02385597235382643, "grad_norm": 0.4888675808906555, "learning_rate": 9.99870104385673e-05, "loss": 1.6075, "step": 428 }, { "epoch": 0.023911710606989576, "grad_norm": 0.4783425033092499, "learning_rate": 9.998680669281116e-05, "loss": 1.6517, "step": 429 }, { "epoch": 0.023967448860152722, "grad_norm": 0.5173685550689697, "learning_rate": 9.998660136176492e-05, "loss": 1.6884, "step": 430 }, { "epoch": 0.02402318711331587, "grad_norm": 0.518741250038147, "learning_rate": 9.998639444543514e-05, "loss": 1.7113, "step": 431 }, { "epoch": 0.024078925366479015, "grad_norm": 0.446850448846817, "learning_rate": 9.998618594382836e-05, "loss": 1.5067, "step": 432 }, { "epoch": 0.02413466361964216, "grad_norm": 0.46661272644996643, "learning_rate": 9.99859758569512e-05, "loss": 1.6967, "step": 433 }, { "epoch": 0.024190401872805307, "grad_norm": 0.5824592709541321, "learning_rate": 9.998576418481033e-05, "loss": 2.0151, "step": 434 }, { "epoch": 0.024246140125968454, "grad_norm": 0.4715226888656616, "learning_rate": 9.998555092741247e-05, "loss": 1.6199, "step": 435 }, { "epoch": 0.024301878379131597, "grad_norm": 0.5396628975868225, "learning_rate": 9.998533608476435e-05, "loss": 1.8874, "step": 436 }, { "epoch": 0.024357616632294743, "grad_norm": 0.4999384582042694, "learning_rate": 9.99851196568728e-05, "loss": 1.8761, "step": 437 }, { "epoch": 0.02441335488545789, "grad_norm": 0.4719383418560028, "learning_rate": 9.998490164374472e-05, "loss": 1.6399, "step": 438 }, { "epoch": 0.024469093138621036, "grad_norm": 0.49223801493644714, "learning_rate": 9.998468204538696e-05, "loss": 1.8343, "step": 439 }, { "epoch": 0.024524831391784182, "grad_norm": 0.5116458535194397, "learning_rate": 9.998446086180653e-05, "loss": 2.0423, "step": 440 }, { "epoch": 0.024580569644947328, "grad_norm": 0.48448118567466736, "learning_rate": 9.998423809301043e-05, "loss": 1.5796, "step": 441 }, { "epoch": 0.024636307898110475, "grad_norm": 0.48682916164398193, "learning_rate": 9.998401373900573e-05, "loss": 1.661, "step": 442 }, { "epoch": 0.024692046151273617, "grad_norm": 0.5474771857261658, "learning_rate": 9.998378779979954e-05, "loss": 1.9646, "step": 443 }, { "epoch": 0.024747784404436764, "grad_norm": 0.48878610134124756, "learning_rate": 9.998356027539901e-05, "loss": 1.7896, "step": 444 }, { "epoch": 0.02480352265759991, "grad_norm": 0.49135512113571167, "learning_rate": 9.99833311658114e-05, "loss": 1.7329, "step": 445 }, { "epoch": 0.024859260910763056, "grad_norm": 0.5220357775688171, "learning_rate": 9.998310047104393e-05, "loss": 2.0303, "step": 446 }, { "epoch": 0.024914999163926203, "grad_norm": 0.4597051739692688, "learning_rate": 9.998286819110394e-05, "loss": 1.6114, "step": 447 }, { "epoch": 0.02497073741708935, "grad_norm": 0.5005029439926147, "learning_rate": 9.99826343259988e-05, "loss": 1.8658, "step": 448 }, { "epoch": 0.025026475670252495, "grad_norm": 0.5835437774658203, "learning_rate": 9.99823988757359e-05, "loss": 1.8958, "step": 449 }, { "epoch": 0.02508221392341564, "grad_norm": 0.4960596263408661, "learning_rate": 9.998216184032274e-05, "loss": 1.7768, "step": 450 }, { "epoch": 0.025137952176578784, "grad_norm": 0.4787440299987793, "learning_rate": 9.99819232197668e-05, "loss": 1.7367, "step": 451 }, { "epoch": 0.02519369042974193, "grad_norm": 0.4575479030609131, "learning_rate": 9.99816830140757e-05, "loss": 1.6027, "step": 452 }, { "epoch": 0.025249428682905077, "grad_norm": 0.5182919502258301, "learning_rate": 9.998144122325702e-05, "loss": 1.8879, "step": 453 }, { "epoch": 0.025305166936068223, "grad_norm": 0.49592286348342896, "learning_rate": 9.998119784731843e-05, "loss": 1.954, "step": 454 }, { "epoch": 0.02536090518923137, "grad_norm": 0.4686327576637268, "learning_rate": 9.998095288626765e-05, "loss": 1.6971, "step": 455 }, { "epoch": 0.025416643442394516, "grad_norm": 0.5634790658950806, "learning_rate": 9.998070634011246e-05, "loss": 1.8801, "step": 456 }, { "epoch": 0.025472381695557662, "grad_norm": 0.49380773305892944, "learning_rate": 9.998045820886068e-05, "loss": 1.8882, "step": 457 }, { "epoch": 0.02552811994872081, "grad_norm": 0.5319178104400635, "learning_rate": 9.998020849252017e-05, "loss": 1.7204, "step": 458 }, { "epoch": 0.02558385820188395, "grad_norm": 0.4578639268875122, "learning_rate": 9.997995719109884e-05, "loss": 1.6934, "step": 459 }, { "epoch": 0.025639596455047098, "grad_norm": 0.4672851264476776, "learning_rate": 9.997970430460468e-05, "loss": 1.5534, "step": 460 }, { "epoch": 0.025695334708210244, "grad_norm": 0.4967419505119324, "learning_rate": 9.99794498330457e-05, "loss": 1.7817, "step": 461 }, { "epoch": 0.02575107296137339, "grad_norm": 0.494781494140625, "learning_rate": 9.997919377642997e-05, "loss": 1.759, "step": 462 }, { "epoch": 0.025806811214536537, "grad_norm": 0.47715312242507935, "learning_rate": 9.997893613476561e-05, "loss": 1.6342, "step": 463 }, { "epoch": 0.025862549467699683, "grad_norm": 0.5014367699623108, "learning_rate": 9.99786769080608e-05, "loss": 1.7754, "step": 464 }, { "epoch": 0.02591828772086283, "grad_norm": 0.503808319568634, "learning_rate": 9.997841609632375e-05, "loss": 1.9323, "step": 465 }, { "epoch": 0.025974025974025976, "grad_norm": 0.4935349225997925, "learning_rate": 9.997815369956273e-05, "loss": 1.945, "step": 466 }, { "epoch": 0.02602976422718912, "grad_norm": 0.45313507318496704, "learning_rate": 9.997788971778608e-05, "loss": 1.5908, "step": 467 }, { "epoch": 0.026085502480352265, "grad_norm": 0.48407676815986633, "learning_rate": 9.997762415100214e-05, "loss": 1.449, "step": 468 }, { "epoch": 0.02614124073351541, "grad_norm": 0.4917304813861847, "learning_rate": 9.997735699921938e-05, "loss": 1.7667, "step": 469 }, { "epoch": 0.026196978986678558, "grad_norm": 0.5684965252876282, "learning_rate": 9.997708826244623e-05, "loss": 2.0801, "step": 470 }, { "epoch": 0.026252717239841704, "grad_norm": 0.5034363865852356, "learning_rate": 9.997681794069123e-05, "loss": 1.9385, "step": 471 }, { "epoch": 0.02630845549300485, "grad_norm": 0.5185155272483826, "learning_rate": 9.997654603396294e-05, "loss": 1.9021, "step": 472 }, { "epoch": 0.026364193746167996, "grad_norm": 0.4756320118904114, "learning_rate": 9.997627254227e-05, "loss": 1.7698, "step": 473 }, { "epoch": 0.02641993199933114, "grad_norm": 0.47013306617736816, "learning_rate": 9.997599746562108e-05, "loss": 1.6786, "step": 474 }, { "epoch": 0.026475670252494286, "grad_norm": 0.4797370731830597, "learning_rate": 9.997572080402488e-05, "loss": 1.8663, "step": 475 }, { "epoch": 0.026531408505657432, "grad_norm": 0.4647987186908722, "learning_rate": 9.997544255749021e-05, "loss": 1.6064, "step": 476 }, { "epoch": 0.02658714675882058, "grad_norm": 0.5362509489059448, "learning_rate": 9.99751627260259e-05, "loss": 2.035, "step": 477 }, { "epoch": 0.026642885011983725, "grad_norm": 0.501615047454834, "learning_rate": 9.997488130964077e-05, "loss": 1.7838, "step": 478 }, { "epoch": 0.02669862326514687, "grad_norm": 0.48956695199012756, "learning_rate": 9.997459830834379e-05, "loss": 1.7242, "step": 479 }, { "epoch": 0.026754361518310017, "grad_norm": 0.518091082572937, "learning_rate": 9.997431372214394e-05, "loss": 1.8634, "step": 480 }, { "epoch": 0.026810099771473164, "grad_norm": 0.5070821642875671, "learning_rate": 9.997402755105022e-05, "loss": 1.678, "step": 481 }, { "epoch": 0.026865838024636306, "grad_norm": 0.49108657240867615, "learning_rate": 9.997373979507169e-05, "loss": 1.6952, "step": 482 }, { "epoch": 0.026921576277799453, "grad_norm": 0.4824698269367218, "learning_rate": 9.997345045421753e-05, "loss": 1.6948, "step": 483 }, { "epoch": 0.0269773145309626, "grad_norm": 0.537356972694397, "learning_rate": 9.997315952849688e-05, "loss": 1.9746, "step": 484 }, { "epoch": 0.027033052784125745, "grad_norm": 0.5354846119880676, "learning_rate": 9.997286701791896e-05, "loss": 1.9413, "step": 485 }, { "epoch": 0.02708879103728889, "grad_norm": 0.49684658646583557, "learning_rate": 9.99725729224931e-05, "loss": 1.7646, "step": 486 }, { "epoch": 0.027144529290452038, "grad_norm": 0.5149616599082947, "learning_rate": 9.997227724222855e-05, "loss": 1.6941, "step": 487 }, { "epoch": 0.027200267543615184, "grad_norm": 0.48285308480262756, "learning_rate": 9.997197997713473e-05, "loss": 1.6994, "step": 488 }, { "epoch": 0.02725600579677833, "grad_norm": 0.47129902243614197, "learning_rate": 9.997168112722107e-05, "loss": 1.8408, "step": 489 }, { "epoch": 0.027311744049941473, "grad_norm": 0.44259312748908997, "learning_rate": 9.997138069249703e-05, "loss": 1.636, "step": 490 }, { "epoch": 0.02736748230310462, "grad_norm": 0.4475281238555908, "learning_rate": 9.997107867297216e-05, "loss": 1.5011, "step": 491 }, { "epoch": 0.027423220556267766, "grad_norm": 0.5637838244438171, "learning_rate": 9.997077506865602e-05, "loss": 2.0265, "step": 492 }, { "epoch": 0.027478958809430912, "grad_norm": 0.5333039164543152, "learning_rate": 9.997046987955824e-05, "loss": 2.0372, "step": 493 }, { "epoch": 0.02753469706259406, "grad_norm": 0.49768728017807007, "learning_rate": 9.997016310568851e-05, "loss": 1.8226, "step": 494 }, { "epoch": 0.027590435315757205, "grad_norm": 0.5524271130561829, "learning_rate": 9.996985474705654e-05, "loss": 1.7598, "step": 495 }, { "epoch": 0.02764617356892035, "grad_norm": 0.5334012508392334, "learning_rate": 9.996954480367214e-05, "loss": 1.9021, "step": 496 }, { "epoch": 0.027701911822083494, "grad_norm": 0.5297475457191467, "learning_rate": 9.996923327554511e-05, "loss": 1.7989, "step": 497 }, { "epoch": 0.02775765007524664, "grad_norm": 0.5096792578697205, "learning_rate": 9.996892016268535e-05, "loss": 1.7904, "step": 498 }, { "epoch": 0.027813388328409787, "grad_norm": 0.47295787930488586, "learning_rate": 9.996860546510278e-05, "loss": 1.5494, "step": 499 }, { "epoch": 0.027869126581572933, "grad_norm": 0.48092177510261536, "learning_rate": 9.996828918280737e-05, "loss": 1.6759, "step": 500 }, { "epoch": 0.02792486483473608, "grad_norm": 0.4752250611782074, "learning_rate": 9.996797131580917e-05, "loss": 1.7032, "step": 501 }, { "epoch": 0.027980603087899226, "grad_norm": 0.49519795179367065, "learning_rate": 9.996765186411827e-05, "loss": 1.7786, "step": 502 }, { "epoch": 0.028036341341062372, "grad_norm": 0.5053145289421082, "learning_rate": 9.996733082774477e-05, "loss": 1.9493, "step": 503 }, { "epoch": 0.02809207959422552, "grad_norm": 0.5514931678771973, "learning_rate": 9.996700820669886e-05, "loss": 2.0257, "step": 504 }, { "epoch": 0.02814781784738866, "grad_norm": 0.5103058218955994, "learning_rate": 9.996668400099077e-05, "loss": 1.8291, "step": 505 }, { "epoch": 0.028203556100551808, "grad_norm": 0.4987359941005707, "learning_rate": 9.99663582106308e-05, "loss": 1.6841, "step": 506 }, { "epoch": 0.028259294353714954, "grad_norm": 0.570788562297821, "learning_rate": 9.996603083562928e-05, "loss": 2.1915, "step": 507 }, { "epoch": 0.0283150326068781, "grad_norm": 0.4610704481601715, "learning_rate": 9.996570187599658e-05, "loss": 1.6893, "step": 508 }, { "epoch": 0.028370770860041247, "grad_norm": 0.4623680114746094, "learning_rate": 9.996537133174313e-05, "loss": 1.5927, "step": 509 }, { "epoch": 0.028426509113204393, "grad_norm": 0.4911310076713562, "learning_rate": 9.996503920287942e-05, "loss": 1.6685, "step": 510 }, { "epoch": 0.02848224736636754, "grad_norm": 0.4995778799057007, "learning_rate": 9.996470548941598e-05, "loss": 1.8294, "step": 511 }, { "epoch": 0.028537985619530686, "grad_norm": 0.518905758857727, "learning_rate": 9.996437019136342e-05, "loss": 1.6819, "step": 512 }, { "epoch": 0.02859372387269383, "grad_norm": 0.5348454117774963, "learning_rate": 9.996403330873233e-05, "loss": 1.8129, "step": 513 }, { "epoch": 0.028649462125856975, "grad_norm": 0.49906015396118164, "learning_rate": 9.996369484153342e-05, "loss": 1.8961, "step": 514 }, { "epoch": 0.02870520037902012, "grad_norm": 0.5471760034561157, "learning_rate": 9.996335478977741e-05, "loss": 1.7716, "step": 515 }, { "epoch": 0.028760938632183267, "grad_norm": 0.4836637079715729, "learning_rate": 9.99630131534751e-05, "loss": 1.7395, "step": 516 }, { "epoch": 0.028816676885346414, "grad_norm": 0.4034901261329651, "learning_rate": 9.996266993263732e-05, "loss": 0.9524, "step": 517 }, { "epoch": 0.02887241513850956, "grad_norm": 0.5080105662345886, "learning_rate": 9.996232512727495e-05, "loss": 1.5957, "step": 518 }, { "epoch": 0.028928153391672706, "grad_norm": 0.4828059673309326, "learning_rate": 9.996197873739892e-05, "loss": 1.8356, "step": 519 }, { "epoch": 0.02898389164483585, "grad_norm": 0.47908416390419006, "learning_rate": 9.996163076302023e-05, "loss": 1.7832, "step": 520 }, { "epoch": 0.029039629897998995, "grad_norm": 0.5064157247543335, "learning_rate": 9.996128120414989e-05, "loss": 1.696, "step": 521 }, { "epoch": 0.029095368151162142, "grad_norm": 0.5058413147926331, "learning_rate": 9.996093006079903e-05, "loss": 1.8185, "step": 522 }, { "epoch": 0.029151106404325288, "grad_norm": 0.5816233158111572, "learning_rate": 9.996057733297876e-05, "loss": 2.0013, "step": 523 }, { "epoch": 0.029206844657488434, "grad_norm": 0.506596028804779, "learning_rate": 9.996022302070025e-05, "loss": 1.7923, "step": 524 }, { "epoch": 0.02926258291065158, "grad_norm": 0.48481589555740356, "learning_rate": 9.995986712397477e-05, "loss": 1.674, "step": 525 }, { "epoch": 0.029318321163814727, "grad_norm": 0.6215664148330688, "learning_rate": 9.995950964281357e-05, "loss": 2.041, "step": 526 }, { "epoch": 0.029374059416977873, "grad_norm": 0.5243876576423645, "learning_rate": 9.995915057722804e-05, "loss": 1.9253, "step": 527 }, { "epoch": 0.029429797670141016, "grad_norm": 0.4525597393512726, "learning_rate": 9.995878992722951e-05, "loss": 1.5032, "step": 528 }, { "epoch": 0.029485535923304163, "grad_norm": 0.5035833716392517, "learning_rate": 9.995842769282946e-05, "loss": 1.8901, "step": 529 }, { "epoch": 0.02954127417646731, "grad_norm": 0.5944721698760986, "learning_rate": 9.995806387403934e-05, "loss": 2.1208, "step": 530 }, { "epoch": 0.029597012429630455, "grad_norm": 0.5121837854385376, "learning_rate": 9.995769847087073e-05, "loss": 1.9563, "step": 531 }, { "epoch": 0.0296527506827936, "grad_norm": 0.5083540678024292, "learning_rate": 9.99573314833352e-05, "loss": 2.0126, "step": 532 }, { "epoch": 0.029708488935956748, "grad_norm": 0.4877237379550934, "learning_rate": 9.995696291144438e-05, "loss": 1.92, "step": 533 }, { "epoch": 0.029764227189119894, "grad_norm": 0.4935770034790039, "learning_rate": 9.995659275520995e-05, "loss": 1.5072, "step": 534 }, { "epoch": 0.02981996544228304, "grad_norm": 0.5800178050994873, "learning_rate": 9.995622101464368e-05, "loss": 2.0751, "step": 535 }, { "epoch": 0.029875703695446183, "grad_norm": 0.5653755068778992, "learning_rate": 9.995584768975734e-05, "loss": 2.0538, "step": 536 }, { "epoch": 0.02993144194860933, "grad_norm": 0.463131844997406, "learning_rate": 9.995547278056279e-05, "loss": 1.6813, "step": 537 }, { "epoch": 0.029987180201772476, "grad_norm": 0.5227254629135132, "learning_rate": 9.995509628707189e-05, "loss": 1.9213, "step": 538 }, { "epoch": 0.030042918454935622, "grad_norm": 0.49530157446861267, "learning_rate": 9.99547182092966e-05, "loss": 1.7977, "step": 539 }, { "epoch": 0.03009865670809877, "grad_norm": 0.5396206974983215, "learning_rate": 9.99543385472489e-05, "loss": 1.9346, "step": 540 }, { "epoch": 0.030154394961261915, "grad_norm": 0.517638087272644, "learning_rate": 9.995395730094083e-05, "loss": 1.7214, "step": 541 }, { "epoch": 0.03021013321442506, "grad_norm": 0.5086343884468079, "learning_rate": 9.99535744703845e-05, "loss": 1.6459, "step": 542 }, { "epoch": 0.030265871467588207, "grad_norm": 0.49579426646232605, "learning_rate": 9.995319005559202e-05, "loss": 1.7781, "step": 543 }, { "epoch": 0.03032160972075135, "grad_norm": 0.500481128692627, "learning_rate": 9.995280405657561e-05, "loss": 1.8662, "step": 544 }, { "epoch": 0.030377347973914497, "grad_norm": 0.47389981150627136, "learning_rate": 9.99524164733475e-05, "loss": 1.7803, "step": 545 }, { "epoch": 0.030433086227077643, "grad_norm": 0.4981273114681244, "learning_rate": 9.995202730591997e-05, "loss": 1.7344, "step": 546 }, { "epoch": 0.03048882448024079, "grad_norm": 0.507570207118988, "learning_rate": 9.995163655430539e-05, "loss": 1.864, "step": 547 }, { "epoch": 0.030544562733403936, "grad_norm": 0.4923110008239746, "learning_rate": 9.995124421851614e-05, "loss": 1.711, "step": 548 }, { "epoch": 0.030600300986567082, "grad_norm": 0.42948779463768005, "learning_rate": 9.995085029856464e-05, "loss": 1.4136, "step": 549 }, { "epoch": 0.030656039239730228, "grad_norm": 0.5023720264434814, "learning_rate": 9.99504547944634e-05, "loss": 1.8524, "step": 550 }, { "epoch": 0.03071177749289337, "grad_norm": 0.4656638205051422, "learning_rate": 9.995005770622499e-05, "loss": 1.5452, "step": 551 }, { "epoch": 0.030767515746056517, "grad_norm": 0.49939560890197754, "learning_rate": 9.994965903386198e-05, "loss": 1.8935, "step": 552 }, { "epoch": 0.030823253999219664, "grad_norm": 0.5469990372657776, "learning_rate": 9.994925877738698e-05, "loss": 1.9558, "step": 553 }, { "epoch": 0.03087899225238281, "grad_norm": 0.46579065918922424, "learning_rate": 9.994885693681274e-05, "loss": 1.6339, "step": 554 }, { "epoch": 0.030934730505545956, "grad_norm": 0.4826100468635559, "learning_rate": 9.994845351215199e-05, "loss": 1.6943, "step": 555 }, { "epoch": 0.030990468758709103, "grad_norm": 0.527716338634491, "learning_rate": 9.994804850341748e-05, "loss": 1.9641, "step": 556 }, { "epoch": 0.03104620701187225, "grad_norm": 0.4857400059700012, "learning_rate": 9.994764191062212e-05, "loss": 1.9041, "step": 557 }, { "epoch": 0.031101945265035395, "grad_norm": 0.483614057302475, "learning_rate": 9.994723373377876e-05, "loss": 1.6671, "step": 558 }, { "epoch": 0.031157683518198538, "grad_norm": 0.46863991022109985, "learning_rate": 9.994682397290036e-05, "loss": 1.6415, "step": 559 }, { "epoch": 0.031213421771361684, "grad_norm": 0.5118616223335266, "learning_rate": 9.99464126279999e-05, "loss": 1.9253, "step": 560 }, { "epoch": 0.03126916002452483, "grad_norm": 0.4958517849445343, "learning_rate": 9.994599969909047e-05, "loss": 1.5449, "step": 561 }, { "epoch": 0.03132489827768798, "grad_norm": 0.513558030128479, "learning_rate": 9.99455851861851e-05, "loss": 1.8665, "step": 562 }, { "epoch": 0.03138063653085112, "grad_norm": 0.49571189284324646, "learning_rate": 9.9945169089297e-05, "loss": 1.8442, "step": 563 }, { "epoch": 0.031436374784014266, "grad_norm": 0.550983190536499, "learning_rate": 9.994475140843933e-05, "loss": 1.8436, "step": 564 }, { "epoch": 0.031492113037177416, "grad_norm": 0.4547099173069, "learning_rate": 9.994433214362532e-05, "loss": 1.7172, "step": 565 }, { "epoch": 0.03154785129034056, "grad_norm": 0.4933796525001526, "learning_rate": 9.994391129486833e-05, "loss": 1.6919, "step": 566 }, { "epoch": 0.03160358954350371, "grad_norm": 0.5890671610832214, "learning_rate": 9.994348886218163e-05, "loss": 2.1026, "step": 567 }, { "epoch": 0.03165932779666685, "grad_norm": 0.5334300398826599, "learning_rate": 9.994306484557868e-05, "loss": 1.9232, "step": 568 }, { "epoch": 0.03171506604983, "grad_norm": 0.4899601340293884, "learning_rate": 9.99426392450729e-05, "loss": 1.6408, "step": 569 }, { "epoch": 0.031770804302993144, "grad_norm": 0.5135582089424133, "learning_rate": 9.994221206067777e-05, "loss": 1.8562, "step": 570 }, { "epoch": 0.03182654255615629, "grad_norm": 0.5050702095031738, "learning_rate": 9.994178329240686e-05, "loss": 1.7045, "step": 571 }, { "epoch": 0.03188228080931944, "grad_norm": 0.4874882102012634, "learning_rate": 9.994135294027378e-05, "loss": 1.8015, "step": 572 }, { "epoch": 0.03193801906248258, "grad_norm": 0.6017099022865295, "learning_rate": 9.994092100429215e-05, "loss": 2.1681, "step": 573 }, { "epoch": 0.03199375731564573, "grad_norm": 0.4922308325767517, "learning_rate": 9.994048748447569e-05, "loss": 1.6771, "step": 574 }, { "epoch": 0.03204949556880887, "grad_norm": 0.5013367533683777, "learning_rate": 9.994005238083815e-05, "loss": 1.7157, "step": 575 }, { "epoch": 0.03210523382197202, "grad_norm": 0.47761455178260803, "learning_rate": 9.99396156933933e-05, "loss": 1.8095, "step": 576 }, { "epoch": 0.032160972075135165, "grad_norm": 0.5500997304916382, "learning_rate": 9.993917742215502e-05, "loss": 2.2013, "step": 577 }, { "epoch": 0.03221671032829831, "grad_norm": 0.5222569108009338, "learning_rate": 9.993873756713719e-05, "loss": 1.9967, "step": 578 }, { "epoch": 0.03227244858146146, "grad_norm": 0.520000696182251, "learning_rate": 9.993829612835378e-05, "loss": 1.6328, "step": 579 }, { "epoch": 0.0323281868346246, "grad_norm": 0.501677930355072, "learning_rate": 9.993785310581875e-05, "loss": 1.9793, "step": 580 }, { "epoch": 0.03238392508778775, "grad_norm": 0.4832457900047302, "learning_rate": 9.993740849954619e-05, "loss": 1.7687, "step": 581 }, { "epoch": 0.03243966334095089, "grad_norm": 0.4854641556739807, "learning_rate": 9.99369623095502e-05, "loss": 1.8983, "step": 582 }, { "epoch": 0.03249540159411404, "grad_norm": 0.48794299364089966, "learning_rate": 9.993651453584491e-05, "loss": 1.6625, "step": 583 }, { "epoch": 0.032551139847277186, "grad_norm": 0.4691779911518097, "learning_rate": 9.993606517844452e-05, "loss": 1.7413, "step": 584 }, { "epoch": 0.032606878100440335, "grad_norm": 0.531639039516449, "learning_rate": 9.993561423736331e-05, "loss": 1.875, "step": 585 }, { "epoch": 0.03266261635360348, "grad_norm": 0.5259484648704529, "learning_rate": 9.993516171261555e-05, "loss": 1.9669, "step": 586 }, { "epoch": 0.03271835460676662, "grad_norm": 0.4976826012134552, "learning_rate": 9.993470760421559e-05, "loss": 1.808, "step": 587 }, { "epoch": 0.03277409285992977, "grad_norm": 0.4722268283367157, "learning_rate": 9.993425191217787e-05, "loss": 1.7654, "step": 588 }, { "epoch": 0.032829831113092914, "grad_norm": 0.4951403737068176, "learning_rate": 9.993379463651679e-05, "loss": 1.8282, "step": 589 }, { "epoch": 0.032885569366256064, "grad_norm": 0.4893924295902252, "learning_rate": 9.99333357772469e-05, "loss": 1.6477, "step": 590 }, { "epoch": 0.032941307619419206, "grad_norm": 0.4877261519432068, "learning_rate": 9.993287533438273e-05, "loss": 1.6518, "step": 591 }, { "epoch": 0.032997045872582356, "grad_norm": 0.48906272649765015, "learning_rate": 9.993241330793888e-05, "loss": 1.6485, "step": 592 }, { "epoch": 0.0330527841257455, "grad_norm": 0.5735100507736206, "learning_rate": 9.993194969792999e-05, "loss": 2.0397, "step": 593 }, { "epoch": 0.03310852237890864, "grad_norm": 0.45156189799308777, "learning_rate": 9.99314845043708e-05, "loss": 1.6368, "step": 594 }, { "epoch": 0.03316426063207179, "grad_norm": 0.4821372628211975, "learning_rate": 9.993101772727602e-05, "loss": 1.6886, "step": 595 }, { "epoch": 0.033219998885234935, "grad_norm": 0.501278817653656, "learning_rate": 9.993054936666048e-05, "loss": 1.7587, "step": 596 }, { "epoch": 0.033275737138398084, "grad_norm": 0.5598791241645813, "learning_rate": 9.993007942253905e-05, "loss": 1.8861, "step": 597 }, { "epoch": 0.03333147539156123, "grad_norm": 0.48821693658828735, "learning_rate": 9.99296078949266e-05, "loss": 1.6563, "step": 598 }, { "epoch": 0.03338721364472438, "grad_norm": 0.4853152632713318, "learning_rate": 9.99291347838381e-05, "loss": 1.5493, "step": 599 }, { "epoch": 0.03344295189788752, "grad_norm": 0.5629671812057495, "learning_rate": 9.992866008928855e-05, "loss": 2.1359, "step": 600 }, { "epoch": 0.03349869015105066, "grad_norm": 0.5176377892494202, "learning_rate": 9.9928183811293e-05, "loss": 2.0139, "step": 601 }, { "epoch": 0.03355442840421381, "grad_norm": 0.46964964270591736, "learning_rate": 9.992770594986658e-05, "loss": 1.6594, "step": 602 }, { "epoch": 0.033610166657376955, "grad_norm": 0.49720609188079834, "learning_rate": 9.992722650502442e-05, "loss": 1.8432, "step": 603 }, { "epoch": 0.033665904910540105, "grad_norm": 0.4787680506706238, "learning_rate": 9.992674547678171e-05, "loss": 1.8071, "step": 604 }, { "epoch": 0.03372164316370325, "grad_norm": 0.4432480037212372, "learning_rate": 9.992626286515373e-05, "loss": 1.6391, "step": 605 }, { "epoch": 0.0337773814168664, "grad_norm": 0.5781794786453247, "learning_rate": 9.992577867015581e-05, "loss": 2.0711, "step": 606 }, { "epoch": 0.03383311967002954, "grad_norm": 0.45807138085365295, "learning_rate": 9.992529289180326e-05, "loss": 1.5886, "step": 607 }, { "epoch": 0.03388885792319269, "grad_norm": 0.5234102606773376, "learning_rate": 9.992480553011151e-05, "loss": 1.9211, "step": 608 }, { "epoch": 0.03394459617635583, "grad_norm": 0.5202253460884094, "learning_rate": 9.9924316585096e-05, "loss": 1.819, "step": 609 }, { "epoch": 0.034000334429518976, "grad_norm": 0.4516846537590027, "learning_rate": 9.992382605677226e-05, "loss": 1.6631, "step": 610 }, { "epoch": 0.034056072682682126, "grad_norm": 0.5501968860626221, "learning_rate": 9.992333394515583e-05, "loss": 2.0759, "step": 611 }, { "epoch": 0.03411181093584527, "grad_norm": 0.4812159836292267, "learning_rate": 9.992284025026231e-05, "loss": 1.6721, "step": 612 }, { "epoch": 0.03416754918900842, "grad_norm": 0.5236145257949829, "learning_rate": 9.992234497210737e-05, "loss": 1.807, "step": 613 }, { "epoch": 0.03422328744217156, "grad_norm": 0.5123412609100342, "learning_rate": 9.992184811070673e-05, "loss": 1.9095, "step": 614 }, { "epoch": 0.03427902569533471, "grad_norm": 0.49797573685646057, "learning_rate": 9.992134966607612e-05, "loss": 1.7303, "step": 615 }, { "epoch": 0.034334763948497854, "grad_norm": 0.48441436886787415, "learning_rate": 9.992084963823136e-05, "loss": 1.6339, "step": 616 }, { "epoch": 0.034390502201661, "grad_norm": 0.5459060668945312, "learning_rate": 9.992034802718832e-05, "loss": 1.8881, "step": 617 }, { "epoch": 0.03444624045482415, "grad_norm": 0.5051499009132385, "learning_rate": 9.991984483296288e-05, "loss": 1.9386, "step": 618 }, { "epoch": 0.03450197870798729, "grad_norm": 0.5421403050422668, "learning_rate": 9.991934005557103e-05, "loss": 2.0836, "step": 619 }, { "epoch": 0.03455771696115044, "grad_norm": 0.4838196933269501, "learning_rate": 9.991883369502874e-05, "loss": 1.6526, "step": 620 }, { "epoch": 0.03461345521431358, "grad_norm": 0.49810105562210083, "learning_rate": 9.991832575135211e-05, "loss": 1.7326, "step": 621 }, { "epoch": 0.03466919346747673, "grad_norm": 0.46195507049560547, "learning_rate": 9.991781622455723e-05, "loss": 1.6398, "step": 622 }, { "epoch": 0.034724931720639875, "grad_norm": 0.46615251898765564, "learning_rate": 9.991730511466026e-05, "loss": 1.7927, "step": 623 }, { "epoch": 0.03478066997380302, "grad_norm": 0.5302008390426636, "learning_rate": 9.991679242167741e-05, "loss": 1.8047, "step": 624 }, { "epoch": 0.03483640822696617, "grad_norm": 0.49787190556526184, "learning_rate": 9.991627814562494e-05, "loss": 1.9146, "step": 625 }, { "epoch": 0.03489214648012931, "grad_norm": 0.5156252384185791, "learning_rate": 9.991576228651915e-05, "loss": 1.9453, "step": 626 }, { "epoch": 0.03494788473329246, "grad_norm": 0.45635107159614563, "learning_rate": 9.991524484437642e-05, "loss": 1.7143, "step": 627 }, { "epoch": 0.0350036229864556, "grad_norm": 0.48797038197517395, "learning_rate": 9.991472581921316e-05, "loss": 1.7371, "step": 628 }, { "epoch": 0.03505936123961875, "grad_norm": 0.549708366394043, "learning_rate": 9.99142052110458e-05, "loss": 1.9569, "step": 629 }, { "epoch": 0.035115099492781895, "grad_norm": 0.4693654179573059, "learning_rate": 9.991368301989088e-05, "loss": 1.4609, "step": 630 }, { "epoch": 0.035170837745945045, "grad_norm": 0.5259846448898315, "learning_rate": 9.991315924576495e-05, "loss": 1.7577, "step": 631 }, { "epoch": 0.03522657599910819, "grad_norm": 0.49805745482444763, "learning_rate": 9.991263388868461e-05, "loss": 1.8534, "step": 632 }, { "epoch": 0.03528231425227133, "grad_norm": 0.4565132260322571, "learning_rate": 9.991210694866654e-05, "loss": 1.6853, "step": 633 }, { "epoch": 0.03533805250543448, "grad_norm": 0.5158933401107788, "learning_rate": 9.991157842572747e-05, "loss": 1.8088, "step": 634 }, { "epoch": 0.035393790758597624, "grad_norm": 0.49667277932167053, "learning_rate": 9.991104831988412e-05, "loss": 1.9148, "step": 635 }, { "epoch": 0.03544952901176077, "grad_norm": 0.48701363801956177, "learning_rate": 9.991051663115331e-05, "loss": 1.7816, "step": 636 }, { "epoch": 0.035505267264923916, "grad_norm": 0.5608890056610107, "learning_rate": 9.990998335955193e-05, "loss": 1.8764, "step": 637 }, { "epoch": 0.035561005518087066, "grad_norm": 0.49871060252189636, "learning_rate": 9.990944850509685e-05, "loss": 1.6103, "step": 638 }, { "epoch": 0.03561674377125021, "grad_norm": 0.46610593795776367, "learning_rate": 9.990891206780506e-05, "loss": 1.7798, "step": 639 }, { "epoch": 0.03567248202441335, "grad_norm": 0.5284513831138611, "learning_rate": 9.990837404769358e-05, "loss": 1.8771, "step": 640 }, { "epoch": 0.0357282202775765, "grad_norm": 0.5929260849952698, "learning_rate": 9.990783444477946e-05, "loss": 2.0712, "step": 641 }, { "epoch": 0.035783958530739644, "grad_norm": 0.5146616697311401, "learning_rate": 9.990729325907981e-05, "loss": 1.7693, "step": 642 }, { "epoch": 0.035839696783902794, "grad_norm": 0.5243765711784363, "learning_rate": 9.99067504906118e-05, "loss": 1.8675, "step": 643 }, { "epoch": 0.03589543503706594, "grad_norm": 0.48738136887550354, "learning_rate": 9.990620613939263e-05, "loss": 1.7557, "step": 644 }, { "epoch": 0.03595117329022909, "grad_norm": 0.5006791353225708, "learning_rate": 9.990566020543959e-05, "loss": 1.7199, "step": 645 }, { "epoch": 0.03600691154339223, "grad_norm": 0.5283340811729431, "learning_rate": 9.990511268876998e-05, "loss": 1.9156, "step": 646 }, { "epoch": 0.03606264979655538, "grad_norm": 0.47615885734558105, "learning_rate": 9.990456358940115e-05, "loss": 1.6183, "step": 647 }, { "epoch": 0.03611838804971852, "grad_norm": 0.48326513171195984, "learning_rate": 9.990401290735053e-05, "loss": 1.8159, "step": 648 }, { "epoch": 0.036174126302881665, "grad_norm": 0.489183247089386, "learning_rate": 9.990346064263558e-05, "loss": 1.9306, "step": 649 }, { "epoch": 0.036229864556044815, "grad_norm": 0.44880211353302, "learning_rate": 9.990290679527382e-05, "loss": 1.4257, "step": 650 }, { "epoch": 0.03628560280920796, "grad_norm": 0.49666327238082886, "learning_rate": 9.990235136528281e-05, "loss": 1.6587, "step": 651 }, { "epoch": 0.03634134106237111, "grad_norm": 0.5396116971969604, "learning_rate": 9.990179435268017e-05, "loss": 1.9138, "step": 652 }, { "epoch": 0.03639707931553425, "grad_norm": 0.512506902217865, "learning_rate": 9.990123575748355e-05, "loss": 2.0153, "step": 653 }, { "epoch": 0.0364528175686974, "grad_norm": 0.48785391449928284, "learning_rate": 9.990067557971068e-05, "loss": 1.9489, "step": 654 }, { "epoch": 0.03650855582186054, "grad_norm": 0.49123311042785645, "learning_rate": 9.990011381937933e-05, "loss": 1.6926, "step": 655 }, { "epoch": 0.036564294075023686, "grad_norm": 0.4744409918785095, "learning_rate": 9.98995504765073e-05, "loss": 1.7961, "step": 656 }, { "epoch": 0.036620032328186836, "grad_norm": 0.5175344944000244, "learning_rate": 9.989898555111245e-05, "loss": 1.8846, "step": 657 }, { "epoch": 0.03667577058134998, "grad_norm": 0.4825249910354614, "learning_rate": 9.989841904321274e-05, "loss": 1.7094, "step": 658 }, { "epoch": 0.03673150883451313, "grad_norm": 0.5392758250236511, "learning_rate": 9.989785095282609e-05, "loss": 1.8777, "step": 659 }, { "epoch": 0.03678724708767627, "grad_norm": 0.5122122764587402, "learning_rate": 9.989728127997052e-05, "loss": 1.8686, "step": 660 }, { "epoch": 0.03684298534083942, "grad_norm": 0.4976766109466553, "learning_rate": 9.989671002466412e-05, "loss": 1.7542, "step": 661 }, { "epoch": 0.036898723594002564, "grad_norm": 0.4618877172470093, "learning_rate": 9.989613718692501e-05, "loss": 1.4741, "step": 662 }, { "epoch": 0.03695446184716571, "grad_norm": 0.4870270788669586, "learning_rate": 9.989556276677133e-05, "loss": 1.6816, "step": 663 }, { "epoch": 0.037010200100328856, "grad_norm": 0.5549145936965942, "learning_rate": 9.989498676422131e-05, "loss": 1.8716, "step": 664 }, { "epoch": 0.037065938353492, "grad_norm": 0.501438319683075, "learning_rate": 9.989440917929321e-05, "loss": 1.7686, "step": 665 }, { "epoch": 0.03712167660665515, "grad_norm": 0.5713873505592346, "learning_rate": 9.989383001200536e-05, "loss": 2.116, "step": 666 }, { "epoch": 0.03717741485981829, "grad_norm": 0.4839586615562439, "learning_rate": 9.989324926237613e-05, "loss": 1.8245, "step": 667 }, { "epoch": 0.03723315311298144, "grad_norm": 0.5154809355735779, "learning_rate": 9.989266693042394e-05, "loss": 1.661, "step": 668 }, { "epoch": 0.037288891366144584, "grad_norm": 0.4965420365333557, "learning_rate": 9.989208301616724e-05, "loss": 1.6531, "step": 669 }, { "epoch": 0.037344629619307734, "grad_norm": 0.4850505292415619, "learning_rate": 9.989149751962455e-05, "loss": 1.8691, "step": 670 }, { "epoch": 0.03740036787247088, "grad_norm": 0.47275611758232117, "learning_rate": 9.989091044081445e-05, "loss": 1.7718, "step": 671 }, { "epoch": 0.03745610612563402, "grad_norm": 0.5606955885887146, "learning_rate": 9.989032177975554e-05, "loss": 2.2129, "step": 672 }, { "epoch": 0.03751184437879717, "grad_norm": 0.49657538533210754, "learning_rate": 9.988973153646654e-05, "loss": 1.9084, "step": 673 }, { "epoch": 0.03756758263196031, "grad_norm": 0.5135958790779114, "learning_rate": 9.988913971096611e-05, "loss": 1.9491, "step": 674 }, { "epoch": 0.03762332088512346, "grad_norm": 0.48900923132896423, "learning_rate": 9.988854630327305e-05, "loss": 1.7176, "step": 675 }, { "epoch": 0.037679059138286605, "grad_norm": 0.463521808385849, "learning_rate": 9.988795131340616e-05, "loss": 1.5625, "step": 676 }, { "epoch": 0.037734797391449755, "grad_norm": 0.48082444071769714, "learning_rate": 9.988735474138433e-05, "loss": 1.7208, "step": 677 }, { "epoch": 0.0377905356446129, "grad_norm": 0.5012754201889038, "learning_rate": 9.988675658722648e-05, "loss": 1.9678, "step": 678 }, { "epoch": 0.03784627389777604, "grad_norm": 0.5888019800186157, "learning_rate": 9.988615685095155e-05, "loss": 2.2239, "step": 679 }, { "epoch": 0.03790201215093919, "grad_norm": 0.47830748558044434, "learning_rate": 9.98855555325786e-05, "loss": 1.6574, "step": 680 }, { "epoch": 0.03795775040410233, "grad_norm": 0.47648170590400696, "learning_rate": 9.988495263212667e-05, "loss": 1.6185, "step": 681 }, { "epoch": 0.03801348865726548, "grad_norm": 0.5321143269538879, "learning_rate": 9.98843481496149e-05, "loss": 2.0788, "step": 682 }, { "epoch": 0.038069226910428626, "grad_norm": 0.4451909363269806, "learning_rate": 9.988374208506243e-05, "loss": 1.7213, "step": 683 }, { "epoch": 0.038124965163591776, "grad_norm": 0.4888899028301239, "learning_rate": 9.988313443848853e-05, "loss": 1.9524, "step": 684 }, { "epoch": 0.03818070341675492, "grad_norm": 0.5075884461402893, "learning_rate": 9.988252520991244e-05, "loss": 1.9489, "step": 685 }, { "epoch": 0.03823644166991806, "grad_norm": 0.5244428515434265, "learning_rate": 9.988191439935348e-05, "loss": 1.8805, "step": 686 }, { "epoch": 0.03829217992308121, "grad_norm": 0.5269452333450317, "learning_rate": 9.988130200683103e-05, "loss": 1.916, "step": 687 }, { "epoch": 0.038347918176244354, "grad_norm": 0.40096086263656616, "learning_rate": 9.98806880323645e-05, "loss": 1.3248, "step": 688 }, { "epoch": 0.038403656429407504, "grad_norm": 0.555325984954834, "learning_rate": 9.988007247597337e-05, "loss": 1.945, "step": 689 }, { "epoch": 0.03845939468257065, "grad_norm": 0.4987097382545471, "learning_rate": 9.987945533767717e-05, "loss": 1.9159, "step": 690 }, { "epoch": 0.0385151329357338, "grad_norm": 0.46860477328300476, "learning_rate": 9.987883661749548e-05, "loss": 1.7105, "step": 691 }, { "epoch": 0.03857087118889694, "grad_norm": 0.4867911636829376, "learning_rate": 9.987821631544789e-05, "loss": 1.6607, "step": 692 }, { "epoch": 0.03862660944206009, "grad_norm": 0.5149185061454773, "learning_rate": 9.987759443155409e-05, "loss": 1.8422, "step": 693 }, { "epoch": 0.03868234769522323, "grad_norm": 0.508399248123169, "learning_rate": 9.98769709658338e-05, "loss": 1.8393, "step": 694 }, { "epoch": 0.038738085948386375, "grad_norm": 0.4841381907463074, "learning_rate": 9.987634591830679e-05, "loss": 1.8819, "step": 695 }, { "epoch": 0.038793824201549525, "grad_norm": 0.4869403541088104, "learning_rate": 9.987571928899288e-05, "loss": 1.7872, "step": 696 }, { "epoch": 0.03884956245471267, "grad_norm": 0.49572715163230896, "learning_rate": 9.987509107791196e-05, "loss": 1.8078, "step": 697 }, { "epoch": 0.03890530070787582, "grad_norm": 0.5188158750534058, "learning_rate": 9.987446128508396e-05, "loss": 1.7838, "step": 698 }, { "epoch": 0.03896103896103896, "grad_norm": 0.4589369595050812, "learning_rate": 9.98738299105288e-05, "loss": 1.7299, "step": 699 }, { "epoch": 0.03901677721420211, "grad_norm": 0.5023289322853088, "learning_rate": 9.987319695426657e-05, "loss": 1.7414, "step": 700 }, { "epoch": 0.03907251546736525, "grad_norm": 0.5241897702217102, "learning_rate": 9.98725624163173e-05, "loss": 1.8223, "step": 701 }, { "epoch": 0.039128253720528396, "grad_norm": 0.4720919728279114, "learning_rate": 9.987192629670112e-05, "loss": 1.791, "step": 702 }, { "epoch": 0.039183991973691545, "grad_norm": 0.5045210719108582, "learning_rate": 9.987128859543824e-05, "loss": 1.7428, "step": 703 }, { "epoch": 0.03923973022685469, "grad_norm": 0.5130773782730103, "learning_rate": 9.987064931254884e-05, "loss": 1.6701, "step": 704 }, { "epoch": 0.03929546848001784, "grad_norm": 0.5155162215232849, "learning_rate": 9.987000844805319e-05, "loss": 1.9592, "step": 705 }, { "epoch": 0.03935120673318098, "grad_norm": 0.46410509943962097, "learning_rate": 9.986936600197165e-05, "loss": 1.786, "step": 706 }, { "epoch": 0.03940694498634413, "grad_norm": 0.5000941157341003, "learning_rate": 9.986872197432459e-05, "loss": 1.7937, "step": 707 }, { "epoch": 0.039462683239507274, "grad_norm": 0.4663851261138916, "learning_rate": 9.986807636513241e-05, "loss": 1.8019, "step": 708 }, { "epoch": 0.039518421492670416, "grad_norm": 0.5445390343666077, "learning_rate": 9.986742917441561e-05, "loss": 1.9214, "step": 709 }, { "epoch": 0.039574159745833566, "grad_norm": 0.49968406558036804, "learning_rate": 9.986678040219469e-05, "loss": 1.7621, "step": 710 }, { "epoch": 0.03962989799899671, "grad_norm": 0.514168381690979, "learning_rate": 9.986613004849024e-05, "loss": 1.7435, "step": 711 }, { "epoch": 0.03968563625215986, "grad_norm": 0.4899461269378662, "learning_rate": 9.986547811332289e-05, "loss": 1.7199, "step": 712 }, { "epoch": 0.039741374505323, "grad_norm": 0.5172072052955627, "learning_rate": 9.986482459671332e-05, "loss": 1.9435, "step": 713 }, { "epoch": 0.03979711275848615, "grad_norm": 0.5198094844818115, "learning_rate": 9.986416949868223e-05, "loss": 1.799, "step": 714 }, { "epoch": 0.039852851011649294, "grad_norm": 0.47976863384246826, "learning_rate": 9.986351281925042e-05, "loss": 1.8455, "step": 715 }, { "epoch": 0.039908589264812444, "grad_norm": 0.4702402949333191, "learning_rate": 9.986285455843872e-05, "loss": 1.5848, "step": 716 }, { "epoch": 0.03996432751797559, "grad_norm": 0.4698415994644165, "learning_rate": 9.986219471626797e-05, "loss": 1.6527, "step": 717 }, { "epoch": 0.04002006577113873, "grad_norm": 0.5518625974655151, "learning_rate": 9.986153329275913e-05, "loss": 1.8773, "step": 718 }, { "epoch": 0.04007580402430188, "grad_norm": 0.5149457454681396, "learning_rate": 9.986087028793316e-05, "loss": 1.8737, "step": 719 }, { "epoch": 0.04013154227746502, "grad_norm": 0.527282178401947, "learning_rate": 9.98602057018111e-05, "loss": 1.9581, "step": 720 }, { "epoch": 0.04018728053062817, "grad_norm": 0.48371025919914246, "learning_rate": 9.985953953441402e-05, "loss": 1.887, "step": 721 }, { "epoch": 0.040243018783791315, "grad_norm": 0.5474866032600403, "learning_rate": 9.985887178576305e-05, "loss": 1.9981, "step": 722 }, { "epoch": 0.040298757036954465, "grad_norm": 0.5417437553405762, "learning_rate": 9.985820245587936e-05, "loss": 2.0195, "step": 723 }, { "epoch": 0.04035449529011761, "grad_norm": 0.458363801240921, "learning_rate": 9.985753154478418e-05, "loss": 1.6134, "step": 724 }, { "epoch": 0.04041023354328075, "grad_norm": 0.49649447202682495, "learning_rate": 9.98568590524988e-05, "loss": 1.7501, "step": 725 }, { "epoch": 0.0404659717964439, "grad_norm": 0.5304057002067566, "learning_rate": 9.985618497904453e-05, "loss": 1.9164, "step": 726 }, { "epoch": 0.04052171004960704, "grad_norm": 0.4757838249206543, "learning_rate": 9.985550932444275e-05, "loss": 1.8159, "step": 727 }, { "epoch": 0.04057744830277019, "grad_norm": 0.48324036598205566, "learning_rate": 9.98548320887149e-05, "loss": 1.6184, "step": 728 }, { "epoch": 0.040633186555933336, "grad_norm": 0.5059638023376465, "learning_rate": 9.985415327188245e-05, "loss": 1.8383, "step": 729 }, { "epoch": 0.040688924809096486, "grad_norm": 0.4717106819152832, "learning_rate": 9.985347287396692e-05, "loss": 1.67, "step": 730 }, { "epoch": 0.04074466306225963, "grad_norm": 0.4953088164329529, "learning_rate": 9.98527908949899e-05, "loss": 1.8185, "step": 731 }, { "epoch": 0.04080040131542277, "grad_norm": 0.49030283093452454, "learning_rate": 9.985210733497301e-05, "loss": 1.7909, "step": 732 }, { "epoch": 0.04085613956858592, "grad_norm": 0.5224010944366455, "learning_rate": 9.985142219393795e-05, "loss": 1.8615, "step": 733 }, { "epoch": 0.040911877821749064, "grad_norm": 0.5008676648139954, "learning_rate": 9.985073547190641e-05, "loss": 1.9337, "step": 734 }, { "epoch": 0.040967616074912214, "grad_norm": 0.4777420163154602, "learning_rate": 9.98500471689002e-05, "loss": 1.8345, "step": 735 }, { "epoch": 0.04102335432807536, "grad_norm": 0.4995800852775574, "learning_rate": 9.984935728494113e-05, "loss": 1.843, "step": 736 }, { "epoch": 0.041079092581238506, "grad_norm": 0.5097813010215759, "learning_rate": 9.984866582005111e-05, "loss": 1.9642, "step": 737 }, { "epoch": 0.04113483083440165, "grad_norm": 0.4956590533256531, "learning_rate": 9.984797277425204e-05, "loss": 1.8874, "step": 738 }, { "epoch": 0.0411905690875648, "grad_norm": 0.5304232239723206, "learning_rate": 9.98472781475659e-05, "loss": 1.9269, "step": 739 }, { "epoch": 0.04124630734072794, "grad_norm": 0.5134212374687195, "learning_rate": 9.984658194001474e-05, "loss": 1.5059, "step": 740 }, { "epoch": 0.041302045593891085, "grad_norm": 0.4551413953304291, "learning_rate": 9.984588415162061e-05, "loss": 1.7386, "step": 741 }, { "epoch": 0.041357783847054234, "grad_norm": 0.5477944612503052, "learning_rate": 9.984518478240568e-05, "loss": 1.9075, "step": 742 }, { "epoch": 0.04141352210021738, "grad_norm": 0.4997386038303375, "learning_rate": 9.98444838323921e-05, "loss": 1.7812, "step": 743 }, { "epoch": 0.04146926035338053, "grad_norm": 0.5239866971969604, "learning_rate": 9.984378130160208e-05, "loss": 1.9155, "step": 744 }, { "epoch": 0.04152499860654367, "grad_norm": 0.46206948161125183, "learning_rate": 9.984307719005795e-05, "loss": 1.6661, "step": 745 }, { "epoch": 0.04158073685970682, "grad_norm": 0.4978305399417877, "learning_rate": 9.984237149778201e-05, "loss": 1.8456, "step": 746 }, { "epoch": 0.04163647511286996, "grad_norm": 0.50936359167099, "learning_rate": 9.984166422479663e-05, "loss": 1.9118, "step": 747 }, { "epoch": 0.041692213366033105, "grad_norm": 0.49744611978530884, "learning_rate": 9.984095537112429e-05, "loss": 1.7721, "step": 748 }, { "epoch": 0.041747951619196255, "grad_norm": 0.536056637763977, "learning_rate": 9.984024493678743e-05, "loss": 1.7968, "step": 749 }, { "epoch": 0.0418036898723594, "grad_norm": 0.5262266993522644, "learning_rate": 9.983953292180857e-05, "loss": 1.858, "step": 750 }, { "epoch": 0.04185942812552255, "grad_norm": 0.5085186958312988, "learning_rate": 9.983881932621033e-05, "loss": 1.751, "step": 751 }, { "epoch": 0.04191516637868569, "grad_norm": 0.4641915261745453, "learning_rate": 9.983810415001531e-05, "loss": 1.5998, "step": 752 }, { "epoch": 0.04197090463184884, "grad_norm": 0.5268242955207825, "learning_rate": 9.983738739324621e-05, "loss": 1.7263, "step": 753 }, { "epoch": 0.04202664288501198, "grad_norm": 0.5283384919166565, "learning_rate": 9.983666905592576e-05, "loss": 1.9334, "step": 754 }, { "epoch": 0.042082381138175126, "grad_norm": 0.5007447600364685, "learning_rate": 9.983594913807672e-05, "loss": 1.6944, "step": 755 }, { "epoch": 0.042138119391338276, "grad_norm": 0.5626598596572876, "learning_rate": 9.983522763972196e-05, "loss": 2.042, "step": 756 }, { "epoch": 0.04219385764450142, "grad_norm": 0.46739470958709717, "learning_rate": 9.983450456088432e-05, "loss": 1.6733, "step": 757 }, { "epoch": 0.04224959589766457, "grad_norm": 0.5124320983886719, "learning_rate": 9.983377990158676e-05, "loss": 1.8463, "step": 758 }, { "epoch": 0.04230533415082771, "grad_norm": 0.4762093722820282, "learning_rate": 9.983305366185223e-05, "loss": 1.7602, "step": 759 }, { "epoch": 0.04236107240399086, "grad_norm": 0.5182420015335083, "learning_rate": 9.983232584170381e-05, "loss": 1.8644, "step": 760 }, { "epoch": 0.042416810657154004, "grad_norm": 0.4640427231788635, "learning_rate": 9.983159644116454e-05, "loss": 1.6919, "step": 761 }, { "epoch": 0.042472548910317154, "grad_norm": 0.4894956946372986, "learning_rate": 9.983086546025759e-05, "loss": 1.9491, "step": 762 }, { "epoch": 0.0425282871634803, "grad_norm": 0.49869638681411743, "learning_rate": 9.98301328990061e-05, "loss": 1.9184, "step": 763 }, { "epoch": 0.04258402541664344, "grad_norm": 0.5161083936691284, "learning_rate": 9.982939875743333e-05, "loss": 1.826, "step": 764 }, { "epoch": 0.04263976366980659, "grad_norm": 0.4913845956325531, "learning_rate": 9.982866303556258e-05, "loss": 1.7675, "step": 765 }, { "epoch": 0.04269550192296973, "grad_norm": 0.49277618527412415, "learning_rate": 9.982792573341713e-05, "loss": 1.8539, "step": 766 }, { "epoch": 0.04275124017613288, "grad_norm": 0.5222828388214111, "learning_rate": 9.982718685102039e-05, "loss": 1.9196, "step": 767 }, { "epoch": 0.042806978429296025, "grad_norm": 0.5137212872505188, "learning_rate": 9.982644638839583e-05, "loss": 1.8719, "step": 768 }, { "epoch": 0.042862716682459175, "grad_norm": 0.646440327167511, "learning_rate": 9.982570434556686e-05, "loss": 1.9678, "step": 769 }, { "epoch": 0.04291845493562232, "grad_norm": 0.4992925524711609, "learning_rate": 9.982496072255708e-05, "loss": 1.7078, "step": 770 }, { "epoch": 0.04297419318878546, "grad_norm": 0.4863613247871399, "learning_rate": 9.982421551939003e-05, "loss": 1.8064, "step": 771 }, { "epoch": 0.04302993144194861, "grad_norm": 0.4646783769130707, "learning_rate": 9.982346873608937e-05, "loss": 1.6427, "step": 772 }, { "epoch": 0.04308566969511175, "grad_norm": 0.5143455266952515, "learning_rate": 9.982272037267877e-05, "loss": 1.7367, "step": 773 }, { "epoch": 0.0431414079482749, "grad_norm": 0.4936600923538208, "learning_rate": 9.982197042918195e-05, "loss": 1.6834, "step": 774 }, { "epoch": 0.043197146201438046, "grad_norm": 0.4923505485057831, "learning_rate": 9.982121890562273e-05, "loss": 1.7545, "step": 775 }, { "epoch": 0.043252884454601195, "grad_norm": 0.5399130582809448, "learning_rate": 9.982046580202493e-05, "loss": 1.784, "step": 776 }, { "epoch": 0.04330862270776434, "grad_norm": 0.49087220430374146, "learning_rate": 9.98197111184124e-05, "loss": 1.8328, "step": 777 }, { "epoch": 0.04336436096092749, "grad_norm": 0.5504277348518372, "learning_rate": 9.981895485480912e-05, "loss": 1.8808, "step": 778 }, { "epoch": 0.04342009921409063, "grad_norm": 0.45953568816185, "learning_rate": 9.981819701123907e-05, "loss": 1.577, "step": 779 }, { "epoch": 0.043475837467253774, "grad_norm": 0.4762939214706421, "learning_rate": 9.981743758772625e-05, "loss": 1.6959, "step": 780 }, { "epoch": 0.043531575720416923, "grad_norm": 0.4667057991027832, "learning_rate": 9.981667658429477e-05, "loss": 1.6635, "step": 781 }, { "epoch": 0.043587313973580066, "grad_norm": 0.5036124587059021, "learning_rate": 9.981591400096877e-05, "loss": 1.854, "step": 782 }, { "epoch": 0.043643052226743216, "grad_norm": 0.48234641551971436, "learning_rate": 9.98151498377724e-05, "loss": 1.781, "step": 783 }, { "epoch": 0.04369879047990636, "grad_norm": 0.4990682005882263, "learning_rate": 9.981438409472994e-05, "loss": 1.6629, "step": 784 }, { "epoch": 0.04375452873306951, "grad_norm": 0.4655357599258423, "learning_rate": 9.981361677186566e-05, "loss": 1.7694, "step": 785 }, { "epoch": 0.04381026698623265, "grad_norm": 0.4690426290035248, "learning_rate": 9.981284786920388e-05, "loss": 1.7242, "step": 786 }, { "epoch": 0.043866005239395794, "grad_norm": 0.46350887417793274, "learning_rate": 9.981207738676899e-05, "loss": 1.6032, "step": 787 }, { "epoch": 0.043921743492558944, "grad_norm": 0.5220307111740112, "learning_rate": 9.981130532458544e-05, "loss": 1.8624, "step": 788 }, { "epoch": 0.04397748174572209, "grad_norm": 0.465497761964798, "learning_rate": 9.98105316826777e-05, "loss": 1.6831, "step": 789 }, { "epoch": 0.04403321999888524, "grad_norm": 0.4893016219139099, "learning_rate": 9.980975646107032e-05, "loss": 1.7933, "step": 790 }, { "epoch": 0.04408895825204838, "grad_norm": 0.4457073509693146, "learning_rate": 9.980897965978787e-05, "loss": 1.6383, "step": 791 }, { "epoch": 0.04414469650521153, "grad_norm": 0.5064904093742371, "learning_rate": 9.980820127885497e-05, "loss": 1.8771, "step": 792 }, { "epoch": 0.04420043475837467, "grad_norm": 0.5663847327232361, "learning_rate": 9.980742131829635e-05, "loss": 2.0977, "step": 793 }, { "epoch": 0.044256173011537815, "grad_norm": 0.558462381362915, "learning_rate": 9.980663977813672e-05, "loss": 1.9813, "step": 794 }, { "epoch": 0.044311911264700965, "grad_norm": 0.5043233633041382, "learning_rate": 9.980585665840087e-05, "loss": 1.7362, "step": 795 }, { "epoch": 0.04436764951786411, "grad_norm": 0.5110850930213928, "learning_rate": 9.980507195911363e-05, "loss": 1.5489, "step": 796 }, { "epoch": 0.04442338777102726, "grad_norm": 0.5611404180526733, "learning_rate": 9.980428568029989e-05, "loss": 1.9545, "step": 797 }, { "epoch": 0.0444791260241904, "grad_norm": 0.44059324264526367, "learning_rate": 9.98034978219846e-05, "loss": 1.6321, "step": 798 }, { "epoch": 0.04453486427735355, "grad_norm": 0.5034955143928528, "learning_rate": 9.980270838419273e-05, "loss": 1.7045, "step": 799 }, { "epoch": 0.04459060253051669, "grad_norm": 0.49383604526519775, "learning_rate": 9.98019173669493e-05, "loss": 1.6414, "step": 800 }, { "epoch": 0.04464634078367984, "grad_norm": 0.5035958290100098, "learning_rate": 9.980112477027942e-05, "loss": 1.8683, "step": 801 }, { "epoch": 0.044702079036842986, "grad_norm": 0.4942208230495453, "learning_rate": 9.980033059420826e-05, "loss": 1.7773, "step": 802 }, { "epoch": 0.04475781729000613, "grad_norm": 0.5211103558540344, "learning_rate": 9.979953483876095e-05, "loss": 2.0631, "step": 803 }, { "epoch": 0.04481355554316928, "grad_norm": 0.5940659046173096, "learning_rate": 9.979873750396273e-05, "loss": 2.0601, "step": 804 }, { "epoch": 0.04486929379633242, "grad_norm": 0.5211898684501648, "learning_rate": 9.979793858983891e-05, "loss": 1.7687, "step": 805 }, { "epoch": 0.04492503204949557, "grad_norm": 0.5175243020057678, "learning_rate": 9.979713809641482e-05, "loss": 1.9662, "step": 806 }, { "epoch": 0.044980770302658714, "grad_norm": 0.5139010548591614, "learning_rate": 9.979633602371586e-05, "loss": 1.7011, "step": 807 }, { "epoch": 0.045036508555821864, "grad_norm": 0.4817015826702118, "learning_rate": 9.979553237176744e-05, "loss": 1.7632, "step": 808 }, { "epoch": 0.045092246808985006, "grad_norm": 0.49766993522644043, "learning_rate": 9.979472714059506e-05, "loss": 1.917, "step": 809 }, { "epoch": 0.04514798506214815, "grad_norm": 0.5208562612533569, "learning_rate": 9.979392033022427e-05, "loss": 1.946, "step": 810 }, { "epoch": 0.0452037233153113, "grad_norm": 0.4790688753128052, "learning_rate": 9.979311194068064e-05, "loss": 1.8072, "step": 811 }, { "epoch": 0.04525946156847444, "grad_norm": 0.46075010299682617, "learning_rate": 9.979230197198981e-05, "loss": 1.6243, "step": 812 }, { "epoch": 0.04531519982163759, "grad_norm": 0.488349974155426, "learning_rate": 9.979149042417749e-05, "loss": 1.7733, "step": 813 }, { "epoch": 0.045370938074800735, "grad_norm": 0.4905661940574646, "learning_rate": 9.979067729726938e-05, "loss": 1.821, "step": 814 }, { "epoch": 0.045426676327963884, "grad_norm": 0.5073617696762085, "learning_rate": 9.978986259129129e-05, "loss": 1.8286, "step": 815 }, { "epoch": 0.04548241458112703, "grad_norm": 0.5074631571769714, "learning_rate": 9.978904630626904e-05, "loss": 1.7967, "step": 816 }, { "epoch": 0.04553815283429017, "grad_norm": 0.5455936193466187, "learning_rate": 9.978822844222855e-05, "loss": 1.9883, "step": 817 }, { "epoch": 0.04559389108745332, "grad_norm": 0.5111860632896423, "learning_rate": 9.978740899919574e-05, "loss": 1.8694, "step": 818 }, { "epoch": 0.04564962934061646, "grad_norm": 0.4975983202457428, "learning_rate": 9.978658797719658e-05, "loss": 1.714, "step": 819 }, { "epoch": 0.04570536759377961, "grad_norm": 0.4770795702934265, "learning_rate": 9.978576537625714e-05, "loss": 1.8288, "step": 820 }, { "epoch": 0.045761105846942755, "grad_norm": 0.5559741854667664, "learning_rate": 9.97849411964035e-05, "loss": 2.093, "step": 821 }, { "epoch": 0.045816844100105905, "grad_norm": 0.4961313307285309, "learning_rate": 9.978411543766177e-05, "loss": 1.6607, "step": 822 }, { "epoch": 0.04587258235326905, "grad_norm": 0.5356935262680054, "learning_rate": 9.978328810005816e-05, "loss": 1.9762, "step": 823 }, { "epoch": 0.0459283206064322, "grad_norm": 0.4933258295059204, "learning_rate": 9.978245918361893e-05, "loss": 1.6018, "step": 824 }, { "epoch": 0.04598405885959534, "grad_norm": 0.5278127193450928, "learning_rate": 9.978162868837034e-05, "loss": 1.8532, "step": 825 }, { "epoch": 0.04603979711275848, "grad_norm": 0.4802572429180145, "learning_rate": 9.978079661433873e-05, "loss": 1.7551, "step": 826 }, { "epoch": 0.04609553536592163, "grad_norm": 0.4906105101108551, "learning_rate": 9.977996296155049e-05, "loss": 1.7463, "step": 827 }, { "epoch": 0.046151273619084776, "grad_norm": 0.43020668625831604, "learning_rate": 9.977912773003206e-05, "loss": 1.6216, "step": 828 }, { "epoch": 0.046207011872247926, "grad_norm": 0.49433162808418274, "learning_rate": 9.977829091980995e-05, "loss": 1.9011, "step": 829 }, { "epoch": 0.04626275012541107, "grad_norm": 0.45222243666648865, "learning_rate": 9.977745253091067e-05, "loss": 1.3583, "step": 830 }, { "epoch": 0.04631848837857422, "grad_norm": 0.4955357015132904, "learning_rate": 9.977661256336081e-05, "loss": 1.7256, "step": 831 }, { "epoch": 0.04637422663173736, "grad_norm": 0.5137125253677368, "learning_rate": 9.977577101718701e-05, "loss": 1.8484, "step": 832 }, { "epoch": 0.046429964884900504, "grad_norm": 0.49741753935813904, "learning_rate": 9.977492789241598e-05, "loss": 1.6564, "step": 833 }, { "epoch": 0.046485703138063654, "grad_norm": 0.4994182586669922, "learning_rate": 9.977408318907444e-05, "loss": 1.721, "step": 834 }, { "epoch": 0.0465414413912268, "grad_norm": 0.539135754108429, "learning_rate": 9.97732369071892e-05, "loss": 2.0474, "step": 835 }, { "epoch": 0.04659717964438995, "grad_norm": 0.49502313137054443, "learning_rate": 9.977238904678707e-05, "loss": 1.4078, "step": 836 }, { "epoch": 0.04665291789755309, "grad_norm": 0.4542715549468994, "learning_rate": 9.977153960789497e-05, "loss": 1.5402, "step": 837 }, { "epoch": 0.04670865615071624, "grad_norm": 0.48588764667510986, "learning_rate": 9.97706885905398e-05, "loss": 1.8641, "step": 838 }, { "epoch": 0.04676439440387938, "grad_norm": 0.529255211353302, "learning_rate": 9.976983599474857e-05, "loss": 1.8055, "step": 839 }, { "epoch": 0.046820132657042525, "grad_norm": 0.4630698561668396, "learning_rate": 9.976898182054832e-05, "loss": 1.5263, "step": 840 }, { "epoch": 0.046875870910205675, "grad_norm": 0.5334575176239014, "learning_rate": 9.976812606796615e-05, "loss": 1.7926, "step": 841 }, { "epoch": 0.04693160916336882, "grad_norm": 0.49275916814804077, "learning_rate": 9.976726873702918e-05, "loss": 1.6341, "step": 842 }, { "epoch": 0.04698734741653197, "grad_norm": 0.5276961326599121, "learning_rate": 9.976640982776461e-05, "loss": 1.882, "step": 843 }, { "epoch": 0.04704308566969511, "grad_norm": 0.49929726123809814, "learning_rate": 9.97655493401997e-05, "loss": 1.6004, "step": 844 }, { "epoch": 0.04709882392285826, "grad_norm": 0.4716168940067291, "learning_rate": 9.97646872743617e-05, "loss": 1.7355, "step": 845 }, { "epoch": 0.0471545621760214, "grad_norm": 0.5293796062469482, "learning_rate": 9.976382363027797e-05, "loss": 1.9073, "step": 846 }, { "epoch": 0.04721030042918455, "grad_norm": 0.47008490562438965, "learning_rate": 9.976295840797589e-05, "loss": 1.6875, "step": 847 }, { "epoch": 0.047266038682347696, "grad_norm": 0.48457372188568115, "learning_rate": 9.976209160748292e-05, "loss": 1.6172, "step": 848 }, { "epoch": 0.04732177693551084, "grad_norm": 0.500151515007019, "learning_rate": 9.976122322882653e-05, "loss": 1.6371, "step": 849 }, { "epoch": 0.04737751518867399, "grad_norm": 0.5459775924682617, "learning_rate": 9.976035327203427e-05, "loss": 1.9283, "step": 850 }, { "epoch": 0.04743325344183713, "grad_norm": 0.5352368950843811, "learning_rate": 9.975948173713374e-05, "loss": 2.0407, "step": 851 }, { "epoch": 0.04748899169500028, "grad_norm": 0.5491572618484497, "learning_rate": 9.975860862415254e-05, "loss": 1.7475, "step": 852 }, { "epoch": 0.047544729948163424, "grad_norm": 0.49011510610580444, "learning_rate": 9.975773393311841e-05, "loss": 1.7922, "step": 853 }, { "epoch": 0.04760046820132657, "grad_norm": 0.5197030305862427, "learning_rate": 9.975685766405906e-05, "loss": 1.7012, "step": 854 }, { "epoch": 0.047656206454489716, "grad_norm": 0.487704336643219, "learning_rate": 9.975597981700228e-05, "loss": 1.6647, "step": 855 }, { "epoch": 0.04771194470765286, "grad_norm": 0.4743403196334839, "learning_rate": 9.975510039197592e-05, "loss": 1.5522, "step": 856 }, { "epoch": 0.04776768296081601, "grad_norm": 0.46670085191726685, "learning_rate": 9.975421938900789e-05, "loss": 1.5235, "step": 857 }, { "epoch": 0.04782342121397915, "grad_norm": 0.48920536041259766, "learning_rate": 9.975333680812609e-05, "loss": 1.8876, "step": 858 }, { "epoch": 0.0478791594671423, "grad_norm": 0.5793198943138123, "learning_rate": 9.975245264935852e-05, "loss": 1.8422, "step": 859 }, { "epoch": 0.047934897720305444, "grad_norm": 0.49111589789390564, "learning_rate": 9.975156691273324e-05, "loss": 1.7702, "step": 860 }, { "epoch": 0.047990635973468594, "grad_norm": 0.5276595950126648, "learning_rate": 9.975067959827833e-05, "loss": 1.9332, "step": 861 }, { "epoch": 0.04804637422663174, "grad_norm": 0.4866962134838104, "learning_rate": 9.974979070602192e-05, "loss": 1.7497, "step": 862 }, { "epoch": 0.04810211247979488, "grad_norm": 0.5197125673294067, "learning_rate": 9.974890023599222e-05, "loss": 2.0405, "step": 863 }, { "epoch": 0.04815785073295803, "grad_norm": 0.49782440066337585, "learning_rate": 9.974800818821746e-05, "loss": 1.7609, "step": 864 }, { "epoch": 0.04821358898612117, "grad_norm": 0.52313232421875, "learning_rate": 9.974711456272593e-05, "loss": 1.9515, "step": 865 }, { "epoch": 0.04826932723928432, "grad_norm": 0.4546637237071991, "learning_rate": 9.974621935954597e-05, "loss": 1.645, "step": 866 }, { "epoch": 0.048325065492447465, "grad_norm": 0.47760143876075745, "learning_rate": 9.974532257870596e-05, "loss": 1.7104, "step": 867 }, { "epoch": 0.048380803745610615, "grad_norm": 0.4868486225605011, "learning_rate": 9.974442422023438e-05, "loss": 1.8043, "step": 868 }, { "epoch": 0.04843654199877376, "grad_norm": 0.5107572078704834, "learning_rate": 9.974352428415968e-05, "loss": 1.9662, "step": 869 }, { "epoch": 0.04849228025193691, "grad_norm": 0.5269783139228821, "learning_rate": 9.974262277051041e-05, "loss": 1.8876, "step": 870 }, { "epoch": 0.04854801850510005, "grad_norm": 0.48782503604888916, "learning_rate": 9.974171967931519e-05, "loss": 1.5996, "step": 871 }, { "epoch": 0.04860375675826319, "grad_norm": 0.5057775974273682, "learning_rate": 9.974081501060259e-05, "loss": 1.6907, "step": 872 }, { "epoch": 0.04865949501142634, "grad_norm": 0.4904307723045349, "learning_rate": 9.973990876440138e-05, "loss": 1.7377, "step": 873 }, { "epoch": 0.048715233264589486, "grad_norm": 0.4725581407546997, "learning_rate": 9.973900094074027e-05, "loss": 1.8001, "step": 874 }, { "epoch": 0.048770971517752636, "grad_norm": 0.527885913848877, "learning_rate": 9.973809153964804e-05, "loss": 1.8128, "step": 875 }, { "epoch": 0.04882670977091578, "grad_norm": 0.5520697236061096, "learning_rate": 9.973718056115354e-05, "loss": 2.0648, "step": 876 }, { "epoch": 0.04888244802407893, "grad_norm": 0.4812840223312378, "learning_rate": 9.973626800528566e-05, "loss": 1.8552, "step": 877 }, { "epoch": 0.04893818627724207, "grad_norm": 0.46856966614723206, "learning_rate": 9.973535387207333e-05, "loss": 1.577, "step": 878 }, { "epoch": 0.048993924530405214, "grad_norm": 0.4921995997428894, "learning_rate": 9.973443816154557e-05, "loss": 1.66, "step": 879 }, { "epoch": 0.049049662783568364, "grad_norm": 0.5018383264541626, "learning_rate": 9.97335208737314e-05, "loss": 1.7623, "step": 880 }, { "epoch": 0.04910540103673151, "grad_norm": 0.5345847010612488, "learning_rate": 9.973260200865991e-05, "loss": 1.8681, "step": 881 }, { "epoch": 0.049161139289894656, "grad_norm": 0.5296522974967957, "learning_rate": 9.973168156636025e-05, "loss": 1.9225, "step": 882 }, { "epoch": 0.0492168775430578, "grad_norm": 0.5303376317024231, "learning_rate": 9.97307595468616e-05, "loss": 1.8308, "step": 883 }, { "epoch": 0.04927261579622095, "grad_norm": 0.45620301365852356, "learning_rate": 9.97298359501932e-05, "loss": 1.5791, "step": 884 }, { "epoch": 0.04932835404938409, "grad_norm": 0.5314328074455261, "learning_rate": 9.972891077638438e-05, "loss": 1.7279, "step": 885 }, { "epoch": 0.049384092302547235, "grad_norm": 0.4765213429927826, "learning_rate": 9.972798402546441e-05, "loss": 1.5131, "step": 886 }, { "epoch": 0.049439830555710385, "grad_norm": 0.4913032054901123, "learning_rate": 9.972705569746274e-05, "loss": 1.6591, "step": 887 }, { "epoch": 0.04949556880887353, "grad_norm": 0.48732152581214905, "learning_rate": 9.972612579240881e-05, "loss": 1.7141, "step": 888 }, { "epoch": 0.04955130706203668, "grad_norm": 0.5283141732215881, "learning_rate": 9.972519431033206e-05, "loss": 1.8636, "step": 889 }, { "epoch": 0.04960704531519982, "grad_norm": 0.5298954844474792, "learning_rate": 9.972426125126209e-05, "loss": 1.7943, "step": 890 }, { "epoch": 0.04966278356836297, "grad_norm": 0.5104478597640991, "learning_rate": 9.972332661522845e-05, "loss": 1.6949, "step": 891 }, { "epoch": 0.04971852182152611, "grad_norm": 0.5439249873161316, "learning_rate": 9.972239040226082e-05, "loss": 1.9313, "step": 892 }, { "epoch": 0.04977426007468926, "grad_norm": 0.4874706566333771, "learning_rate": 9.972145261238884e-05, "loss": 1.8589, "step": 893 }, { "epoch": 0.049829998327852405, "grad_norm": 0.5243585705757141, "learning_rate": 9.972051324564229e-05, "loss": 1.9736, "step": 894 }, { "epoch": 0.04988573658101555, "grad_norm": 0.5669842958450317, "learning_rate": 9.971957230205096e-05, "loss": 2.1093, "step": 895 }, { "epoch": 0.0499414748341787, "grad_norm": 0.4888775050640106, "learning_rate": 9.971862978164466e-05, "loss": 1.6786, "step": 896 }, { "epoch": 0.04999721308734184, "grad_norm": 0.5279240608215332, "learning_rate": 9.971768568445332e-05, "loss": 1.8162, "step": 897 }, { "epoch": 0.05005295134050499, "grad_norm": 0.4473552405834198, "learning_rate": 9.971674001050686e-05, "loss": 1.3044, "step": 898 }, { "epoch": 0.05010868959366813, "grad_norm": 0.4724571704864502, "learning_rate": 9.971579275983527e-05, "loss": 1.7169, "step": 899 }, { "epoch": 0.05016442784683128, "grad_norm": 0.4805344343185425, "learning_rate": 9.971484393246861e-05, "loss": 1.4898, "step": 900 }, { "epoch": 0.050220166099994426, "grad_norm": 0.4852250814437866, "learning_rate": 9.971389352843695e-05, "loss": 1.6325, "step": 901 }, { "epoch": 0.05027590435315757, "grad_norm": 0.49681854248046875, "learning_rate": 9.971294154777044e-05, "loss": 1.5962, "step": 902 }, { "epoch": 0.05033164260632072, "grad_norm": 0.5085350871086121, "learning_rate": 9.971198799049928e-05, "loss": 1.8215, "step": 903 }, { "epoch": 0.05038738085948386, "grad_norm": 0.49748629331588745, "learning_rate": 9.971103285665369e-05, "loss": 1.9647, "step": 904 }, { "epoch": 0.05044311911264701, "grad_norm": 0.4835662543773651, "learning_rate": 9.971007614626397e-05, "loss": 1.6109, "step": 905 }, { "epoch": 0.050498857365810154, "grad_norm": 0.5058585405349731, "learning_rate": 9.970911785936047e-05, "loss": 1.6419, "step": 906 }, { "epoch": 0.050554595618973304, "grad_norm": 0.5386664271354675, "learning_rate": 9.970815799597358e-05, "loss": 1.6144, "step": 907 }, { "epoch": 0.05061033387213645, "grad_norm": 0.5337561964988708, "learning_rate": 9.970719655613373e-05, "loss": 1.7978, "step": 908 }, { "epoch": 0.05066607212529959, "grad_norm": 0.532317578792572, "learning_rate": 9.970623353987141e-05, "loss": 1.8175, "step": 909 }, { "epoch": 0.05072181037846274, "grad_norm": 0.5630917549133301, "learning_rate": 9.97052689472172e-05, "loss": 2.043, "step": 910 }, { "epoch": 0.05077754863162588, "grad_norm": 0.554322361946106, "learning_rate": 9.970430277820165e-05, "loss": 1.9165, "step": 911 }, { "epoch": 0.05083328688478903, "grad_norm": 0.49685636162757874, "learning_rate": 9.970333503285539e-05, "loss": 1.8203, "step": 912 }, { "epoch": 0.050889025137952175, "grad_norm": 0.5380950570106506, "learning_rate": 9.970236571120915e-05, "loss": 1.9429, "step": 913 }, { "epoch": 0.050944763391115325, "grad_norm": 0.5279613733291626, "learning_rate": 9.970139481329364e-05, "loss": 2.0989, "step": 914 }, { "epoch": 0.05100050164427847, "grad_norm": 0.509904682636261, "learning_rate": 9.970042233913968e-05, "loss": 1.7213, "step": 915 }, { "epoch": 0.05105623989744162, "grad_norm": 0.48252367973327637, "learning_rate": 9.96994482887781e-05, "loss": 1.6979, "step": 916 }, { "epoch": 0.05111197815060476, "grad_norm": 0.5245582461357117, "learning_rate": 9.969847266223979e-05, "loss": 1.7629, "step": 917 }, { "epoch": 0.0511677164037679, "grad_norm": 0.48625627160072327, "learning_rate": 9.969749545955567e-05, "loss": 1.7208, "step": 918 }, { "epoch": 0.05122345465693105, "grad_norm": 0.5168225169181824, "learning_rate": 9.969651668075678e-05, "loss": 1.952, "step": 919 }, { "epoch": 0.051279192910094196, "grad_norm": 0.47759923338890076, "learning_rate": 9.969553632587409e-05, "loss": 1.6574, "step": 920 }, { "epoch": 0.051334931163257345, "grad_norm": 0.49498680233955383, "learning_rate": 9.969455439493877e-05, "loss": 1.6173, "step": 921 }, { "epoch": 0.05139066941642049, "grad_norm": 0.48092684149742126, "learning_rate": 9.96935708879819e-05, "loss": 1.6471, "step": 922 }, { "epoch": 0.05144640766958364, "grad_norm": 0.5342095494270325, "learning_rate": 9.969258580503471e-05, "loss": 2.0134, "step": 923 }, { "epoch": 0.05150214592274678, "grad_norm": 0.58601975440979, "learning_rate": 9.969159914612843e-05, "loss": 2.1658, "step": 924 }, { "epoch": 0.051557884175909924, "grad_norm": 0.4867340922355652, "learning_rate": 9.969061091129433e-05, "loss": 1.9766, "step": 925 }, { "epoch": 0.051613622429073074, "grad_norm": 0.4857270121574402, "learning_rate": 9.968962110056379e-05, "loss": 1.678, "step": 926 }, { "epoch": 0.051669360682236216, "grad_norm": 0.5170820355415344, "learning_rate": 9.968862971396816e-05, "loss": 1.8249, "step": 927 }, { "epoch": 0.051725098935399366, "grad_norm": 0.4657866358757019, "learning_rate": 9.96876367515389e-05, "loss": 1.7606, "step": 928 }, { "epoch": 0.05178083718856251, "grad_norm": 0.5119996666908264, "learning_rate": 9.968664221330751e-05, "loss": 1.8612, "step": 929 }, { "epoch": 0.05183657544172566, "grad_norm": 0.5372640490531921, "learning_rate": 9.968564609930553e-05, "loss": 1.8672, "step": 930 }, { "epoch": 0.0518923136948888, "grad_norm": 0.49778059124946594, "learning_rate": 9.968464840956453e-05, "loss": 1.766, "step": 931 }, { "epoch": 0.05194805194805195, "grad_norm": 0.5260003805160522, "learning_rate": 9.968364914411616e-05, "loss": 1.8631, "step": 932 }, { "epoch": 0.052003790201215094, "grad_norm": 0.5278846621513367, "learning_rate": 9.968264830299213e-05, "loss": 1.5441, "step": 933 }, { "epoch": 0.05205952845437824, "grad_norm": 0.5427425503730774, "learning_rate": 9.968164588622415e-05, "loss": 1.7751, "step": 934 }, { "epoch": 0.05211526670754139, "grad_norm": 0.4653323292732239, "learning_rate": 9.968064189384403e-05, "loss": 1.662, "step": 935 }, { "epoch": 0.05217100496070453, "grad_norm": 0.5192728638648987, "learning_rate": 9.967963632588362e-05, "loss": 1.7384, "step": 936 }, { "epoch": 0.05222674321386768, "grad_norm": 0.4995409846305847, "learning_rate": 9.96786291823748e-05, "loss": 1.8133, "step": 937 }, { "epoch": 0.05228248146703082, "grad_norm": 0.5626217722892761, "learning_rate": 9.96776204633495e-05, "loss": 1.8851, "step": 938 }, { "epoch": 0.05233821972019397, "grad_norm": 0.5185354351997375, "learning_rate": 9.967661016883972e-05, "loss": 1.6583, "step": 939 }, { "epoch": 0.052393957973357115, "grad_norm": 0.5034851431846619, "learning_rate": 9.967559829887749e-05, "loss": 1.6385, "step": 940 }, { "epoch": 0.05244969622652026, "grad_norm": 0.4795439541339874, "learning_rate": 9.967458485349492e-05, "loss": 1.6901, "step": 941 }, { "epoch": 0.05250543447968341, "grad_norm": 0.6365668177604675, "learning_rate": 9.967356983272414e-05, "loss": 1.9757, "step": 942 }, { "epoch": 0.05256117273284655, "grad_norm": 0.48566654324531555, "learning_rate": 9.967255323659734e-05, "loss": 1.6266, "step": 943 }, { "epoch": 0.0526169109860097, "grad_norm": 0.4971524775028229, "learning_rate": 9.967153506514677e-05, "loss": 1.6938, "step": 944 }, { "epoch": 0.05267264923917284, "grad_norm": 0.5263299345970154, "learning_rate": 9.967051531840471e-05, "loss": 1.8448, "step": 945 }, { "epoch": 0.05272838749233599, "grad_norm": 0.4903882145881653, "learning_rate": 9.96694939964035e-05, "loss": 1.5313, "step": 946 }, { "epoch": 0.052784125745499136, "grad_norm": 0.5515956878662109, "learning_rate": 9.966847109917555e-05, "loss": 1.9398, "step": 947 }, { "epoch": 0.05283986399866228, "grad_norm": 0.47069814801216125, "learning_rate": 9.966744662675326e-05, "loss": 1.8052, "step": 948 }, { "epoch": 0.05289560225182543, "grad_norm": 0.4904758036136627, "learning_rate": 9.966642057916915e-05, "loss": 1.7875, "step": 949 }, { "epoch": 0.05295134050498857, "grad_norm": 0.5010367035865784, "learning_rate": 9.966539295645576e-05, "loss": 1.6786, "step": 950 }, { "epoch": 0.05300707875815172, "grad_norm": 0.4812747538089752, "learning_rate": 9.966436375864567e-05, "loss": 1.473, "step": 951 }, { "epoch": 0.053062817011314864, "grad_norm": 0.5010087490081787, "learning_rate": 9.966333298577154e-05, "loss": 1.7648, "step": 952 }, { "epoch": 0.053118555264478014, "grad_norm": 0.5247920155525208, "learning_rate": 9.966230063786602e-05, "loss": 1.6435, "step": 953 }, { "epoch": 0.05317429351764116, "grad_norm": 0.5183125734329224, "learning_rate": 9.96612667149619e-05, "loss": 1.762, "step": 954 }, { "epoch": 0.053230031770804306, "grad_norm": 0.5197505950927734, "learning_rate": 9.966023121709192e-05, "loss": 1.8957, "step": 955 }, { "epoch": 0.05328577002396745, "grad_norm": 0.4871842563152313, "learning_rate": 9.965919414428896e-05, "loss": 1.8783, "step": 956 }, { "epoch": 0.05334150827713059, "grad_norm": 0.4965290427207947, "learning_rate": 9.965815549658589e-05, "loss": 1.8575, "step": 957 }, { "epoch": 0.05339724653029374, "grad_norm": 0.5005083680152893, "learning_rate": 9.965711527401567e-05, "loss": 1.7704, "step": 958 }, { "epoch": 0.053452984783456885, "grad_norm": 0.4561206102371216, "learning_rate": 9.965607347661125e-05, "loss": 1.6103, "step": 959 }, { "epoch": 0.053508723036620034, "grad_norm": 0.5352826714515686, "learning_rate": 9.965503010440571e-05, "loss": 1.9864, "step": 960 }, { "epoch": 0.05356446128978318, "grad_norm": 0.4568333327770233, "learning_rate": 9.965398515743212e-05, "loss": 1.7264, "step": 961 }, { "epoch": 0.05362019954294633, "grad_norm": 0.5570031404495239, "learning_rate": 9.965293863572363e-05, "loss": 2.2176, "step": 962 }, { "epoch": 0.05367593779610947, "grad_norm": 0.5380359888076782, "learning_rate": 9.96518905393134e-05, "loss": 2.0434, "step": 963 }, { "epoch": 0.05373167604927261, "grad_norm": 0.46430766582489014, "learning_rate": 9.965084086823472e-05, "loss": 1.4151, "step": 964 }, { "epoch": 0.05378741430243576, "grad_norm": 0.4653235077857971, "learning_rate": 9.964978962252085e-05, "loss": 1.7144, "step": 965 }, { "epoch": 0.053843152555598905, "grad_norm": 0.49018028378486633, "learning_rate": 9.964873680220512e-05, "loss": 1.6531, "step": 966 }, { "epoch": 0.053898890808762055, "grad_norm": 0.5718449354171753, "learning_rate": 9.964768240732093e-05, "loss": 1.9851, "step": 967 }, { "epoch": 0.0539546290619252, "grad_norm": 0.5048679113388062, "learning_rate": 9.964662643790173e-05, "loss": 1.9137, "step": 968 }, { "epoch": 0.05401036731508835, "grad_norm": 0.5291681885719299, "learning_rate": 9.9645568893981e-05, "loss": 1.8972, "step": 969 }, { "epoch": 0.05406610556825149, "grad_norm": 0.5041894316673279, "learning_rate": 9.964450977559226e-05, "loss": 1.5612, "step": 970 }, { "epoch": 0.054121843821414634, "grad_norm": 0.561788022518158, "learning_rate": 9.964344908276914e-05, "loss": 2.0708, "step": 971 }, { "epoch": 0.05417758207457778, "grad_norm": 0.4838697016239166, "learning_rate": 9.964238681554524e-05, "loss": 1.6573, "step": 972 }, { "epoch": 0.054233320327740926, "grad_norm": 0.5092923641204834, "learning_rate": 9.964132297395428e-05, "loss": 1.918, "step": 973 }, { "epoch": 0.054289058580904076, "grad_norm": 0.5128215551376343, "learning_rate": 9.964025755802997e-05, "loss": 1.721, "step": 974 }, { "epoch": 0.05434479683406722, "grad_norm": 0.597062885761261, "learning_rate": 9.963919056780612e-05, "loss": 1.9453, "step": 975 }, { "epoch": 0.05440053508723037, "grad_norm": 0.5623565316200256, "learning_rate": 9.963812200331656e-05, "loss": 1.9271, "step": 976 }, { "epoch": 0.05445627334039351, "grad_norm": 0.4568030834197998, "learning_rate": 9.963705186459517e-05, "loss": 1.5766, "step": 977 }, { "epoch": 0.05451201159355666, "grad_norm": 0.4906899631023407, "learning_rate": 9.963598015167592e-05, "loss": 1.7721, "step": 978 }, { "epoch": 0.054567749846719804, "grad_norm": 0.5041657090187073, "learning_rate": 9.963490686459277e-05, "loss": 1.6293, "step": 979 }, { "epoch": 0.05462348809988295, "grad_norm": 0.533762514591217, "learning_rate": 9.963383200337977e-05, "loss": 1.8723, "step": 980 }, { "epoch": 0.0546792263530461, "grad_norm": 0.4968359172344208, "learning_rate": 9.963275556807098e-05, "loss": 1.7368, "step": 981 }, { "epoch": 0.05473496460620924, "grad_norm": 0.4822302758693695, "learning_rate": 9.963167755870059e-05, "loss": 1.4994, "step": 982 }, { "epoch": 0.05479070285937239, "grad_norm": 0.5066803097724915, "learning_rate": 9.963059797530274e-05, "loss": 1.8058, "step": 983 }, { "epoch": 0.05484644111253553, "grad_norm": 0.518132209777832, "learning_rate": 9.96295168179117e-05, "loss": 1.7393, "step": 984 }, { "epoch": 0.05490217936569868, "grad_norm": 0.5607625842094421, "learning_rate": 9.962843408656176e-05, "loss": 2.149, "step": 985 }, { "epoch": 0.054957917618861825, "grad_norm": 0.5685406923294067, "learning_rate": 9.962734978128723e-05, "loss": 2.1734, "step": 986 }, { "epoch": 0.05501365587202497, "grad_norm": 0.5319599509239197, "learning_rate": 9.962626390212251e-05, "loss": 1.8782, "step": 987 }, { "epoch": 0.05506939412518812, "grad_norm": 0.4679426848888397, "learning_rate": 9.962517644910204e-05, "loss": 1.7033, "step": 988 }, { "epoch": 0.05512513237835126, "grad_norm": 0.5416939854621887, "learning_rate": 9.962408742226032e-05, "loss": 1.969, "step": 989 }, { "epoch": 0.05518087063151441, "grad_norm": 0.49005210399627686, "learning_rate": 9.962299682163186e-05, "loss": 1.8229, "step": 990 }, { "epoch": 0.05523660888467755, "grad_norm": 0.5170348286628723, "learning_rate": 9.962190464725128e-05, "loss": 1.8161, "step": 991 }, { "epoch": 0.0552923471378407, "grad_norm": 0.5188906192779541, "learning_rate": 9.962081089915319e-05, "loss": 1.938, "step": 992 }, { "epoch": 0.055348085391003846, "grad_norm": 0.4945777952671051, "learning_rate": 9.961971557737227e-05, "loss": 1.7414, "step": 993 }, { "epoch": 0.05540382364416699, "grad_norm": 0.511976420879364, "learning_rate": 9.96186186819433e-05, "loss": 1.8595, "step": 994 }, { "epoch": 0.05545956189733014, "grad_norm": 0.5381083488464355, "learning_rate": 9.961752021290103e-05, "loss": 1.8233, "step": 995 }, { "epoch": 0.05551530015049328, "grad_norm": 0.4679305851459503, "learning_rate": 9.961642017028033e-05, "loss": 1.6666, "step": 996 }, { "epoch": 0.05557103840365643, "grad_norm": 0.5513458847999573, "learning_rate": 9.961531855411603e-05, "loss": 2.0589, "step": 997 }, { "epoch": 0.055626776656819574, "grad_norm": 0.5168341994285583, "learning_rate": 9.961421536444313e-05, "loss": 2.0774, "step": 998 }, { "epoch": 0.055682514909982724, "grad_norm": 0.5111126899719238, "learning_rate": 9.961311060129659e-05, "loss": 1.5936, "step": 999 }, { "epoch": 0.055738253163145866, "grad_norm": 0.5352098941802979, "learning_rate": 9.961200426471142e-05, "loss": 1.8414, "step": 1000 }, { "epoch": 0.055793991416309016, "grad_norm": 0.47616758942604065, "learning_rate": 9.961089635472276e-05, "loss": 1.6496, "step": 1001 }, { "epoch": 0.05584972966947216, "grad_norm": 0.4767918288707733, "learning_rate": 9.96097868713657e-05, "loss": 1.3193, "step": 1002 }, { "epoch": 0.0559054679226353, "grad_norm": 0.46608811616897583, "learning_rate": 9.960867581467546e-05, "loss": 1.6453, "step": 1003 }, { "epoch": 0.05596120617579845, "grad_norm": 0.5042111277580261, "learning_rate": 9.960756318468726e-05, "loss": 1.8798, "step": 1004 }, { "epoch": 0.056016944428961594, "grad_norm": 0.5502855777740479, "learning_rate": 9.960644898143639e-05, "loss": 1.9322, "step": 1005 }, { "epoch": 0.056072682682124744, "grad_norm": 0.4749864935874939, "learning_rate": 9.960533320495818e-05, "loss": 1.5659, "step": 1006 }, { "epoch": 0.05612842093528789, "grad_norm": 0.4787498712539673, "learning_rate": 9.960421585528802e-05, "loss": 1.8482, "step": 1007 }, { "epoch": 0.05618415918845104, "grad_norm": 0.578971266746521, "learning_rate": 9.960309693246135e-05, "loss": 1.9905, "step": 1008 }, { "epoch": 0.05623989744161418, "grad_norm": 0.4983009099960327, "learning_rate": 9.960197643651363e-05, "loss": 1.722, "step": 1009 }, { "epoch": 0.05629563569477732, "grad_norm": 0.5528213977813721, "learning_rate": 9.960085436748044e-05, "loss": 1.8293, "step": 1010 }, { "epoch": 0.05635137394794047, "grad_norm": 0.49824774265289307, "learning_rate": 9.959973072539734e-05, "loss": 1.8081, "step": 1011 }, { "epoch": 0.056407112201103615, "grad_norm": 0.49810606241226196, "learning_rate": 9.959860551029996e-05, "loss": 1.5834, "step": 1012 }, { "epoch": 0.056462850454266765, "grad_norm": 0.515215277671814, "learning_rate": 9.9597478722224e-05, "loss": 1.8318, "step": 1013 }, { "epoch": 0.05651858870742991, "grad_norm": 0.5139912962913513, "learning_rate": 9.959635036120518e-05, "loss": 1.7475, "step": 1014 }, { "epoch": 0.05657432696059306, "grad_norm": 0.4912470579147339, "learning_rate": 9.959522042727932e-05, "loss": 1.6809, "step": 1015 }, { "epoch": 0.0566300652137562, "grad_norm": 0.4990215003490448, "learning_rate": 9.959408892048219e-05, "loss": 1.7024, "step": 1016 }, { "epoch": 0.05668580346691934, "grad_norm": 0.5626692771911621, "learning_rate": 9.959295584084974e-05, "loss": 1.9791, "step": 1017 }, { "epoch": 0.05674154172008249, "grad_norm": 0.4737264811992645, "learning_rate": 9.959182118841786e-05, "loss": 1.5592, "step": 1018 }, { "epoch": 0.056797279973245636, "grad_norm": 0.5367196798324585, "learning_rate": 9.959068496322256e-05, "loss": 2.012, "step": 1019 }, { "epoch": 0.056853018226408786, "grad_norm": 0.5062724947929382, "learning_rate": 9.958954716529987e-05, "loss": 1.6301, "step": 1020 }, { "epoch": 0.05690875647957193, "grad_norm": 0.5419873595237732, "learning_rate": 9.958840779468586e-05, "loss": 1.8351, "step": 1021 }, { "epoch": 0.05696449473273508, "grad_norm": 0.5291727781295776, "learning_rate": 9.958726685141668e-05, "loss": 1.8221, "step": 1022 }, { "epoch": 0.05702023298589822, "grad_norm": 0.5285983085632324, "learning_rate": 9.958612433552852e-05, "loss": 1.8575, "step": 1023 }, { "epoch": 0.05707597123906137, "grad_norm": 0.49050652980804443, "learning_rate": 9.95849802470576e-05, "loss": 1.7646, "step": 1024 }, { "epoch": 0.057131709492224514, "grad_norm": 0.49379006028175354, "learning_rate": 9.95838345860402e-05, "loss": 1.6789, "step": 1025 }, { "epoch": 0.05718744774538766, "grad_norm": 0.4859938621520996, "learning_rate": 9.958268735251266e-05, "loss": 1.8542, "step": 1026 }, { "epoch": 0.057243185998550807, "grad_norm": 0.5445101857185364, "learning_rate": 9.958153854651136e-05, "loss": 1.819, "step": 1027 }, { "epoch": 0.05729892425171395, "grad_norm": 0.5075321197509766, "learning_rate": 9.958038816807276e-05, "loss": 1.7872, "step": 1028 }, { "epoch": 0.0573546625048771, "grad_norm": 0.4982723593711853, "learning_rate": 9.957923621723329e-05, "loss": 1.8243, "step": 1029 }, { "epoch": 0.05741040075804024, "grad_norm": 0.49452096223831177, "learning_rate": 9.957808269402954e-05, "loss": 1.7316, "step": 1030 }, { "epoch": 0.05746613901120339, "grad_norm": 0.5450426936149597, "learning_rate": 9.957692759849806e-05, "loss": 2.0758, "step": 1031 }, { "epoch": 0.057521877264366535, "grad_norm": 0.5058251023292542, "learning_rate": 9.957577093067548e-05, "loss": 1.6588, "step": 1032 }, { "epoch": 0.05757761551752968, "grad_norm": 0.4902496039867401, "learning_rate": 9.957461269059851e-05, "loss": 1.8477, "step": 1033 }, { "epoch": 0.05763335377069283, "grad_norm": 0.5185796618461609, "learning_rate": 9.957345287830386e-05, "loss": 1.7541, "step": 1034 }, { "epoch": 0.05768909202385597, "grad_norm": 0.5609437227249146, "learning_rate": 9.95722914938283e-05, "loss": 1.8738, "step": 1035 }, { "epoch": 0.05774483027701912, "grad_norm": 0.47249266505241394, "learning_rate": 9.957112853720871e-05, "loss": 1.6668, "step": 1036 }, { "epoch": 0.05780056853018226, "grad_norm": 0.4762544333934784, "learning_rate": 9.956996400848191e-05, "loss": 1.5023, "step": 1037 }, { "epoch": 0.05785630678334541, "grad_norm": 0.5092499852180481, "learning_rate": 9.956879790768489e-05, "loss": 1.7614, "step": 1038 }, { "epoch": 0.057912045036508555, "grad_norm": 0.4864351451396942, "learning_rate": 9.95676302348546e-05, "loss": 1.7874, "step": 1039 }, { "epoch": 0.0579677832896717, "grad_norm": 0.5312706828117371, "learning_rate": 9.956646099002807e-05, "loss": 1.7864, "step": 1040 }, { "epoch": 0.05802352154283485, "grad_norm": 0.5099919438362122, "learning_rate": 9.95652901732424e-05, "loss": 1.9396, "step": 1041 }, { "epoch": 0.05807925979599799, "grad_norm": 0.4992043375968933, "learning_rate": 9.95641177845347e-05, "loss": 1.8373, "step": 1042 }, { "epoch": 0.05813499804916114, "grad_norm": 0.557106614112854, "learning_rate": 9.956294382394218e-05, "loss": 2.0565, "step": 1043 }, { "epoch": 0.058190736302324284, "grad_norm": 0.5183643102645874, "learning_rate": 9.956176829150204e-05, "loss": 1.837, "step": 1044 }, { "epoch": 0.05824647455548743, "grad_norm": 0.4911157488822937, "learning_rate": 9.956059118725158e-05, "loss": 1.736, "step": 1045 }, { "epoch": 0.058302212808650576, "grad_norm": 0.524387538433075, "learning_rate": 9.955941251122812e-05, "loss": 1.9561, "step": 1046 }, { "epoch": 0.058357951061813726, "grad_norm": 0.4891200065612793, "learning_rate": 9.955823226346905e-05, "loss": 1.723, "step": 1047 }, { "epoch": 0.05841368931497687, "grad_norm": 0.5014610886573792, "learning_rate": 9.95570504440118e-05, "loss": 1.6632, "step": 1048 }, { "epoch": 0.05846942756814001, "grad_norm": 0.46674925088882446, "learning_rate": 9.955586705289386e-05, "loss": 1.5877, "step": 1049 }, { "epoch": 0.05852516582130316, "grad_norm": 0.5613251328468323, "learning_rate": 9.955468209015273e-05, "loss": 2.0043, "step": 1050 }, { "epoch": 0.058580904074466304, "grad_norm": 0.49603840708732605, "learning_rate": 9.9553495555826e-05, "loss": 1.7604, "step": 1051 }, { "epoch": 0.058636642327629454, "grad_norm": 0.5199983716011047, "learning_rate": 9.955230744995132e-05, "loss": 1.8945, "step": 1052 }, { "epoch": 0.0586923805807926, "grad_norm": 0.5177999138832092, "learning_rate": 9.955111777256635e-05, "loss": 1.9154, "step": 1053 }, { "epoch": 0.05874811883395575, "grad_norm": 0.49996909499168396, "learning_rate": 9.954992652370885e-05, "loss": 1.6888, "step": 1054 }, { "epoch": 0.05880385708711889, "grad_norm": 0.5143979787826538, "learning_rate": 9.954873370341656e-05, "loss": 1.7544, "step": 1055 }, { "epoch": 0.05885959534028203, "grad_norm": 0.498963862657547, "learning_rate": 9.954753931172733e-05, "loss": 1.9448, "step": 1056 }, { "epoch": 0.05891533359344518, "grad_norm": 0.5648823976516724, "learning_rate": 9.954634334867902e-05, "loss": 2.0281, "step": 1057 }, { "epoch": 0.058971071846608325, "grad_norm": 0.4741098880767822, "learning_rate": 9.95451458143096e-05, "loss": 1.7383, "step": 1058 }, { "epoch": 0.059026810099771475, "grad_norm": 0.5303511023521423, "learning_rate": 9.9543946708657e-05, "loss": 1.9047, "step": 1059 }, { "epoch": 0.05908254835293462, "grad_norm": 0.6070243716239929, "learning_rate": 9.95427460317593e-05, "loss": 2.1998, "step": 1060 }, { "epoch": 0.05913828660609777, "grad_norm": 0.509857177734375, "learning_rate": 9.954154378365453e-05, "loss": 1.9788, "step": 1061 }, { "epoch": 0.05919402485926091, "grad_norm": 0.4909118711948395, "learning_rate": 9.954033996438084e-05, "loss": 1.7906, "step": 1062 }, { "epoch": 0.05924976311242406, "grad_norm": 0.5275348424911499, "learning_rate": 9.95391345739764e-05, "loss": 1.9644, "step": 1063 }, { "epoch": 0.0593055013655872, "grad_norm": 0.5134482979774475, "learning_rate": 9.953792761247946e-05, "loss": 1.7528, "step": 1064 }, { "epoch": 0.059361239618750346, "grad_norm": 0.4846155345439911, "learning_rate": 9.953671907992827e-05, "loss": 1.7198, "step": 1065 }, { "epoch": 0.059416977871913496, "grad_norm": 0.508575975894928, "learning_rate": 9.953550897636117e-05, "loss": 1.8502, "step": 1066 }, { "epoch": 0.05947271612507664, "grad_norm": 0.6168702244758606, "learning_rate": 9.953429730181653e-05, "loss": 1.8859, "step": 1067 }, { "epoch": 0.05952845437823979, "grad_norm": 0.5224670767784119, "learning_rate": 9.953308405633281e-05, "loss": 1.9667, "step": 1068 }, { "epoch": 0.05958419263140293, "grad_norm": 0.5521063208580017, "learning_rate": 9.953186923994845e-05, "loss": 1.9502, "step": 1069 }, { "epoch": 0.05963993088456608, "grad_norm": 0.5243295431137085, "learning_rate": 9.953065285270198e-05, "loss": 1.7872, "step": 1070 }, { "epoch": 0.059695669137729224, "grad_norm": 0.457383394241333, "learning_rate": 9.952943489463199e-05, "loss": 1.4861, "step": 1071 }, { "epoch": 0.059751407390892367, "grad_norm": 0.5042887330055237, "learning_rate": 9.95282153657771e-05, "loss": 1.8046, "step": 1072 }, { "epoch": 0.059807145644055516, "grad_norm": 0.5393437147140503, "learning_rate": 9.9526994266176e-05, "loss": 2.0209, "step": 1073 }, { "epoch": 0.05986288389721866, "grad_norm": 0.5133099555969238, "learning_rate": 9.952577159586739e-05, "loss": 2.0277, "step": 1074 }, { "epoch": 0.05991862215038181, "grad_norm": 0.538661539554596, "learning_rate": 9.952454735489007e-05, "loss": 1.9108, "step": 1075 }, { "epoch": 0.05997436040354495, "grad_norm": 0.5276675224304199, "learning_rate": 9.952332154328286e-05, "loss": 2.0656, "step": 1076 }, { "epoch": 0.0600300986567081, "grad_norm": 0.5048499703407288, "learning_rate": 9.952209416108461e-05, "loss": 1.757, "step": 1077 }, { "epoch": 0.060085836909871244, "grad_norm": 0.5175162553787231, "learning_rate": 9.952086520833428e-05, "loss": 1.7967, "step": 1078 }, { "epoch": 0.06014157516303439, "grad_norm": 0.5084596276283264, "learning_rate": 9.951963468507084e-05, "loss": 1.705, "step": 1079 }, { "epoch": 0.06019731341619754, "grad_norm": 0.45831501483917236, "learning_rate": 9.95184025913333e-05, "loss": 1.6394, "step": 1080 }, { "epoch": 0.06025305166936068, "grad_norm": 0.47496846318244934, "learning_rate": 9.951716892716074e-05, "loss": 1.5622, "step": 1081 }, { "epoch": 0.06030878992252383, "grad_norm": 0.5142143964767456, "learning_rate": 9.951593369259229e-05, "loss": 1.943, "step": 1082 }, { "epoch": 0.06036452817568697, "grad_norm": 0.4750124216079712, "learning_rate": 9.951469688766712e-05, "loss": 1.7855, "step": 1083 }, { "epoch": 0.06042026642885012, "grad_norm": 0.5169959664344788, "learning_rate": 9.951345851242445e-05, "loss": 1.8589, "step": 1084 }, { "epoch": 0.060476004682013265, "grad_norm": 0.4891696572303772, "learning_rate": 9.951221856690355e-05, "loss": 1.8431, "step": 1085 }, { "epoch": 0.060531742935176415, "grad_norm": 0.49664726853370667, "learning_rate": 9.951097705114378e-05, "loss": 1.8495, "step": 1086 }, { "epoch": 0.06058748118833956, "grad_norm": 0.4737338423728943, "learning_rate": 9.950973396518449e-05, "loss": 1.6244, "step": 1087 }, { "epoch": 0.0606432194415027, "grad_norm": 0.4466894865036011, "learning_rate": 9.950848930906506e-05, "loss": 1.569, "step": 1088 }, { "epoch": 0.06069895769466585, "grad_norm": 0.5531814694404602, "learning_rate": 9.950724308282504e-05, "loss": 1.8739, "step": 1089 }, { "epoch": 0.06075469594782899, "grad_norm": 0.5358182191848755, "learning_rate": 9.95059952865039e-05, "loss": 1.5985, "step": 1090 }, { "epoch": 0.06081043420099214, "grad_norm": 0.5551037788391113, "learning_rate": 9.950474592014123e-05, "loss": 1.9313, "step": 1091 }, { "epoch": 0.060866172454155286, "grad_norm": 0.46842116117477417, "learning_rate": 9.950349498377666e-05, "loss": 1.5846, "step": 1092 }, { "epoch": 0.060921910707318436, "grad_norm": 0.5490810871124268, "learning_rate": 9.950224247744986e-05, "loss": 1.7246, "step": 1093 }, { "epoch": 0.06097764896048158, "grad_norm": 0.46604838967323303, "learning_rate": 9.950098840120055e-05, "loss": 1.3499, "step": 1094 }, { "epoch": 0.06103338721364472, "grad_norm": 0.4957679808139801, "learning_rate": 9.949973275506847e-05, "loss": 1.7099, "step": 1095 }, { "epoch": 0.06108912546680787, "grad_norm": 0.5058358907699585, "learning_rate": 9.94984755390935e-05, "loss": 2.0376, "step": 1096 }, { "epoch": 0.061144863719971014, "grad_norm": 0.5344205498695374, "learning_rate": 9.949721675331546e-05, "loss": 1.8721, "step": 1097 }, { "epoch": 0.061200601973134164, "grad_norm": 0.5005959272384644, "learning_rate": 9.94959563977743e-05, "loss": 1.8502, "step": 1098 }, { "epoch": 0.06125634022629731, "grad_norm": 0.5033101439476013, "learning_rate": 9.949469447250998e-05, "loss": 1.762, "step": 1099 }, { "epoch": 0.061312078479460456, "grad_norm": 0.489114373922348, "learning_rate": 9.949343097756253e-05, "loss": 1.779, "step": 1100 }, { "epoch": 0.0613678167326236, "grad_norm": 0.49902451038360596, "learning_rate": 9.949216591297203e-05, "loss": 1.6705, "step": 1101 }, { "epoch": 0.06142355498578674, "grad_norm": 0.5019201636314392, "learning_rate": 9.949089927877858e-05, "loss": 1.6734, "step": 1102 }, { "epoch": 0.06147929323894989, "grad_norm": 0.5644415020942688, "learning_rate": 9.948963107502235e-05, "loss": 2.0193, "step": 1103 }, { "epoch": 0.061535031492113035, "grad_norm": 0.55086749792099, "learning_rate": 9.948836130174358e-05, "loss": 1.9377, "step": 1104 }, { "epoch": 0.061590769745276185, "grad_norm": 0.48262813687324524, "learning_rate": 9.94870899589825e-05, "loss": 1.6455, "step": 1105 }, { "epoch": 0.06164650799843933, "grad_norm": 0.5041834115982056, "learning_rate": 9.948581704677949e-05, "loss": 1.9186, "step": 1106 }, { "epoch": 0.06170224625160248, "grad_norm": 0.5112140774726868, "learning_rate": 9.948454256517486e-05, "loss": 1.9353, "step": 1107 }, { "epoch": 0.06175798450476562, "grad_norm": 0.5558189749717712, "learning_rate": 9.948326651420907e-05, "loss": 1.6834, "step": 1108 }, { "epoch": 0.06181372275792877, "grad_norm": 0.5652199983596802, "learning_rate": 9.948198889392255e-05, "loss": 1.8998, "step": 1109 }, { "epoch": 0.06186946101109191, "grad_norm": 0.5617989301681519, "learning_rate": 9.948070970435587e-05, "loss": 2.1707, "step": 1110 }, { "epoch": 0.061925199264255056, "grad_norm": 0.5738351941108704, "learning_rate": 9.947942894554956e-05, "loss": 1.9854, "step": 1111 }, { "epoch": 0.061980937517418205, "grad_norm": 0.4870631694793701, "learning_rate": 9.947814661754425e-05, "loss": 1.6627, "step": 1112 }, { "epoch": 0.06203667577058135, "grad_norm": 0.5056869387626648, "learning_rate": 9.947686272038059e-05, "loss": 2.0686, "step": 1113 }, { "epoch": 0.0620924140237445, "grad_norm": 0.47897595167160034, "learning_rate": 9.947557725409934e-05, "loss": 1.7178, "step": 1114 }, { "epoch": 0.06214815227690764, "grad_norm": 0.5754001140594482, "learning_rate": 9.947429021874123e-05, "loss": 1.9185, "step": 1115 }, { "epoch": 0.06220389053007079, "grad_norm": 0.5134566426277161, "learning_rate": 9.94730016143471e-05, "loss": 1.7684, "step": 1116 }, { "epoch": 0.06225962878323393, "grad_norm": 0.5307061076164246, "learning_rate": 9.947171144095779e-05, "loss": 1.8471, "step": 1117 }, { "epoch": 0.062315367036397076, "grad_norm": 0.5750778913497925, "learning_rate": 9.947041969861424e-05, "loss": 2.0452, "step": 1118 }, { "epoch": 0.062371105289560226, "grad_norm": 0.4882142245769501, "learning_rate": 9.946912638735741e-05, "loss": 1.6376, "step": 1119 }, { "epoch": 0.06242684354272337, "grad_norm": 0.5403459668159485, "learning_rate": 9.946783150722832e-05, "loss": 1.7909, "step": 1120 }, { "epoch": 0.06248258179588652, "grad_norm": 0.6261606812477112, "learning_rate": 9.946653505826802e-05, "loss": 2.3971, "step": 1121 }, { "epoch": 0.06253832004904966, "grad_norm": 0.5000771880149841, "learning_rate": 9.946523704051765e-05, "loss": 1.6772, "step": 1122 }, { "epoch": 0.0625940583022128, "grad_norm": 0.5789170265197754, "learning_rate": 9.946393745401836e-05, "loss": 1.5496, "step": 1123 }, { "epoch": 0.06264979655537596, "grad_norm": 0.5486829280853271, "learning_rate": 9.946263629881137e-05, "loss": 1.926, "step": 1124 }, { "epoch": 0.0627055348085391, "grad_norm": 0.4877256751060486, "learning_rate": 9.946133357493794e-05, "loss": 1.8916, "step": 1125 }, { "epoch": 0.06276127306170225, "grad_norm": 0.505279541015625, "learning_rate": 9.946002928243939e-05, "loss": 1.7043, "step": 1126 }, { "epoch": 0.06281701131486539, "grad_norm": 0.5650628805160522, "learning_rate": 9.945872342135709e-05, "loss": 2.0595, "step": 1127 }, { "epoch": 0.06287274956802853, "grad_norm": 0.5424087047576904, "learning_rate": 9.945741599173244e-05, "loss": 1.7227, "step": 1128 }, { "epoch": 0.06292848782119169, "grad_norm": 0.5090418457984924, "learning_rate": 9.945610699360692e-05, "loss": 1.7466, "step": 1129 }, { "epoch": 0.06298422607435483, "grad_norm": 0.5532562732696533, "learning_rate": 9.945479642702203e-05, "loss": 1.9668, "step": 1130 }, { "epoch": 0.06303996432751797, "grad_norm": 0.4829805791378021, "learning_rate": 9.945348429201933e-05, "loss": 1.664, "step": 1131 }, { "epoch": 0.06309570258068112, "grad_norm": 0.5276423096656799, "learning_rate": 9.945217058864045e-05, "loss": 1.7043, "step": 1132 }, { "epoch": 0.06315144083384426, "grad_norm": 0.49455907940864563, "learning_rate": 9.945085531692704e-05, "loss": 1.6095, "step": 1133 }, { "epoch": 0.06320717908700742, "grad_norm": 0.49773842096328735, "learning_rate": 9.944953847692082e-05, "loss": 1.6696, "step": 1134 }, { "epoch": 0.06326291734017056, "grad_norm": 0.5351307988166809, "learning_rate": 9.944822006866356e-05, "loss": 1.8795, "step": 1135 }, { "epoch": 0.0633186555933337, "grad_norm": 0.5688774585723877, "learning_rate": 9.944690009219705e-05, "loss": 1.6658, "step": 1136 }, { "epoch": 0.06337439384649685, "grad_norm": 0.5083485841751099, "learning_rate": 9.944557854756316e-05, "loss": 1.5768, "step": 1137 }, { "epoch": 0.06343013209966, "grad_norm": 0.5670489072799683, "learning_rate": 9.944425543480382e-05, "loss": 1.9228, "step": 1138 }, { "epoch": 0.06348587035282315, "grad_norm": 0.49227067828178406, "learning_rate": 9.944293075396098e-05, "loss": 1.5889, "step": 1139 }, { "epoch": 0.06354160860598629, "grad_norm": 0.5258840918540955, "learning_rate": 9.944160450507665e-05, "loss": 1.7821, "step": 1140 }, { "epoch": 0.06359734685914943, "grad_norm": 0.5238833427429199, "learning_rate": 9.944027668819286e-05, "loss": 1.6987, "step": 1141 }, { "epoch": 0.06365308511231257, "grad_norm": 0.45374488830566406, "learning_rate": 9.943894730335179e-05, "loss": 1.4687, "step": 1142 }, { "epoch": 0.06370882336547573, "grad_norm": 0.496855765581131, "learning_rate": 9.943761635059554e-05, "loss": 1.6539, "step": 1143 }, { "epoch": 0.06376456161863887, "grad_norm": 0.5250856876373291, "learning_rate": 9.943628382996634e-05, "loss": 1.9439, "step": 1144 }, { "epoch": 0.06382029987180202, "grad_norm": 0.49122875928878784, "learning_rate": 9.943494974150644e-05, "loss": 1.6248, "step": 1145 }, { "epoch": 0.06387603812496516, "grad_norm": 0.5038126111030579, "learning_rate": 9.943361408525818e-05, "loss": 1.8027, "step": 1146 }, { "epoch": 0.06393177637812832, "grad_norm": 0.5918904542922974, "learning_rate": 9.94322768612639e-05, "loss": 2.1447, "step": 1147 }, { "epoch": 0.06398751463129146, "grad_norm": 0.46479690074920654, "learning_rate": 9.943093806956601e-05, "loss": 1.8147, "step": 1148 }, { "epoch": 0.0640432528844546, "grad_norm": 0.5129300355911255, "learning_rate": 9.942959771020694e-05, "loss": 1.9251, "step": 1149 }, { "epoch": 0.06409899113761774, "grad_norm": 0.5755007266998291, "learning_rate": 9.942825578322926e-05, "loss": 1.9842, "step": 1150 }, { "epoch": 0.06415472939078089, "grad_norm": 0.4916748106479645, "learning_rate": 9.942691228867546e-05, "loss": 1.7163, "step": 1151 }, { "epoch": 0.06421046764394404, "grad_norm": 0.5524545311927795, "learning_rate": 9.94255672265882e-05, "loss": 1.8273, "step": 1152 }, { "epoch": 0.06426620589710719, "grad_norm": 0.5353971719741821, "learning_rate": 9.942422059701012e-05, "loss": 1.8914, "step": 1153 }, { "epoch": 0.06432194415027033, "grad_norm": 0.48068755865097046, "learning_rate": 9.942287239998392e-05, "loss": 1.7668, "step": 1154 }, { "epoch": 0.06437768240343347, "grad_norm": 0.48459264636039734, "learning_rate": 9.942152263555237e-05, "loss": 1.5809, "step": 1155 }, { "epoch": 0.06443342065659662, "grad_norm": 0.5255505442619324, "learning_rate": 9.942017130375825e-05, "loss": 1.8543, "step": 1156 }, { "epoch": 0.06448915890975977, "grad_norm": 0.5935083627700806, "learning_rate": 9.941881840464447e-05, "loss": 1.7744, "step": 1157 }, { "epoch": 0.06454489716292292, "grad_norm": 0.5216168761253357, "learning_rate": 9.941746393825386e-05, "loss": 1.5802, "step": 1158 }, { "epoch": 0.06460063541608606, "grad_norm": 0.5127310752868652, "learning_rate": 9.941610790462946e-05, "loss": 1.8704, "step": 1159 }, { "epoch": 0.0646563736692492, "grad_norm": 0.5310918688774109, "learning_rate": 9.94147503038142e-05, "loss": 1.7503, "step": 1160 }, { "epoch": 0.06471211192241236, "grad_norm": 0.5417837500572205, "learning_rate": 9.941339113585117e-05, "loss": 1.7069, "step": 1161 }, { "epoch": 0.0647678501755755, "grad_norm": 0.46583306789398193, "learning_rate": 9.94120304007835e-05, "loss": 1.6529, "step": 1162 }, { "epoch": 0.06482358842873864, "grad_norm": 0.5210421681404114, "learning_rate": 9.941066809865429e-05, "loss": 1.8965, "step": 1163 }, { "epoch": 0.06487932668190179, "grad_norm": 0.4983007311820984, "learning_rate": 9.940930422950679e-05, "loss": 1.797, "step": 1164 }, { "epoch": 0.06493506493506493, "grad_norm": 0.5835360884666443, "learning_rate": 9.940793879338424e-05, "loss": 1.9707, "step": 1165 }, { "epoch": 0.06499080318822809, "grad_norm": 0.48875924944877625, "learning_rate": 9.940657179032993e-05, "loss": 1.8563, "step": 1166 }, { "epoch": 0.06504654144139123, "grad_norm": 0.4999620020389557, "learning_rate": 9.940520322038722e-05, "loss": 1.6063, "step": 1167 }, { "epoch": 0.06510227969455437, "grad_norm": 0.49378272891044617, "learning_rate": 9.940383308359951e-05, "loss": 1.8387, "step": 1168 }, { "epoch": 0.06515801794771751, "grad_norm": 0.44992733001708984, "learning_rate": 9.940246138001027e-05, "loss": 1.4808, "step": 1169 }, { "epoch": 0.06521375620088067, "grad_norm": 0.5133140683174133, "learning_rate": 9.9401088109663e-05, "loss": 1.9234, "step": 1170 }, { "epoch": 0.06526949445404381, "grad_norm": 0.6143995523452759, "learning_rate": 9.939971327260122e-05, "loss": 2.1587, "step": 1171 }, { "epoch": 0.06532523270720696, "grad_norm": 0.5144213438034058, "learning_rate": 9.939833686886857e-05, "loss": 1.8453, "step": 1172 }, { "epoch": 0.0653809709603701, "grad_norm": 0.48773664236068726, "learning_rate": 9.939695889850869e-05, "loss": 1.7421, "step": 1173 }, { "epoch": 0.06543670921353324, "grad_norm": 0.48457232117652893, "learning_rate": 9.939557936156527e-05, "loss": 1.7447, "step": 1174 }, { "epoch": 0.0654924474666964, "grad_norm": 0.48477059602737427, "learning_rate": 9.939419825808207e-05, "loss": 1.5579, "step": 1175 }, { "epoch": 0.06554818571985954, "grad_norm": 0.5835525393486023, "learning_rate": 9.93928155881029e-05, "loss": 2.1224, "step": 1176 }, { "epoch": 0.06560392397302268, "grad_norm": 0.5277059078216553, "learning_rate": 9.939143135167158e-05, "loss": 1.8331, "step": 1177 }, { "epoch": 0.06565966222618583, "grad_norm": 0.5046493411064148, "learning_rate": 9.939004554883205e-05, "loss": 1.7895, "step": 1178 }, { "epoch": 0.06571540047934897, "grad_norm": 0.5206563472747803, "learning_rate": 9.938865817962822e-05, "loss": 1.7342, "step": 1179 }, { "epoch": 0.06577113873251213, "grad_norm": 0.43598276376724243, "learning_rate": 9.938726924410412e-05, "loss": 1.5657, "step": 1180 }, { "epoch": 0.06582687698567527, "grad_norm": 0.49584537744522095, "learning_rate": 9.938587874230379e-05, "loss": 1.7487, "step": 1181 }, { "epoch": 0.06588261523883841, "grad_norm": 0.539125382900238, "learning_rate": 9.938448667427131e-05, "loss": 1.8534, "step": 1182 }, { "epoch": 0.06593835349200156, "grad_norm": 0.4833453595638275, "learning_rate": 9.938309304005086e-05, "loss": 1.6074, "step": 1183 }, { "epoch": 0.06599409174516471, "grad_norm": 0.5339459180831909, "learning_rate": 9.938169783968663e-05, "loss": 1.7358, "step": 1184 }, { "epoch": 0.06604982999832786, "grad_norm": 0.5234376788139343, "learning_rate": 9.938030107322283e-05, "loss": 1.5923, "step": 1185 }, { "epoch": 0.066105568251491, "grad_norm": 0.5175224542617798, "learning_rate": 9.93789027407038e-05, "loss": 1.8394, "step": 1186 }, { "epoch": 0.06616130650465414, "grad_norm": 0.5155382752418518, "learning_rate": 9.937750284217389e-05, "loss": 1.6385, "step": 1187 }, { "epoch": 0.06621704475781728, "grad_norm": 0.47023966908454895, "learning_rate": 9.937610137767747e-05, "loss": 1.6236, "step": 1188 }, { "epoch": 0.06627278301098044, "grad_norm": 0.4659249484539032, "learning_rate": 9.937469834725898e-05, "loss": 1.6139, "step": 1189 }, { "epoch": 0.06632852126414358, "grad_norm": 0.4964550733566284, "learning_rate": 9.937329375096297e-05, "loss": 1.62, "step": 1190 }, { "epoch": 0.06638425951730673, "grad_norm": 0.5324812531471252, "learning_rate": 9.937188758883393e-05, "loss": 1.8803, "step": 1191 }, { "epoch": 0.06643999777046987, "grad_norm": 0.5404229164123535, "learning_rate": 9.937047986091646e-05, "loss": 1.9219, "step": 1192 }, { "epoch": 0.06649573602363303, "grad_norm": 0.49228188395500183, "learning_rate": 9.936907056725524e-05, "loss": 1.7777, "step": 1193 }, { "epoch": 0.06655147427679617, "grad_norm": 0.5689822435379028, "learning_rate": 9.936765970789492e-05, "loss": 1.9888, "step": 1194 }, { "epoch": 0.06660721252995931, "grad_norm": 0.5374904274940491, "learning_rate": 9.936624728288029e-05, "loss": 1.6308, "step": 1195 }, { "epoch": 0.06666295078312245, "grad_norm": 0.48381903767585754, "learning_rate": 9.93648332922561e-05, "loss": 1.6621, "step": 1196 }, { "epoch": 0.0667186890362856, "grad_norm": 0.5000702738761902, "learning_rate": 9.936341773606723e-05, "loss": 1.6883, "step": 1197 }, { "epoch": 0.06677442728944875, "grad_norm": 0.4849522113800049, "learning_rate": 9.936200061435857e-05, "loss": 1.6099, "step": 1198 }, { "epoch": 0.0668301655426119, "grad_norm": 0.5355091094970703, "learning_rate": 9.936058192717502e-05, "loss": 1.725, "step": 1199 }, { "epoch": 0.06688590379577504, "grad_norm": 0.4482690095901489, "learning_rate": 9.935916167456163e-05, "loss": 1.5314, "step": 1200 }, { "epoch": 0.06694164204893818, "grad_norm": 0.4166151285171509, "learning_rate": 9.93577398565634e-05, "loss": 1.094, "step": 1201 }, { "epoch": 0.06699738030210133, "grad_norm": 0.569545328617096, "learning_rate": 9.935631647322544e-05, "loss": 1.9806, "step": 1202 }, { "epoch": 0.06705311855526448, "grad_norm": 0.528708279132843, "learning_rate": 9.93548915245929e-05, "loss": 1.7586, "step": 1203 }, { "epoch": 0.06710885680842762, "grad_norm": 0.48107293248176575, "learning_rate": 9.935346501071095e-05, "loss": 1.6344, "step": 1204 }, { "epoch": 0.06716459506159077, "grad_norm": 0.5078762769699097, "learning_rate": 9.935203693162483e-05, "loss": 1.7792, "step": 1205 }, { "epoch": 0.06722033331475391, "grad_norm": 0.4985436797142029, "learning_rate": 9.935060728737986e-05, "loss": 1.8226, "step": 1206 }, { "epoch": 0.06727607156791707, "grad_norm": 0.5001996755599976, "learning_rate": 9.934917607802135e-05, "loss": 1.65, "step": 1207 }, { "epoch": 0.06733180982108021, "grad_norm": 0.4552146792411804, "learning_rate": 9.934774330359471e-05, "loss": 1.5889, "step": 1208 }, { "epoch": 0.06738754807424335, "grad_norm": 0.4674372673034668, "learning_rate": 9.934630896414536e-05, "loss": 1.6367, "step": 1209 }, { "epoch": 0.0674432863274065, "grad_norm": 0.4658129811286926, "learning_rate": 9.93448730597188e-05, "loss": 1.6565, "step": 1210 }, { "epoch": 0.06749902458056964, "grad_norm": 0.4953976273536682, "learning_rate": 9.934343559036056e-05, "loss": 1.7874, "step": 1211 }, { "epoch": 0.0675547628337328, "grad_norm": 0.5296363830566406, "learning_rate": 9.934199655611624e-05, "loss": 1.4178, "step": 1212 }, { "epoch": 0.06761050108689594, "grad_norm": 0.5114982724189758, "learning_rate": 9.934055595703149e-05, "loss": 1.8371, "step": 1213 }, { "epoch": 0.06766623934005908, "grad_norm": 0.54044109582901, "learning_rate": 9.933911379315198e-05, "loss": 1.77, "step": 1214 }, { "epoch": 0.06772197759322222, "grad_norm": 0.5306605100631714, "learning_rate": 9.933767006452341e-05, "loss": 1.7457, "step": 1215 }, { "epoch": 0.06777771584638538, "grad_norm": 0.45446470379829407, "learning_rate": 9.933622477119165e-05, "loss": 1.4759, "step": 1216 }, { "epoch": 0.06783345409954852, "grad_norm": 0.5077145099639893, "learning_rate": 9.933477791320246e-05, "loss": 1.5853, "step": 1217 }, { "epoch": 0.06788919235271167, "grad_norm": 0.4767955541610718, "learning_rate": 9.933332949060177e-05, "loss": 1.624, "step": 1218 }, { "epoch": 0.06794493060587481, "grad_norm": 0.5637747049331665, "learning_rate": 9.93318795034355e-05, "loss": 1.9126, "step": 1219 }, { "epoch": 0.06800066885903795, "grad_norm": 0.5085890889167786, "learning_rate": 9.933042795174963e-05, "loss": 1.7807, "step": 1220 }, { "epoch": 0.06805640711220111, "grad_norm": 0.539089024066925, "learning_rate": 9.93289748355902e-05, "loss": 1.8777, "step": 1221 }, { "epoch": 0.06811214536536425, "grad_norm": 0.557056725025177, "learning_rate": 9.93275201550033e-05, "loss": 1.7479, "step": 1222 }, { "epoch": 0.0681678836185274, "grad_norm": 0.5699108839035034, "learning_rate": 9.932606391003508e-05, "loss": 1.9158, "step": 1223 }, { "epoch": 0.06822362187169054, "grad_norm": 0.5341405868530273, "learning_rate": 9.932460610073167e-05, "loss": 1.7554, "step": 1224 }, { "epoch": 0.06827936012485368, "grad_norm": 0.6143330335617065, "learning_rate": 9.932314672713936e-05, "loss": 1.7927, "step": 1225 }, { "epoch": 0.06833509837801684, "grad_norm": 0.500853419303894, "learning_rate": 9.932168578930439e-05, "loss": 1.7221, "step": 1226 }, { "epoch": 0.06839083663117998, "grad_norm": 0.5622022151947021, "learning_rate": 9.932022328727313e-05, "loss": 2.0262, "step": 1227 }, { "epoch": 0.06844657488434312, "grad_norm": 0.4860107898712158, "learning_rate": 9.931875922109195e-05, "loss": 1.7353, "step": 1228 }, { "epoch": 0.06850231313750627, "grad_norm": 0.5524904131889343, "learning_rate": 9.931729359080726e-05, "loss": 1.8789, "step": 1229 }, { "epoch": 0.06855805139066942, "grad_norm": 0.5192303657531738, "learning_rate": 9.931582639646556e-05, "loss": 1.9549, "step": 1230 }, { "epoch": 0.06861378964383257, "grad_norm": 0.47247666120529175, "learning_rate": 9.931435763811338e-05, "loss": 1.7371, "step": 1231 }, { "epoch": 0.06866952789699571, "grad_norm": 0.5242395401000977, "learning_rate": 9.93128873157973e-05, "loss": 1.8187, "step": 1232 }, { "epoch": 0.06872526615015885, "grad_norm": 0.4895036816596985, "learning_rate": 9.931141542956394e-05, "loss": 1.6269, "step": 1233 }, { "epoch": 0.068781004403322, "grad_norm": 0.5657653212547302, "learning_rate": 9.930994197945999e-05, "loss": 1.9831, "step": 1234 }, { "epoch": 0.06883674265648515, "grad_norm": 0.5430802702903748, "learning_rate": 9.930846696553219e-05, "loss": 1.9577, "step": 1235 }, { "epoch": 0.0688924809096483, "grad_norm": 0.6241572499275208, "learning_rate": 9.930699038782729e-05, "loss": 1.7921, "step": 1236 }, { "epoch": 0.06894821916281144, "grad_norm": 0.5370758175849915, "learning_rate": 9.930551224639215e-05, "loss": 1.921, "step": 1237 }, { "epoch": 0.06900395741597458, "grad_norm": 0.5141679048538208, "learning_rate": 9.930403254127363e-05, "loss": 1.8209, "step": 1238 }, { "epoch": 0.06905969566913774, "grad_norm": 0.511951208114624, "learning_rate": 9.930255127251866e-05, "loss": 1.9209, "step": 1239 }, { "epoch": 0.06911543392230088, "grad_norm": 0.5124894976615906, "learning_rate": 9.93010684401742e-05, "loss": 1.9073, "step": 1240 }, { "epoch": 0.06917117217546402, "grad_norm": 0.49549224972724915, "learning_rate": 9.929958404428732e-05, "loss": 1.6648, "step": 1241 }, { "epoch": 0.06922691042862716, "grad_norm": 0.4937445819377899, "learning_rate": 9.929809808490505e-05, "loss": 1.6878, "step": 1242 }, { "epoch": 0.06928264868179031, "grad_norm": 0.5082506537437439, "learning_rate": 9.929661056207455e-05, "loss": 1.8051, "step": 1243 }, { "epoch": 0.06933838693495346, "grad_norm": 0.5111956596374512, "learning_rate": 9.929512147584297e-05, "loss": 1.7016, "step": 1244 }, { "epoch": 0.0693941251881166, "grad_norm": 0.46468988060951233, "learning_rate": 9.929363082625755e-05, "loss": 1.7512, "step": 1245 }, { "epoch": 0.06944986344127975, "grad_norm": 0.5274616479873657, "learning_rate": 9.929213861336557e-05, "loss": 1.7578, "step": 1246 }, { "epoch": 0.06950560169444289, "grad_norm": 0.5274865031242371, "learning_rate": 9.929064483721435e-05, "loss": 1.7655, "step": 1247 }, { "epoch": 0.06956133994760604, "grad_norm": 0.5010793209075928, "learning_rate": 9.928914949785124e-05, "loss": 1.8085, "step": 1248 }, { "epoch": 0.06961707820076919, "grad_norm": 0.5141963362693787, "learning_rate": 9.928765259532371e-05, "loss": 1.4068, "step": 1249 }, { "epoch": 0.06967281645393233, "grad_norm": 0.5250492691993713, "learning_rate": 9.928615412967919e-05, "loss": 1.9137, "step": 1250 }, { "epoch": 0.06972855470709548, "grad_norm": 0.5868452191352844, "learning_rate": 9.928465410096521e-05, "loss": 1.6562, "step": 1251 }, { "epoch": 0.06978429296025862, "grad_norm": 0.553932785987854, "learning_rate": 9.928315250922937e-05, "loss": 1.7661, "step": 1252 }, { "epoch": 0.06984003121342178, "grad_norm": 0.49618422985076904, "learning_rate": 9.928164935451927e-05, "loss": 1.9336, "step": 1253 }, { "epoch": 0.06989576946658492, "grad_norm": 0.5094950199127197, "learning_rate": 9.928014463688257e-05, "loss": 1.8955, "step": 1254 }, { "epoch": 0.06995150771974806, "grad_norm": 0.5146217942237854, "learning_rate": 9.927863835636703e-05, "loss": 1.7892, "step": 1255 }, { "epoch": 0.0700072459729112, "grad_norm": 0.5579236745834351, "learning_rate": 9.927713051302037e-05, "loss": 1.8628, "step": 1256 }, { "epoch": 0.07006298422607435, "grad_norm": 0.5719481706619263, "learning_rate": 9.927562110689046e-05, "loss": 1.9999, "step": 1257 }, { "epoch": 0.0701187224792375, "grad_norm": 0.5164546966552734, "learning_rate": 9.927411013802512e-05, "loss": 1.6341, "step": 1258 }, { "epoch": 0.07017446073240065, "grad_norm": 0.5111738443374634, "learning_rate": 9.927259760647232e-05, "loss": 1.8801, "step": 1259 }, { "epoch": 0.07023019898556379, "grad_norm": 0.47879326343536377, "learning_rate": 9.927108351227998e-05, "loss": 1.6122, "step": 1260 }, { "epoch": 0.07028593723872693, "grad_norm": 0.6105756759643555, "learning_rate": 9.926956785549616e-05, "loss": 2.0343, "step": 1261 }, { "epoch": 0.07034167549189009, "grad_norm": 0.5080457329750061, "learning_rate": 9.92680506361689e-05, "loss": 1.9449, "step": 1262 }, { "epoch": 0.07039741374505323, "grad_norm": 0.4686660170555115, "learning_rate": 9.926653185434634e-05, "loss": 1.7354, "step": 1263 }, { "epoch": 0.07045315199821638, "grad_norm": 0.5146884322166443, "learning_rate": 9.926501151007662e-05, "loss": 1.8347, "step": 1264 }, { "epoch": 0.07050889025137952, "grad_norm": 0.5533162355422974, "learning_rate": 9.926348960340796e-05, "loss": 1.887, "step": 1265 }, { "epoch": 0.07056462850454266, "grad_norm": 0.5264948606491089, "learning_rate": 9.926196613438865e-05, "loss": 1.8267, "step": 1266 }, { "epoch": 0.07062036675770582, "grad_norm": 0.5064124464988708, "learning_rate": 9.926044110306698e-05, "loss": 1.4021, "step": 1267 }, { "epoch": 0.07067610501086896, "grad_norm": 0.5374730229377747, "learning_rate": 9.925891450949135e-05, "loss": 2.1346, "step": 1268 }, { "epoch": 0.0707318432640321, "grad_norm": 0.5050212144851685, "learning_rate": 9.925738635371011e-05, "loss": 1.7458, "step": 1269 }, { "epoch": 0.07078758151719525, "grad_norm": 0.5477495789527893, "learning_rate": 9.925585663577181e-05, "loss": 1.9184, "step": 1270 }, { "epoch": 0.0708433197703584, "grad_norm": 0.4926922917366028, "learning_rate": 9.92543253557249e-05, "loss": 1.7406, "step": 1271 }, { "epoch": 0.07089905802352155, "grad_norm": 0.5027531981468201, "learning_rate": 9.925279251361795e-05, "loss": 1.6771, "step": 1272 }, { "epoch": 0.07095479627668469, "grad_norm": 0.44907525181770325, "learning_rate": 9.92512581094996e-05, "loss": 1.534, "step": 1273 }, { "epoch": 0.07101053452984783, "grad_norm": 0.4935868978500366, "learning_rate": 9.92497221434185e-05, "loss": 1.6932, "step": 1274 }, { "epoch": 0.07106627278301098, "grad_norm": 0.5403043031692505, "learning_rate": 9.924818461542335e-05, "loss": 1.7863, "step": 1275 }, { "epoch": 0.07112201103617413, "grad_norm": 0.49991410970687866, "learning_rate": 9.924664552556293e-05, "loss": 1.5134, "step": 1276 }, { "epoch": 0.07117774928933727, "grad_norm": 0.5363178849220276, "learning_rate": 9.924510487388603e-05, "loss": 1.7264, "step": 1277 }, { "epoch": 0.07123348754250042, "grad_norm": 0.6076151728630066, "learning_rate": 9.924356266044153e-05, "loss": 2.0642, "step": 1278 }, { "epoch": 0.07128922579566356, "grad_norm": 0.5013806223869324, "learning_rate": 9.924201888527833e-05, "loss": 1.5962, "step": 1279 }, { "epoch": 0.0713449640488267, "grad_norm": 0.4695322513580322, "learning_rate": 9.924047354844539e-05, "loss": 1.657, "step": 1280 }, { "epoch": 0.07140070230198986, "grad_norm": 0.5039030909538269, "learning_rate": 9.923892664999173e-05, "loss": 1.8447, "step": 1281 }, { "epoch": 0.071456440555153, "grad_norm": 0.5190325379371643, "learning_rate": 9.923737818996639e-05, "loss": 1.7732, "step": 1282 }, { "epoch": 0.07151217880831615, "grad_norm": 0.4986951947212219, "learning_rate": 9.92358281684185e-05, "loss": 1.5262, "step": 1283 }, { "epoch": 0.07156791706147929, "grad_norm": 0.5534316897392273, "learning_rate": 9.92342765853972e-05, "loss": 2.0328, "step": 1284 }, { "epoch": 0.07162365531464245, "grad_norm": 0.49968552589416504, "learning_rate": 9.923272344095169e-05, "loss": 1.7766, "step": 1285 }, { "epoch": 0.07167939356780559, "grad_norm": 0.5316057205200195, "learning_rate": 9.923116873513125e-05, "loss": 1.9544, "step": 1286 }, { "epoch": 0.07173513182096873, "grad_norm": 0.49467048048973083, "learning_rate": 9.922961246798516e-05, "loss": 1.6245, "step": 1287 }, { "epoch": 0.07179087007413187, "grad_norm": 0.5283698439598083, "learning_rate": 9.922805463956282e-05, "loss": 1.8113, "step": 1288 }, { "epoch": 0.07184660832729502, "grad_norm": 0.5117636322975159, "learning_rate": 9.922649524991359e-05, "loss": 1.5682, "step": 1289 }, { "epoch": 0.07190234658045817, "grad_norm": 0.524705708026886, "learning_rate": 9.922493429908695e-05, "loss": 1.7724, "step": 1290 }, { "epoch": 0.07195808483362132, "grad_norm": 0.5265300273895264, "learning_rate": 9.922337178713238e-05, "loss": 1.8775, "step": 1291 }, { "epoch": 0.07201382308678446, "grad_norm": 0.4668891429901123, "learning_rate": 9.922180771409945e-05, "loss": 1.6585, "step": 1292 }, { "epoch": 0.0720695613399476, "grad_norm": 0.5392476916313171, "learning_rate": 9.922024208003777e-05, "loss": 1.7811, "step": 1293 }, { "epoch": 0.07212529959311076, "grad_norm": 0.45741191506385803, "learning_rate": 9.921867488499699e-05, "loss": 1.5123, "step": 1294 }, { "epoch": 0.0721810378462739, "grad_norm": 0.5779647827148438, "learning_rate": 9.92171061290268e-05, "loss": 1.798, "step": 1295 }, { "epoch": 0.07223677609943704, "grad_norm": 0.5434536337852478, "learning_rate": 9.921553581217697e-05, "loss": 1.8681, "step": 1296 }, { "epoch": 0.07229251435260019, "grad_norm": 0.47686439752578735, "learning_rate": 9.921396393449727e-05, "loss": 1.5803, "step": 1297 }, { "epoch": 0.07234825260576333, "grad_norm": 0.5182580947875977, "learning_rate": 9.921239049603759e-05, "loss": 1.8512, "step": 1298 }, { "epoch": 0.07240399085892649, "grad_norm": 0.5331408977508545, "learning_rate": 9.921081549684779e-05, "loss": 1.9001, "step": 1299 }, { "epoch": 0.07245972911208963, "grad_norm": 0.49691641330718994, "learning_rate": 9.920923893697786e-05, "loss": 1.718, "step": 1300 }, { "epoch": 0.07251546736525277, "grad_norm": 0.526009202003479, "learning_rate": 9.920766081647779e-05, "loss": 1.6531, "step": 1301 }, { "epoch": 0.07257120561841592, "grad_norm": 0.5836690664291382, "learning_rate": 9.92060811353976e-05, "loss": 1.6522, "step": 1302 }, { "epoch": 0.07262694387157906, "grad_norm": 0.5216406583786011, "learning_rate": 9.920449989378742e-05, "loss": 1.5131, "step": 1303 }, { "epoch": 0.07268268212474222, "grad_norm": 0.4874148964881897, "learning_rate": 9.920291709169737e-05, "loss": 1.5922, "step": 1304 }, { "epoch": 0.07273842037790536, "grad_norm": 0.4904099404811859, "learning_rate": 9.920133272917767e-05, "loss": 1.83, "step": 1305 }, { "epoch": 0.0727941586310685, "grad_norm": 0.5295507907867432, "learning_rate": 9.919974680627856e-05, "loss": 1.8742, "step": 1306 }, { "epoch": 0.07284989688423164, "grad_norm": 0.5288472175598145, "learning_rate": 9.919815932305034e-05, "loss": 1.8706, "step": 1307 }, { "epoch": 0.0729056351373948, "grad_norm": 0.48234906792640686, "learning_rate": 9.919657027954335e-05, "loss": 1.6827, "step": 1308 }, { "epoch": 0.07296137339055794, "grad_norm": 0.5203633904457092, "learning_rate": 9.919497967580798e-05, "loss": 1.7064, "step": 1309 }, { "epoch": 0.07301711164372109, "grad_norm": 0.51950603723526, "learning_rate": 9.919338751189468e-05, "loss": 1.7643, "step": 1310 }, { "epoch": 0.07307284989688423, "grad_norm": 0.5219436883926392, "learning_rate": 9.919179378785396e-05, "loss": 1.928, "step": 1311 }, { "epoch": 0.07312858815004737, "grad_norm": 0.5543720722198486, "learning_rate": 9.919019850373635e-05, "loss": 2.0754, "step": 1312 }, { "epoch": 0.07318432640321053, "grad_norm": 0.4778376817703247, "learning_rate": 9.918860165959243e-05, "loss": 1.652, "step": 1313 }, { "epoch": 0.07324006465637367, "grad_norm": 0.5367230772972107, "learning_rate": 9.918700325547286e-05, "loss": 1.9413, "step": 1314 }, { "epoch": 0.07329580290953681, "grad_norm": 0.5712525248527527, "learning_rate": 9.918540329142831e-05, "loss": 1.7279, "step": 1315 }, { "epoch": 0.07335154116269996, "grad_norm": 0.5032913088798523, "learning_rate": 9.918380176750955e-05, "loss": 1.7546, "step": 1316 }, { "epoch": 0.07340727941586311, "grad_norm": 0.4760904908180237, "learning_rate": 9.918219868376737e-05, "loss": 1.657, "step": 1317 }, { "epoch": 0.07346301766902626, "grad_norm": 0.5059273838996887, "learning_rate": 9.91805940402526e-05, "loss": 1.8728, "step": 1318 }, { "epoch": 0.0735187559221894, "grad_norm": 0.5608049631118774, "learning_rate": 9.917898783701612e-05, "loss": 2.008, "step": 1319 }, { "epoch": 0.07357449417535254, "grad_norm": 0.5329555869102478, "learning_rate": 9.917738007410888e-05, "loss": 1.6254, "step": 1320 }, { "epoch": 0.07363023242851569, "grad_norm": 0.5802140831947327, "learning_rate": 9.917577075158186e-05, "loss": 2.0478, "step": 1321 }, { "epoch": 0.07368597068167884, "grad_norm": 0.5300236940383911, "learning_rate": 9.917415986948612e-05, "loss": 1.8852, "step": 1322 }, { "epoch": 0.07374170893484198, "grad_norm": 0.4858631491661072, "learning_rate": 9.917254742787273e-05, "loss": 1.5704, "step": 1323 }, { "epoch": 0.07379744718800513, "grad_norm": 0.5059242248535156, "learning_rate": 9.917093342679284e-05, "loss": 1.6683, "step": 1324 }, { "epoch": 0.07385318544116827, "grad_norm": 0.4971073567867279, "learning_rate": 9.916931786629761e-05, "loss": 1.6127, "step": 1325 }, { "epoch": 0.07390892369433141, "grad_norm": 0.5727537274360657, "learning_rate": 9.916770074643831e-05, "loss": 1.8274, "step": 1326 }, { "epoch": 0.07396466194749457, "grad_norm": 0.5242769718170166, "learning_rate": 9.91660820672662e-05, "loss": 1.7747, "step": 1327 }, { "epoch": 0.07402040020065771, "grad_norm": 0.5268994569778442, "learning_rate": 9.916446182883264e-05, "loss": 1.8716, "step": 1328 }, { "epoch": 0.07407613845382086, "grad_norm": 0.5069685578346252, "learning_rate": 9.916284003118897e-05, "loss": 1.572, "step": 1329 }, { "epoch": 0.074131876706984, "grad_norm": 0.5535740852355957, "learning_rate": 9.916121667438667e-05, "loss": 1.852, "step": 1330 }, { "epoch": 0.07418761496014716, "grad_norm": 0.5100526213645935, "learning_rate": 9.915959175847723e-05, "loss": 1.8053, "step": 1331 }, { "epoch": 0.0742433532133103, "grad_norm": 0.5486835837364197, "learning_rate": 9.915796528351212e-05, "loss": 1.9061, "step": 1332 }, { "epoch": 0.07429909146647344, "grad_norm": 0.546424150466919, "learning_rate": 9.915633724954299e-05, "loss": 1.8031, "step": 1333 }, { "epoch": 0.07435482971963658, "grad_norm": 0.5596832036972046, "learning_rate": 9.915470765662143e-05, "loss": 1.7918, "step": 1334 }, { "epoch": 0.07441056797279973, "grad_norm": 0.5737068057060242, "learning_rate": 9.915307650479914e-05, "loss": 1.7687, "step": 1335 }, { "epoch": 0.07446630622596288, "grad_norm": 0.5227526426315308, "learning_rate": 9.915144379412784e-05, "loss": 1.6509, "step": 1336 }, { "epoch": 0.07452204447912603, "grad_norm": 0.5172739028930664, "learning_rate": 9.914980952465932e-05, "loss": 1.7922, "step": 1337 }, { "epoch": 0.07457778273228917, "grad_norm": 0.5068166851997375, "learning_rate": 9.91481736964454e-05, "loss": 1.6475, "step": 1338 }, { "epoch": 0.07463352098545231, "grad_norm": 0.5804305076599121, "learning_rate": 9.914653630953797e-05, "loss": 1.9451, "step": 1339 }, { "epoch": 0.07468925923861547, "grad_norm": 0.5118273496627808, "learning_rate": 9.914489736398895e-05, "loss": 1.6014, "step": 1340 }, { "epoch": 0.07474499749177861, "grad_norm": 0.47122183442115784, "learning_rate": 9.914325685985033e-05, "loss": 1.7206, "step": 1341 }, { "epoch": 0.07480073574494175, "grad_norm": 0.5404577851295471, "learning_rate": 9.914161479717413e-05, "loss": 1.984, "step": 1342 }, { "epoch": 0.0748564739981049, "grad_norm": 0.5037184953689575, "learning_rate": 9.91399711760124e-05, "loss": 1.8535, "step": 1343 }, { "epoch": 0.07491221225126804, "grad_norm": 0.5099769830703735, "learning_rate": 9.91383259964173e-05, "loss": 1.7632, "step": 1344 }, { "epoch": 0.0749679505044312, "grad_norm": 0.5458886623382568, "learning_rate": 9.9136679258441e-05, "loss": 2.0607, "step": 1345 }, { "epoch": 0.07502368875759434, "grad_norm": 0.4648517668247223, "learning_rate": 9.913503096213572e-05, "loss": 1.914, "step": 1346 }, { "epoch": 0.07507942701075748, "grad_norm": 0.5120497941970825, "learning_rate": 9.913338110755375e-05, "loss": 1.8349, "step": 1347 }, { "epoch": 0.07513516526392063, "grad_norm": 0.4551779329776764, "learning_rate": 9.913172969474737e-05, "loss": 1.5673, "step": 1348 }, { "epoch": 0.07519090351708377, "grad_norm": 0.5728102326393127, "learning_rate": 9.913007672376899e-05, "loss": 2.1014, "step": 1349 }, { "epoch": 0.07524664177024692, "grad_norm": 0.47414430975914, "learning_rate": 9.912842219467105e-05, "loss": 1.6999, "step": 1350 }, { "epoch": 0.07530238002341007, "grad_norm": 0.5111278891563416, "learning_rate": 9.912676610750598e-05, "loss": 1.9367, "step": 1351 }, { "epoch": 0.07535811827657321, "grad_norm": 0.5118902325630188, "learning_rate": 9.91251084623263e-05, "loss": 1.8136, "step": 1352 }, { "epoch": 0.07541385652973635, "grad_norm": 0.5514450669288635, "learning_rate": 9.912344925918462e-05, "loss": 1.7309, "step": 1353 }, { "epoch": 0.07546959478289951, "grad_norm": 0.4836481511592865, "learning_rate": 9.912178849813353e-05, "loss": 1.2918, "step": 1354 }, { "epoch": 0.07552533303606265, "grad_norm": 0.5168613791465759, "learning_rate": 9.91201261792257e-05, "loss": 1.8673, "step": 1355 }, { "epoch": 0.0755810712892258, "grad_norm": 0.48082637786865234, "learning_rate": 9.911846230251388e-05, "loss": 1.6275, "step": 1356 }, { "epoch": 0.07563680954238894, "grad_norm": 0.504571259021759, "learning_rate": 9.91167968680508e-05, "loss": 1.7718, "step": 1357 }, { "epoch": 0.07569254779555208, "grad_norm": 0.499100923538208, "learning_rate": 9.911512987588932e-05, "loss": 1.7842, "step": 1358 }, { "epoch": 0.07574828604871524, "grad_norm": 0.4926021993160248, "learning_rate": 9.911346132608225e-05, "loss": 1.5556, "step": 1359 }, { "epoch": 0.07580402430187838, "grad_norm": 0.5981921553611755, "learning_rate": 9.911179121868255e-05, "loss": 1.853, "step": 1360 }, { "epoch": 0.07585976255504152, "grad_norm": 0.4938274621963501, "learning_rate": 9.911011955374316e-05, "loss": 1.646, "step": 1361 }, { "epoch": 0.07591550080820467, "grad_norm": 0.4952639937400818, "learning_rate": 9.910844633131713e-05, "loss": 1.6188, "step": 1362 }, { "epoch": 0.07597123906136782, "grad_norm": 0.5024005770683289, "learning_rate": 9.91067715514575e-05, "loss": 1.9164, "step": 1363 }, { "epoch": 0.07602697731453097, "grad_norm": 0.5488448143005371, "learning_rate": 9.910509521421738e-05, "loss": 1.9139, "step": 1364 }, { "epoch": 0.07608271556769411, "grad_norm": 0.5247362852096558, "learning_rate": 9.910341731964996e-05, "loss": 1.8488, "step": 1365 }, { "epoch": 0.07613845382085725, "grad_norm": 0.5229883193969727, "learning_rate": 9.910173786780842e-05, "loss": 1.8503, "step": 1366 }, { "epoch": 0.0761941920740204, "grad_norm": 0.49642667174339294, "learning_rate": 9.910005685874603e-05, "loss": 1.7051, "step": 1367 }, { "epoch": 0.07624993032718355, "grad_norm": 0.48131421208381653, "learning_rate": 9.909837429251614e-05, "loss": 1.4925, "step": 1368 }, { "epoch": 0.0763056685803467, "grad_norm": 0.4743631184101105, "learning_rate": 9.909669016917204e-05, "loss": 1.5833, "step": 1369 }, { "epoch": 0.07636140683350984, "grad_norm": 0.5918928980827332, "learning_rate": 9.909500448876721e-05, "loss": 2.1295, "step": 1370 }, { "epoch": 0.07641714508667298, "grad_norm": 0.5590381622314453, "learning_rate": 9.909331725135509e-05, "loss": 1.862, "step": 1371 }, { "epoch": 0.07647288333983612, "grad_norm": 0.5015060305595398, "learning_rate": 9.909162845698916e-05, "loss": 1.7541, "step": 1372 }, { "epoch": 0.07652862159299928, "grad_norm": 0.5213440656661987, "learning_rate": 9.9089938105723e-05, "loss": 1.7944, "step": 1373 }, { "epoch": 0.07658435984616242, "grad_norm": 0.5424663424491882, "learning_rate": 9.908824619761023e-05, "loss": 1.8207, "step": 1374 }, { "epoch": 0.07664009809932557, "grad_norm": 0.548622727394104, "learning_rate": 9.908655273270449e-05, "loss": 1.8224, "step": 1375 }, { "epoch": 0.07669583635248871, "grad_norm": 0.5018399953842163, "learning_rate": 9.908485771105949e-05, "loss": 1.856, "step": 1376 }, { "epoch": 0.07675157460565186, "grad_norm": 0.5578395128250122, "learning_rate": 9.908316113272897e-05, "loss": 1.7791, "step": 1377 }, { "epoch": 0.07680731285881501, "grad_norm": 0.5207507610321045, "learning_rate": 9.908146299776678e-05, "loss": 1.7608, "step": 1378 }, { "epoch": 0.07686305111197815, "grad_norm": 0.5391795039176941, "learning_rate": 9.907976330622674e-05, "loss": 1.772, "step": 1379 }, { "epoch": 0.0769187893651413, "grad_norm": 0.47418221831321716, "learning_rate": 9.907806205816277e-05, "loss": 1.2319, "step": 1380 }, { "epoch": 0.07697452761830444, "grad_norm": 0.49630096554756165, "learning_rate": 9.90763592536288e-05, "loss": 1.676, "step": 1381 }, { "epoch": 0.0770302658714676, "grad_norm": 0.533801257610321, "learning_rate": 9.907465489267886e-05, "loss": 1.7612, "step": 1382 }, { "epoch": 0.07708600412463074, "grad_norm": 0.5061699748039246, "learning_rate": 9.907294897536699e-05, "loss": 1.8883, "step": 1383 }, { "epoch": 0.07714174237779388, "grad_norm": 0.5732898116111755, "learning_rate": 9.90712415017473e-05, "loss": 1.8195, "step": 1384 }, { "epoch": 0.07719748063095702, "grad_norm": 0.5062339901924133, "learning_rate": 9.906953247187392e-05, "loss": 1.765, "step": 1385 }, { "epoch": 0.07725321888412018, "grad_norm": 0.4672509729862213, "learning_rate": 9.906782188580107e-05, "loss": 1.5199, "step": 1386 }, { "epoch": 0.07730895713728332, "grad_norm": 0.5902494788169861, "learning_rate": 9.9066109743583e-05, "loss": 2.1369, "step": 1387 }, { "epoch": 0.07736469539044646, "grad_norm": 0.4874188005924225, "learning_rate": 9.9064396045274e-05, "loss": 1.6941, "step": 1388 }, { "epoch": 0.0774204336436096, "grad_norm": 0.5620763301849365, "learning_rate": 9.906268079092843e-05, "loss": 1.7395, "step": 1389 }, { "epoch": 0.07747617189677275, "grad_norm": 0.5454680919647217, "learning_rate": 9.906096398060067e-05, "loss": 1.7771, "step": 1390 }, { "epoch": 0.0775319101499359, "grad_norm": 0.5270059704780579, "learning_rate": 9.905924561434519e-05, "loss": 1.8375, "step": 1391 }, { "epoch": 0.07758764840309905, "grad_norm": 0.4714577794075012, "learning_rate": 9.905752569221647e-05, "loss": 1.4259, "step": 1392 }, { "epoch": 0.07764338665626219, "grad_norm": 0.4905398190021515, "learning_rate": 9.905580421426905e-05, "loss": 1.7302, "step": 1393 }, { "epoch": 0.07769912490942534, "grad_norm": 0.5166676640510559, "learning_rate": 9.905408118055755e-05, "loss": 1.665, "step": 1394 }, { "epoch": 0.07775486316258848, "grad_norm": 0.5545955896377563, "learning_rate": 9.905235659113658e-05, "loss": 1.7589, "step": 1395 }, { "epoch": 0.07781060141575163, "grad_norm": 0.5974867343902588, "learning_rate": 9.905063044606088e-05, "loss": 1.9677, "step": 1396 }, { "epoch": 0.07786633966891478, "grad_norm": 0.538375198841095, "learning_rate": 9.904890274538516e-05, "loss": 1.6438, "step": 1397 }, { "epoch": 0.07792207792207792, "grad_norm": 0.5226508378982544, "learning_rate": 9.904717348916421e-05, "loss": 1.8672, "step": 1398 }, { "epoch": 0.07797781617524106, "grad_norm": 0.5076341032981873, "learning_rate": 9.904544267745288e-05, "loss": 1.6942, "step": 1399 }, { "epoch": 0.07803355442840422, "grad_norm": 0.5587323307991028, "learning_rate": 9.904371031030608e-05, "loss": 2.0127, "step": 1400 }, { "epoch": 0.07808929268156736, "grad_norm": 0.5744814276695251, "learning_rate": 9.904197638777872e-05, "loss": 1.6781, "step": 1401 }, { "epoch": 0.0781450309347305, "grad_norm": 0.4966742992401123, "learning_rate": 9.904024090992581e-05, "loss": 1.7314, "step": 1402 }, { "epoch": 0.07820076918789365, "grad_norm": 0.5050981640815735, "learning_rate": 9.903850387680238e-05, "loss": 1.8782, "step": 1403 }, { "epoch": 0.07825650744105679, "grad_norm": 0.518583357334137, "learning_rate": 9.903676528846352e-05, "loss": 1.9028, "step": 1404 }, { "epoch": 0.07831224569421995, "grad_norm": 0.5047330856323242, "learning_rate": 9.903502514496436e-05, "loss": 1.6501, "step": 1405 }, { "epoch": 0.07836798394738309, "grad_norm": 0.5036478042602539, "learning_rate": 9.903328344636012e-05, "loss": 1.7873, "step": 1406 }, { "epoch": 0.07842372220054623, "grad_norm": 0.49196913838386536, "learning_rate": 9.903154019270599e-05, "loss": 1.6404, "step": 1407 }, { "epoch": 0.07847946045370938, "grad_norm": 0.5227888226509094, "learning_rate": 9.90297953840573e-05, "loss": 1.8049, "step": 1408 }, { "epoch": 0.07853519870687253, "grad_norm": 0.5419712662696838, "learning_rate": 9.902804902046935e-05, "loss": 1.8979, "step": 1409 }, { "epoch": 0.07859093696003568, "grad_norm": 0.5512637495994568, "learning_rate": 9.902630110199753e-05, "loss": 1.5322, "step": 1410 }, { "epoch": 0.07864667521319882, "grad_norm": 0.5147241353988647, "learning_rate": 9.90245516286973e-05, "loss": 1.8126, "step": 1411 }, { "epoch": 0.07870241346636196, "grad_norm": 0.5257126092910767, "learning_rate": 9.902280060062413e-05, "loss": 1.9197, "step": 1412 }, { "epoch": 0.0787581517195251, "grad_norm": 0.5739386677742004, "learning_rate": 9.902104801783352e-05, "loss": 2.0767, "step": 1413 }, { "epoch": 0.07881388997268826, "grad_norm": 0.47901228070259094, "learning_rate": 9.90192938803811e-05, "loss": 1.4594, "step": 1414 }, { "epoch": 0.0788696282258514, "grad_norm": 0.4943484663963318, "learning_rate": 9.901753818832248e-05, "loss": 1.6394, "step": 1415 }, { "epoch": 0.07892536647901455, "grad_norm": 0.5033669471740723, "learning_rate": 9.901578094171333e-05, "loss": 1.6963, "step": 1416 }, { "epoch": 0.07898110473217769, "grad_norm": 0.5039759874343872, "learning_rate": 9.90140221406094e-05, "loss": 1.5721, "step": 1417 }, { "epoch": 0.07903684298534083, "grad_norm": 0.49595627188682556, "learning_rate": 9.901226178506646e-05, "loss": 1.7414, "step": 1418 }, { "epoch": 0.07909258123850399, "grad_norm": 0.5233118534088135, "learning_rate": 9.901049987514033e-05, "loss": 1.7728, "step": 1419 }, { "epoch": 0.07914831949166713, "grad_norm": 0.5164638757705688, "learning_rate": 9.90087364108869e-05, "loss": 1.8569, "step": 1420 }, { "epoch": 0.07920405774483028, "grad_norm": 0.5309315323829651, "learning_rate": 9.900697139236209e-05, "loss": 1.7734, "step": 1421 }, { "epoch": 0.07925979599799342, "grad_norm": 0.4936157464981079, "learning_rate": 9.900520481962188e-05, "loss": 1.6859, "step": 1422 }, { "epoch": 0.07931553425115657, "grad_norm": 0.4760551452636719, "learning_rate": 9.90034366927223e-05, "loss": 1.7148, "step": 1423 }, { "epoch": 0.07937127250431972, "grad_norm": 0.5099088549613953, "learning_rate": 9.90016670117194e-05, "loss": 1.7605, "step": 1424 }, { "epoch": 0.07942701075748286, "grad_norm": 0.512695848941803, "learning_rate": 9.899989577666933e-05, "loss": 1.7824, "step": 1425 }, { "epoch": 0.079482749010646, "grad_norm": 0.5051438212394714, "learning_rate": 9.899812298762826e-05, "loss": 1.8003, "step": 1426 }, { "epoch": 0.07953848726380915, "grad_norm": 0.5289508700370789, "learning_rate": 9.899634864465241e-05, "loss": 1.7588, "step": 1427 }, { "epoch": 0.0795942255169723, "grad_norm": 0.4910021424293518, "learning_rate": 9.899457274779804e-05, "loss": 1.7284, "step": 1428 }, { "epoch": 0.07964996377013545, "grad_norm": 0.6068856716156006, "learning_rate": 9.899279529712148e-05, "loss": 1.9947, "step": 1429 }, { "epoch": 0.07970570202329859, "grad_norm": 0.5239669680595398, "learning_rate": 9.899101629267911e-05, "loss": 1.5956, "step": 1430 }, { "epoch": 0.07976144027646173, "grad_norm": 0.5577272176742554, "learning_rate": 9.898923573452734e-05, "loss": 2.0396, "step": 1431 }, { "epoch": 0.07981717852962489, "grad_norm": 0.4893241822719574, "learning_rate": 9.898745362272264e-05, "loss": 1.5054, "step": 1432 }, { "epoch": 0.07987291678278803, "grad_norm": 0.48603859543800354, "learning_rate": 9.898566995732153e-05, "loss": 1.6304, "step": 1433 }, { "epoch": 0.07992865503595117, "grad_norm": 0.5560683012008667, "learning_rate": 9.898388473838056e-05, "loss": 1.8177, "step": 1434 }, { "epoch": 0.07998439328911432, "grad_norm": 0.5030083060264587, "learning_rate": 9.898209796595636e-05, "loss": 1.7325, "step": 1435 }, { "epoch": 0.08004013154227746, "grad_norm": 0.48422524333000183, "learning_rate": 9.898030964010562e-05, "loss": 1.5905, "step": 1436 }, { "epoch": 0.08009586979544062, "grad_norm": 0.5284083485603333, "learning_rate": 9.897851976088501e-05, "loss": 1.672, "step": 1437 }, { "epoch": 0.08015160804860376, "grad_norm": 0.5937215685844421, "learning_rate": 9.897672832835135e-05, "loss": 1.9549, "step": 1438 }, { "epoch": 0.0802073463017669, "grad_norm": 0.4896755516529083, "learning_rate": 9.89749353425614e-05, "loss": 1.7438, "step": 1439 }, { "epoch": 0.08026308455493004, "grad_norm": 0.5281119346618652, "learning_rate": 9.897314080357202e-05, "loss": 1.6437, "step": 1440 }, { "epoch": 0.08031882280809319, "grad_norm": 0.5150919556617737, "learning_rate": 9.897134471144019e-05, "loss": 1.742, "step": 1441 }, { "epoch": 0.08037456106125634, "grad_norm": 0.5028387308120728, "learning_rate": 9.896954706622281e-05, "loss": 1.5031, "step": 1442 }, { "epoch": 0.08043029931441949, "grad_norm": 0.5158771276473999, "learning_rate": 9.896774786797691e-05, "loss": 1.533, "step": 1443 }, { "epoch": 0.08048603756758263, "grad_norm": 0.5377411842346191, "learning_rate": 9.896594711675954e-05, "loss": 2.0242, "step": 1444 }, { "epoch": 0.08054177582074577, "grad_norm": 0.4912663698196411, "learning_rate": 9.896414481262784e-05, "loss": 1.815, "step": 1445 }, { "epoch": 0.08059751407390893, "grad_norm": 0.47936177253723145, "learning_rate": 9.896234095563893e-05, "loss": 1.5458, "step": 1446 }, { "epoch": 0.08065325232707207, "grad_norm": 0.5695403218269348, "learning_rate": 9.896053554585006e-05, "loss": 2.1062, "step": 1447 }, { "epoch": 0.08070899058023522, "grad_norm": 0.5067823529243469, "learning_rate": 9.895872858331843e-05, "loss": 1.7228, "step": 1448 }, { "epoch": 0.08076472883339836, "grad_norm": 0.5249797105789185, "learning_rate": 9.89569200681014e-05, "loss": 1.8915, "step": 1449 }, { "epoch": 0.0808204670865615, "grad_norm": 0.5042678713798523, "learning_rate": 9.895511000025629e-05, "loss": 1.857, "step": 1450 }, { "epoch": 0.08087620533972466, "grad_norm": 0.5119437575340271, "learning_rate": 9.895329837984053e-05, "loss": 1.7033, "step": 1451 }, { "epoch": 0.0809319435928878, "grad_norm": 0.5357143878936768, "learning_rate": 9.895148520691155e-05, "loss": 1.9076, "step": 1452 }, { "epoch": 0.08098768184605094, "grad_norm": 0.47728776931762695, "learning_rate": 9.894967048152688e-05, "loss": 1.4164, "step": 1453 }, { "epoch": 0.08104342009921409, "grad_norm": 0.5269622206687927, "learning_rate": 9.894785420374405e-05, "loss": 1.9833, "step": 1454 }, { "epoch": 0.08109915835237724, "grad_norm": 0.5312412977218628, "learning_rate": 9.894603637362068e-05, "loss": 1.8342, "step": 1455 }, { "epoch": 0.08115489660554039, "grad_norm": 0.5786725282669067, "learning_rate": 9.894421699121439e-05, "loss": 2.1415, "step": 1456 }, { "epoch": 0.08121063485870353, "grad_norm": 0.4990336000919342, "learning_rate": 9.894239605658292e-05, "loss": 1.8387, "step": 1457 }, { "epoch": 0.08126637311186667, "grad_norm": 0.5438005924224854, "learning_rate": 9.8940573569784e-05, "loss": 1.9307, "step": 1458 }, { "epoch": 0.08132211136502981, "grad_norm": 0.5444794297218323, "learning_rate": 9.893874953087543e-05, "loss": 1.7991, "step": 1459 }, { "epoch": 0.08137784961819297, "grad_norm": 0.5221540331840515, "learning_rate": 9.893692393991504e-05, "loss": 1.7898, "step": 1460 }, { "epoch": 0.08143358787135611, "grad_norm": 0.509023129940033, "learning_rate": 9.893509679696077e-05, "loss": 1.8955, "step": 1461 }, { "epoch": 0.08148932612451926, "grad_norm": 0.5018633008003235, "learning_rate": 9.893326810207053e-05, "loss": 1.6774, "step": 1462 }, { "epoch": 0.0815450643776824, "grad_norm": 0.5234403610229492, "learning_rate": 9.893143785530233e-05, "loss": 1.5989, "step": 1463 }, { "epoch": 0.08160080263084554, "grad_norm": 0.5122543573379517, "learning_rate": 9.892960605671421e-05, "loss": 1.6129, "step": 1464 }, { "epoch": 0.0816565408840087, "grad_norm": 0.5005357265472412, "learning_rate": 9.892777270636426e-05, "loss": 1.7568, "step": 1465 }, { "epoch": 0.08171227913717184, "grad_norm": 0.4521070420742035, "learning_rate": 9.892593780431063e-05, "loss": 1.5785, "step": 1466 }, { "epoch": 0.08176801739033498, "grad_norm": 0.5116862058639526, "learning_rate": 9.892410135061151e-05, "loss": 1.6021, "step": 1467 }, { "epoch": 0.08182375564349813, "grad_norm": 0.5345929861068726, "learning_rate": 9.892226334532515e-05, "loss": 1.7185, "step": 1468 }, { "epoch": 0.08187949389666128, "grad_norm": 0.5190909504890442, "learning_rate": 9.892042378850983e-05, "loss": 1.7729, "step": 1469 }, { "epoch": 0.08193523214982443, "grad_norm": 0.5051796436309814, "learning_rate": 9.89185826802239e-05, "loss": 1.7497, "step": 1470 }, { "epoch": 0.08199097040298757, "grad_norm": 0.49057456851005554, "learning_rate": 9.891674002052572e-05, "loss": 1.7032, "step": 1471 }, { "epoch": 0.08204670865615071, "grad_norm": 0.48970887064933777, "learning_rate": 9.891489580947377e-05, "loss": 1.697, "step": 1472 }, { "epoch": 0.08210244690931386, "grad_norm": 0.466226726770401, "learning_rate": 9.891305004712652e-05, "loss": 1.676, "step": 1473 }, { "epoch": 0.08215818516247701, "grad_norm": 0.5120090246200562, "learning_rate": 9.891120273354248e-05, "loss": 1.7862, "step": 1474 }, { "epoch": 0.08221392341564016, "grad_norm": 0.5071076154708862, "learning_rate": 9.890935386878029e-05, "loss": 1.7835, "step": 1475 }, { "epoch": 0.0822696616688033, "grad_norm": 0.5432698726654053, "learning_rate": 9.890750345289855e-05, "loss": 1.9147, "step": 1476 }, { "epoch": 0.08232539992196644, "grad_norm": 0.5131239295005798, "learning_rate": 9.890565148595594e-05, "loss": 1.9944, "step": 1477 }, { "epoch": 0.0823811381751296, "grad_norm": 0.49580785632133484, "learning_rate": 9.890379796801122e-05, "loss": 1.7003, "step": 1478 }, { "epoch": 0.08243687642829274, "grad_norm": 0.5251078605651855, "learning_rate": 9.890194289912315e-05, "loss": 1.5901, "step": 1479 }, { "epoch": 0.08249261468145588, "grad_norm": 0.4522892236709595, "learning_rate": 9.890008627935057e-05, "loss": 1.4628, "step": 1480 }, { "epoch": 0.08254835293461903, "grad_norm": 0.49866771697998047, "learning_rate": 9.889822810875236e-05, "loss": 1.797, "step": 1481 }, { "epoch": 0.08260409118778217, "grad_norm": 0.5042446851730347, "learning_rate": 9.889636838738745e-05, "loss": 1.7715, "step": 1482 }, { "epoch": 0.08265982944094533, "grad_norm": 0.5398827791213989, "learning_rate": 9.889450711531482e-05, "loss": 1.7935, "step": 1483 }, { "epoch": 0.08271556769410847, "grad_norm": 0.5085358023643494, "learning_rate": 9.889264429259351e-05, "loss": 1.7009, "step": 1484 }, { "epoch": 0.08277130594727161, "grad_norm": 0.5344458222389221, "learning_rate": 9.889077991928257e-05, "loss": 1.9159, "step": 1485 }, { "epoch": 0.08282704420043475, "grad_norm": 0.5375879406929016, "learning_rate": 9.888891399544116e-05, "loss": 1.8089, "step": 1486 }, { "epoch": 0.0828827824535979, "grad_norm": 0.5068013668060303, "learning_rate": 9.888704652112841e-05, "loss": 1.81, "step": 1487 }, { "epoch": 0.08293852070676105, "grad_norm": 0.5293126106262207, "learning_rate": 9.88851774964036e-05, "loss": 1.8359, "step": 1488 }, { "epoch": 0.0829942589599242, "grad_norm": 0.538372814655304, "learning_rate": 9.8883306921326e-05, "loss": 1.7542, "step": 1489 }, { "epoch": 0.08304999721308734, "grad_norm": 0.5009732246398926, "learning_rate": 9.888143479595487e-05, "loss": 1.761, "step": 1490 }, { "epoch": 0.08310573546625048, "grad_norm": 0.5073357820510864, "learning_rate": 9.887956112034965e-05, "loss": 1.961, "step": 1491 }, { "epoch": 0.08316147371941364, "grad_norm": 0.5246378779411316, "learning_rate": 9.887768589456973e-05, "loss": 1.6075, "step": 1492 }, { "epoch": 0.08321721197257678, "grad_norm": 0.5965234637260437, "learning_rate": 9.88758091186746e-05, "loss": 1.7721, "step": 1493 }, { "epoch": 0.08327295022573993, "grad_norm": 0.580460250377655, "learning_rate": 9.887393079272378e-05, "loss": 2.0317, "step": 1494 }, { "epoch": 0.08332868847890307, "grad_norm": 0.47487667202949524, "learning_rate": 9.88720509167768e-05, "loss": 1.614, "step": 1495 }, { "epoch": 0.08338442673206621, "grad_norm": 0.511886715888977, "learning_rate": 9.887016949089333e-05, "loss": 1.7988, "step": 1496 }, { "epoch": 0.08344016498522937, "grad_norm": 0.5386150479316711, "learning_rate": 9.886828651513302e-05, "loss": 1.6694, "step": 1497 }, { "epoch": 0.08349590323839251, "grad_norm": 0.5117900967597961, "learning_rate": 9.886640198955557e-05, "loss": 1.9023, "step": 1498 }, { "epoch": 0.08355164149155565, "grad_norm": 0.5726772546768188, "learning_rate": 9.886451591422076e-05, "loss": 1.8974, "step": 1499 }, { "epoch": 0.0836073797447188, "grad_norm": 0.5696210861206055, "learning_rate": 9.886262828918842e-05, "loss": 2.011, "step": 1500 }, { "epoch": 0.08366311799788195, "grad_norm": 0.5422051548957825, "learning_rate": 9.886073911451838e-05, "loss": 1.853, "step": 1501 }, { "epoch": 0.0837188562510451, "grad_norm": 0.5856989622116089, "learning_rate": 9.88588483902706e-05, "loss": 2.0279, "step": 1502 }, { "epoch": 0.08377459450420824, "grad_norm": 0.49369946122169495, "learning_rate": 9.8856956116505e-05, "loss": 1.9006, "step": 1503 }, { "epoch": 0.08383033275737138, "grad_norm": 0.5601094961166382, "learning_rate": 9.88550622932816e-05, "loss": 1.8549, "step": 1504 }, { "epoch": 0.08388607101053452, "grad_norm": 0.5482882857322693, "learning_rate": 9.885316692066048e-05, "loss": 1.6991, "step": 1505 }, { "epoch": 0.08394180926369768, "grad_norm": 0.5111584663391113, "learning_rate": 9.885126999870173e-05, "loss": 1.7942, "step": 1506 }, { "epoch": 0.08399754751686082, "grad_norm": 0.5061234831809998, "learning_rate": 9.884937152746553e-05, "loss": 1.7333, "step": 1507 }, { "epoch": 0.08405328577002397, "grad_norm": 0.5409541726112366, "learning_rate": 9.884747150701207e-05, "loss": 1.8288, "step": 1508 }, { "epoch": 0.08410902402318711, "grad_norm": 0.5025638341903687, "learning_rate": 9.884556993740161e-05, "loss": 1.7986, "step": 1509 }, { "epoch": 0.08416476227635025, "grad_norm": 0.544328510761261, "learning_rate": 9.884366681869447e-05, "loss": 1.9335, "step": 1510 }, { "epoch": 0.08422050052951341, "grad_norm": 0.5425384640693665, "learning_rate": 9.8841762150951e-05, "loss": 1.952, "step": 1511 }, { "epoch": 0.08427623878267655, "grad_norm": 0.546819269657135, "learning_rate": 9.883985593423158e-05, "loss": 1.6983, "step": 1512 }, { "epoch": 0.0843319770358397, "grad_norm": 0.5102137327194214, "learning_rate": 9.88379481685967e-05, "loss": 1.9128, "step": 1513 }, { "epoch": 0.08438771528900284, "grad_norm": 0.5642107725143433, "learning_rate": 9.883603885410686e-05, "loss": 1.8798, "step": 1514 }, { "epoch": 0.084443453542166, "grad_norm": 0.5285095572471619, "learning_rate": 9.88341279908226e-05, "loss": 1.987, "step": 1515 }, { "epoch": 0.08449919179532914, "grad_norm": 0.5712692737579346, "learning_rate": 9.88322155788045e-05, "loss": 1.9272, "step": 1516 }, { "epoch": 0.08455493004849228, "grad_norm": 0.5068216919898987, "learning_rate": 9.883030161811324e-05, "loss": 1.747, "step": 1517 }, { "epoch": 0.08461066830165542, "grad_norm": 0.5292205810546875, "learning_rate": 9.882838610880954e-05, "loss": 1.7361, "step": 1518 }, { "epoch": 0.08466640655481857, "grad_norm": 0.5131486654281616, "learning_rate": 9.88264690509541e-05, "loss": 1.7197, "step": 1519 }, { "epoch": 0.08472214480798172, "grad_norm": 0.5345507860183716, "learning_rate": 9.882455044460773e-05, "loss": 1.6553, "step": 1520 }, { "epoch": 0.08477788306114487, "grad_norm": 0.5729446411132812, "learning_rate": 9.88226302898313e-05, "loss": 1.9354, "step": 1521 }, { "epoch": 0.08483362131430801, "grad_norm": 0.5425586700439453, "learning_rate": 9.882070858668568e-05, "loss": 1.7173, "step": 1522 }, { "epoch": 0.08488935956747115, "grad_norm": 0.5828628540039062, "learning_rate": 9.881878533523185e-05, "loss": 1.5161, "step": 1523 }, { "epoch": 0.08494509782063431, "grad_norm": 0.4496408998966217, "learning_rate": 9.881686053553077e-05, "loss": 1.4486, "step": 1524 }, { "epoch": 0.08500083607379745, "grad_norm": 0.5365184545516968, "learning_rate": 9.88149341876435e-05, "loss": 1.7378, "step": 1525 }, { "epoch": 0.0850565743269606, "grad_norm": 0.5183097720146179, "learning_rate": 9.881300629163113e-05, "loss": 1.7466, "step": 1526 }, { "epoch": 0.08511231258012374, "grad_norm": 0.5500345826148987, "learning_rate": 9.88110768475548e-05, "loss": 2.05, "step": 1527 }, { "epoch": 0.08516805083328688, "grad_norm": 0.5311182141304016, "learning_rate": 9.88091458554757e-05, "loss": 1.9213, "step": 1528 }, { "epoch": 0.08522378908645004, "grad_norm": 0.5297403335571289, "learning_rate": 9.880721331545507e-05, "loss": 1.7725, "step": 1529 }, { "epoch": 0.08527952733961318, "grad_norm": 0.4777231514453888, "learning_rate": 9.880527922755418e-05, "loss": 1.7671, "step": 1530 }, { "epoch": 0.08533526559277632, "grad_norm": 0.5027580261230469, "learning_rate": 9.880334359183441e-05, "loss": 1.5094, "step": 1531 }, { "epoch": 0.08539100384593946, "grad_norm": 0.5496742725372314, "learning_rate": 9.880140640835711e-05, "loss": 1.8291, "step": 1532 }, { "epoch": 0.08544674209910261, "grad_norm": 0.5041139721870422, "learning_rate": 9.879946767718374e-05, "loss": 1.6669, "step": 1533 }, { "epoch": 0.08550248035226576, "grad_norm": 0.5976061820983887, "learning_rate": 9.879752739837578e-05, "loss": 2.1902, "step": 1534 }, { "epoch": 0.0855582186054289, "grad_norm": 0.5422946810722351, "learning_rate": 9.879558557199475e-05, "loss": 1.5727, "step": 1535 }, { "epoch": 0.08561395685859205, "grad_norm": 0.4999959170818329, "learning_rate": 9.879364219810226e-05, "loss": 1.6102, "step": 1536 }, { "epoch": 0.08566969511175519, "grad_norm": 0.5026562213897705, "learning_rate": 9.879169727675991e-05, "loss": 1.7124, "step": 1537 }, { "epoch": 0.08572543336491835, "grad_norm": 0.5175659656524658, "learning_rate": 9.87897508080294e-05, "loss": 1.7585, "step": 1538 }, { "epoch": 0.08578117161808149, "grad_norm": 0.5337525010108948, "learning_rate": 9.878780279197247e-05, "loss": 1.7857, "step": 1539 }, { "epoch": 0.08583690987124463, "grad_norm": 0.5325166583061218, "learning_rate": 9.878585322865087e-05, "loss": 1.865, "step": 1540 }, { "epoch": 0.08589264812440778, "grad_norm": 0.46590784192085266, "learning_rate": 9.878390211812646e-05, "loss": 1.627, "step": 1541 }, { "epoch": 0.08594838637757092, "grad_norm": 0.4856724441051483, "learning_rate": 9.87819494604611e-05, "loss": 1.7221, "step": 1542 }, { "epoch": 0.08600412463073408, "grad_norm": 0.5396975874900818, "learning_rate": 9.877999525571673e-05, "loss": 1.7696, "step": 1543 }, { "epoch": 0.08605986288389722, "grad_norm": 0.49516481161117554, "learning_rate": 9.87780395039553e-05, "loss": 1.6928, "step": 1544 }, { "epoch": 0.08611560113706036, "grad_norm": 0.5212313532829285, "learning_rate": 9.877608220523886e-05, "loss": 1.8461, "step": 1545 }, { "epoch": 0.0861713393902235, "grad_norm": 0.5174347162246704, "learning_rate": 9.877412335962948e-05, "loss": 1.6598, "step": 1546 }, { "epoch": 0.08622707764338666, "grad_norm": 0.5417358875274658, "learning_rate": 9.877216296718929e-05, "loss": 1.8449, "step": 1547 }, { "epoch": 0.0862828158965498, "grad_norm": 0.6204573512077332, "learning_rate": 9.877020102798044e-05, "loss": 2.0521, "step": 1548 }, { "epoch": 0.08633855414971295, "grad_norm": 0.548689067363739, "learning_rate": 9.876823754206517e-05, "loss": 1.8019, "step": 1549 }, { "epoch": 0.08639429240287609, "grad_norm": 0.5634471774101257, "learning_rate": 9.876627250950573e-05, "loss": 1.9138, "step": 1550 }, { "epoch": 0.08645003065603923, "grad_norm": 0.517440915107727, "learning_rate": 9.876430593036445e-05, "loss": 1.6576, "step": 1551 }, { "epoch": 0.08650576890920239, "grad_norm": 0.5255969762802124, "learning_rate": 9.876233780470373e-05, "loss": 1.9165, "step": 1552 }, { "epoch": 0.08656150716236553, "grad_norm": 0.5497751235961914, "learning_rate": 9.876036813258593e-05, "loss": 1.7924, "step": 1553 }, { "epoch": 0.08661724541552868, "grad_norm": 0.49066075682640076, "learning_rate": 9.875839691407355e-05, "loss": 1.7025, "step": 1554 }, { "epoch": 0.08667298366869182, "grad_norm": 0.5411027669906616, "learning_rate": 9.875642414922913e-05, "loss": 1.7742, "step": 1555 }, { "epoch": 0.08672872192185498, "grad_norm": 0.5388767123222351, "learning_rate": 9.875444983811517e-05, "loss": 1.7676, "step": 1556 }, { "epoch": 0.08678446017501812, "grad_norm": 0.540668249130249, "learning_rate": 9.875247398079434e-05, "loss": 1.7824, "step": 1557 }, { "epoch": 0.08684019842818126, "grad_norm": 0.4785401523113251, "learning_rate": 9.875049657732928e-05, "loss": 1.5643, "step": 1558 }, { "epoch": 0.0868959366813444, "grad_norm": 0.4758340120315552, "learning_rate": 9.87485176277827e-05, "loss": 1.7751, "step": 1559 }, { "epoch": 0.08695167493450755, "grad_norm": 0.5260589122772217, "learning_rate": 9.874653713221736e-05, "loss": 1.6758, "step": 1560 }, { "epoch": 0.0870074131876707, "grad_norm": 0.5716840624809265, "learning_rate": 9.874455509069608e-05, "loss": 1.9237, "step": 1561 }, { "epoch": 0.08706315144083385, "grad_norm": 0.5434233546257019, "learning_rate": 9.874257150328171e-05, "loss": 1.8882, "step": 1562 }, { "epoch": 0.08711888969399699, "grad_norm": 0.562435507774353, "learning_rate": 9.874058637003715e-05, "loss": 2.0451, "step": 1563 }, { "epoch": 0.08717462794716013, "grad_norm": 0.5642979741096497, "learning_rate": 9.87385996910254e-05, "loss": 1.924, "step": 1564 }, { "epoch": 0.08723036620032328, "grad_norm": 0.5052669048309326, "learning_rate": 9.87366114663094e-05, "loss": 1.58, "step": 1565 }, { "epoch": 0.08728610445348643, "grad_norm": 0.5220628380775452, "learning_rate": 9.873462169595225e-05, "loss": 1.7895, "step": 1566 }, { "epoch": 0.08734184270664958, "grad_norm": 0.517431378364563, "learning_rate": 9.873263038001706e-05, "loss": 1.6593, "step": 1567 }, { "epoch": 0.08739758095981272, "grad_norm": 0.5140258073806763, "learning_rate": 9.873063751856693e-05, "loss": 1.8271, "step": 1568 }, { "epoch": 0.08745331921297586, "grad_norm": 0.4922142028808594, "learning_rate": 9.872864311166513e-05, "loss": 1.6083, "step": 1569 }, { "epoch": 0.08750905746613902, "grad_norm": 0.5390502214431763, "learning_rate": 9.872664715937485e-05, "loss": 1.4434, "step": 1570 }, { "epoch": 0.08756479571930216, "grad_norm": 0.5033831596374512, "learning_rate": 9.872464966175943e-05, "loss": 1.7666, "step": 1571 }, { "epoch": 0.0876205339724653, "grad_norm": 0.5968888401985168, "learning_rate": 9.872265061888222e-05, "loss": 2.129, "step": 1572 }, { "epoch": 0.08767627222562845, "grad_norm": 0.4963712990283966, "learning_rate": 9.87206500308066e-05, "loss": 1.757, "step": 1573 }, { "epoch": 0.08773201047879159, "grad_norm": 0.561555802822113, "learning_rate": 9.871864789759602e-05, "loss": 1.8953, "step": 1574 }, { "epoch": 0.08778774873195475, "grad_norm": 0.5095016956329346, "learning_rate": 9.871664421931397e-05, "loss": 1.5125, "step": 1575 }, { "epoch": 0.08784348698511789, "grad_norm": 0.5717408061027527, "learning_rate": 9.8714638996024e-05, "loss": 1.9326, "step": 1576 }, { "epoch": 0.08789922523828103, "grad_norm": 0.5086256861686707, "learning_rate": 9.871263222778972e-05, "loss": 1.4956, "step": 1577 }, { "epoch": 0.08795496349144417, "grad_norm": 0.5559898614883423, "learning_rate": 9.871062391467476e-05, "loss": 2.0481, "step": 1578 }, { "epoch": 0.08801070174460733, "grad_norm": 0.511561930179596, "learning_rate": 9.870861405674281e-05, "loss": 1.6748, "step": 1579 }, { "epoch": 0.08806643999777047, "grad_norm": 0.46475693583488464, "learning_rate": 9.87066026540576e-05, "loss": 1.5146, "step": 1580 }, { "epoch": 0.08812217825093362, "grad_norm": 0.619973361492157, "learning_rate": 9.870458970668295e-05, "loss": 1.9752, "step": 1581 }, { "epoch": 0.08817791650409676, "grad_norm": 0.5257066488265991, "learning_rate": 9.870257521468267e-05, "loss": 1.8943, "step": 1582 }, { "epoch": 0.0882336547572599, "grad_norm": 0.48758870363235474, "learning_rate": 9.870055917812066e-05, "loss": 1.7243, "step": 1583 }, { "epoch": 0.08828939301042306, "grad_norm": 0.500957190990448, "learning_rate": 9.869854159706087e-05, "loss": 1.608, "step": 1584 }, { "epoch": 0.0883451312635862, "grad_norm": 0.5307281613349915, "learning_rate": 9.869652247156726e-05, "loss": 1.8326, "step": 1585 }, { "epoch": 0.08840086951674934, "grad_norm": 0.5321508049964905, "learning_rate": 9.869450180170388e-05, "loss": 1.5715, "step": 1586 }, { "epoch": 0.08845660776991249, "grad_norm": 0.512824296951294, "learning_rate": 9.869247958753483e-05, "loss": 1.9452, "step": 1587 }, { "epoch": 0.08851234602307563, "grad_norm": 0.5297205448150635, "learning_rate": 9.86904558291242e-05, "loss": 1.7894, "step": 1588 }, { "epoch": 0.08856808427623879, "grad_norm": 0.5388361215591431, "learning_rate": 9.86884305265362e-05, "loss": 1.8428, "step": 1589 }, { "epoch": 0.08862382252940193, "grad_norm": 0.5642775297164917, "learning_rate": 9.868640367983507e-05, "loss": 1.9602, "step": 1590 }, { "epoch": 0.08867956078256507, "grad_norm": 0.5613628029823303, "learning_rate": 9.868437528908507e-05, "loss": 1.8967, "step": 1591 }, { "epoch": 0.08873529903572822, "grad_norm": 0.4843713641166687, "learning_rate": 9.868234535435052e-05, "loss": 1.5939, "step": 1592 }, { "epoch": 0.08879103728889137, "grad_norm": 0.5549110770225525, "learning_rate": 9.868031387569583e-05, "loss": 1.7461, "step": 1593 }, { "epoch": 0.08884677554205452, "grad_norm": 0.5344760417938232, "learning_rate": 9.867828085318541e-05, "loss": 1.7843, "step": 1594 }, { "epoch": 0.08890251379521766, "grad_norm": 0.49532350897789, "learning_rate": 9.867624628688374e-05, "loss": 1.981, "step": 1595 }, { "epoch": 0.0889582520483808, "grad_norm": 0.48208191990852356, "learning_rate": 9.867421017685531e-05, "loss": 1.3437, "step": 1596 }, { "epoch": 0.08901399030154394, "grad_norm": 0.489444762468338, "learning_rate": 9.867217252316476e-05, "loss": 1.6426, "step": 1597 }, { "epoch": 0.0890697285547071, "grad_norm": 0.5148588418960571, "learning_rate": 9.867013332587667e-05, "loss": 1.5808, "step": 1598 }, { "epoch": 0.08912546680787024, "grad_norm": 0.5365609526634216, "learning_rate": 9.86680925850557e-05, "loss": 1.8197, "step": 1599 }, { "epoch": 0.08918120506103339, "grad_norm": 0.48567450046539307, "learning_rate": 9.86660503007666e-05, "loss": 1.6238, "step": 1600 }, { "epoch": 0.08923694331419653, "grad_norm": 0.515129029750824, "learning_rate": 9.866400647307413e-05, "loss": 1.8063, "step": 1601 }, { "epoch": 0.08929268156735969, "grad_norm": 0.5591225028038025, "learning_rate": 9.86619611020431e-05, "loss": 1.8849, "step": 1602 }, { "epoch": 0.08934841982052283, "grad_norm": 0.4950789213180542, "learning_rate": 9.865991418773837e-05, "loss": 1.5961, "step": 1603 }, { "epoch": 0.08940415807368597, "grad_norm": 0.5623775124549866, "learning_rate": 9.865786573022488e-05, "loss": 1.782, "step": 1604 }, { "epoch": 0.08945989632684911, "grad_norm": 0.5508179664611816, "learning_rate": 9.865581572956759e-05, "loss": 1.9102, "step": 1605 }, { "epoch": 0.08951563458001226, "grad_norm": 0.5296784043312073, "learning_rate": 9.86537641858315e-05, "loss": 1.8494, "step": 1606 }, { "epoch": 0.08957137283317541, "grad_norm": 0.5068146586418152, "learning_rate": 9.865171109908169e-05, "loss": 1.7515, "step": 1607 }, { "epoch": 0.08962711108633856, "grad_norm": 0.5015462636947632, "learning_rate": 9.864965646938326e-05, "loss": 1.6874, "step": 1608 }, { "epoch": 0.0896828493395017, "grad_norm": 0.5293746590614319, "learning_rate": 9.864760029680137e-05, "loss": 1.7417, "step": 1609 }, { "epoch": 0.08973858759266484, "grad_norm": 0.5211681127548218, "learning_rate": 9.864554258140124e-05, "loss": 1.7553, "step": 1610 }, { "epoch": 0.08979432584582799, "grad_norm": 0.7411361336708069, "learning_rate": 9.864348332324811e-05, "loss": 1.7663, "step": 1611 }, { "epoch": 0.08985006409899114, "grad_norm": 0.4988972842693329, "learning_rate": 9.864142252240731e-05, "loss": 1.6, "step": 1612 }, { "epoch": 0.08990580235215428, "grad_norm": 0.5340063571929932, "learning_rate": 9.863936017894418e-05, "loss": 1.8076, "step": 1613 }, { "epoch": 0.08996154060531743, "grad_norm": 0.5994722247123718, "learning_rate": 9.863729629292414e-05, "loss": 1.7864, "step": 1614 }, { "epoch": 0.09001727885848057, "grad_norm": 0.541131854057312, "learning_rate": 9.863523086441264e-05, "loss": 1.931, "step": 1615 }, { "epoch": 0.09007301711164373, "grad_norm": 0.5259929299354553, "learning_rate": 9.863316389347517e-05, "loss": 1.7562, "step": 1616 }, { "epoch": 0.09012875536480687, "grad_norm": 0.5242890119552612, "learning_rate": 9.863109538017729e-05, "loss": 1.6973, "step": 1617 }, { "epoch": 0.09018449361797001, "grad_norm": 0.5834923386573792, "learning_rate": 9.862902532458461e-05, "loss": 2.0494, "step": 1618 }, { "epoch": 0.09024023187113316, "grad_norm": 0.4912288188934326, "learning_rate": 9.862695372676278e-05, "loss": 1.6505, "step": 1619 }, { "epoch": 0.0902959701242963, "grad_norm": 0.5288010239601135, "learning_rate": 9.862488058677748e-05, "loss": 1.734, "step": 1620 }, { "epoch": 0.09035170837745946, "grad_norm": 0.5029554963111877, "learning_rate": 9.862280590469448e-05, "loss": 1.8098, "step": 1621 }, { "epoch": 0.0904074466306226, "grad_norm": 0.531711995601654, "learning_rate": 9.862072968057956e-05, "loss": 1.8394, "step": 1622 }, { "epoch": 0.09046318488378574, "grad_norm": 0.4818442165851593, "learning_rate": 9.861865191449858e-05, "loss": 1.6742, "step": 1623 }, { "epoch": 0.09051892313694888, "grad_norm": 0.4834239184856415, "learning_rate": 9.861657260651742e-05, "loss": 1.6425, "step": 1624 }, { "epoch": 0.09057466139011204, "grad_norm": 0.4923589825630188, "learning_rate": 9.861449175670204e-05, "loss": 1.5693, "step": 1625 }, { "epoch": 0.09063039964327518, "grad_norm": 0.48194825649261475, "learning_rate": 9.861240936511842e-05, "loss": 1.6782, "step": 1626 }, { "epoch": 0.09068613789643833, "grad_norm": 0.5542406439781189, "learning_rate": 9.86103254318326e-05, "loss": 1.9775, "step": 1627 }, { "epoch": 0.09074187614960147, "grad_norm": 0.6013079881668091, "learning_rate": 9.860823995691068e-05, "loss": 1.9425, "step": 1628 }, { "epoch": 0.09079761440276461, "grad_norm": 0.5376304984092712, "learning_rate": 9.860615294041879e-05, "loss": 1.6473, "step": 1629 }, { "epoch": 0.09085335265592777, "grad_norm": 0.5485152006149292, "learning_rate": 9.860406438242313e-05, "loss": 1.6367, "step": 1630 }, { "epoch": 0.09090909090909091, "grad_norm": 0.5142073035240173, "learning_rate": 9.860197428298991e-05, "loss": 1.7602, "step": 1631 }, { "epoch": 0.09096482916225405, "grad_norm": 0.49521228671073914, "learning_rate": 9.859988264218546e-05, "loss": 1.546, "step": 1632 }, { "epoch": 0.0910205674154172, "grad_norm": 0.5011737942695618, "learning_rate": 9.859778946007608e-05, "loss": 1.5578, "step": 1633 }, { "epoch": 0.09107630566858034, "grad_norm": 0.4523265063762665, "learning_rate": 9.859569473672816e-05, "loss": 1.3888, "step": 1634 }, { "epoch": 0.0911320439217435, "grad_norm": 0.48054036498069763, "learning_rate": 9.859359847220815e-05, "loss": 1.7516, "step": 1635 }, { "epoch": 0.09118778217490664, "grad_norm": 0.5349341034889221, "learning_rate": 9.85915006665825e-05, "loss": 1.7055, "step": 1636 }, { "epoch": 0.09124352042806978, "grad_norm": 0.5274312496185303, "learning_rate": 9.858940131991777e-05, "loss": 1.8203, "step": 1637 }, { "epoch": 0.09129925868123293, "grad_norm": 0.4654419720172882, "learning_rate": 9.85873004322805e-05, "loss": 1.5783, "step": 1638 }, { "epoch": 0.09135499693439608, "grad_norm": 0.5258073806762695, "learning_rate": 9.858519800373738e-05, "loss": 1.7707, "step": 1639 }, { "epoch": 0.09141073518755923, "grad_norm": 0.4929850995540619, "learning_rate": 9.858309403435501e-05, "loss": 1.6027, "step": 1640 }, { "epoch": 0.09146647344072237, "grad_norm": 0.5121711492538452, "learning_rate": 9.85809885242002e-05, "loss": 1.7874, "step": 1641 }, { "epoch": 0.09152221169388551, "grad_norm": 0.4955439567565918, "learning_rate": 9.857888147333965e-05, "loss": 1.7223, "step": 1642 }, { "epoch": 0.09157794994704865, "grad_norm": 0.519477903842926, "learning_rate": 9.857677288184022e-05, "loss": 1.8618, "step": 1643 }, { "epoch": 0.09163368820021181, "grad_norm": 0.5247395038604736, "learning_rate": 9.857466274976878e-05, "loss": 1.761, "step": 1644 }, { "epoch": 0.09168942645337495, "grad_norm": 0.4881756901741028, "learning_rate": 9.857255107719225e-05, "loss": 1.7272, "step": 1645 }, { "epoch": 0.0917451647065381, "grad_norm": 0.5688063502311707, "learning_rate": 9.857043786417759e-05, "loss": 1.7532, "step": 1646 }, { "epoch": 0.09180090295970124, "grad_norm": 0.531910240650177, "learning_rate": 9.856832311079183e-05, "loss": 1.9235, "step": 1647 }, { "epoch": 0.0918566412128644, "grad_norm": 0.5271464586257935, "learning_rate": 9.856620681710205e-05, "loss": 1.8481, "step": 1648 }, { "epoch": 0.09191237946602754, "grad_norm": 0.5019913911819458, "learning_rate": 9.856408898317533e-05, "loss": 1.7273, "step": 1649 }, { "epoch": 0.09196811771919068, "grad_norm": 0.5375306010246277, "learning_rate": 9.856196960907887e-05, "loss": 1.8292, "step": 1650 }, { "epoch": 0.09202385597235382, "grad_norm": 0.551287829875946, "learning_rate": 9.855984869487985e-05, "loss": 1.7672, "step": 1651 }, { "epoch": 0.09207959422551697, "grad_norm": 0.5110806226730347, "learning_rate": 9.855772624064557e-05, "loss": 1.7338, "step": 1652 }, { "epoch": 0.09213533247868012, "grad_norm": 0.5807773470878601, "learning_rate": 9.855560224644332e-05, "loss": 1.8558, "step": 1653 }, { "epoch": 0.09219107073184327, "grad_norm": 0.5399064421653748, "learning_rate": 9.855347671234045e-05, "loss": 1.7338, "step": 1654 }, { "epoch": 0.09224680898500641, "grad_norm": 0.5670611262321472, "learning_rate": 9.855134963840441e-05, "loss": 1.9314, "step": 1655 }, { "epoch": 0.09230254723816955, "grad_norm": 0.49795302748680115, "learning_rate": 9.854922102470262e-05, "loss": 1.7196, "step": 1656 }, { "epoch": 0.0923582854913327, "grad_norm": 0.5752295255661011, "learning_rate": 9.85470908713026e-05, "loss": 1.7249, "step": 1657 }, { "epoch": 0.09241402374449585, "grad_norm": 0.4967830181121826, "learning_rate": 9.854495917827191e-05, "loss": 1.7368, "step": 1658 }, { "epoch": 0.092469761997659, "grad_norm": 0.4957406520843506, "learning_rate": 9.854282594567816e-05, "loss": 1.8287, "step": 1659 }, { "epoch": 0.09252550025082214, "grad_norm": 0.49035385251045227, "learning_rate": 9.854069117358899e-05, "loss": 1.743, "step": 1660 }, { "epoch": 0.09258123850398528, "grad_norm": 0.5366220474243164, "learning_rate": 9.853855486207211e-05, "loss": 1.7903, "step": 1661 }, { "epoch": 0.09263697675714844, "grad_norm": 0.5238292217254639, "learning_rate": 9.853641701119525e-05, "loss": 1.6038, "step": 1662 }, { "epoch": 0.09269271501031158, "grad_norm": 0.507854700088501, "learning_rate": 9.853427762102625e-05, "loss": 1.7459, "step": 1663 }, { "epoch": 0.09274845326347472, "grad_norm": 0.5182837247848511, "learning_rate": 9.853213669163293e-05, "loss": 1.7409, "step": 1664 }, { "epoch": 0.09280419151663787, "grad_norm": 0.5023046135902405, "learning_rate": 9.852999422308319e-05, "loss": 1.8207, "step": 1665 }, { "epoch": 0.09285992976980101, "grad_norm": 0.6185427308082581, "learning_rate": 9.852785021544499e-05, "loss": 1.9794, "step": 1666 }, { "epoch": 0.09291566802296417, "grad_norm": 0.5567124485969543, "learning_rate": 9.852570466878632e-05, "loss": 1.8052, "step": 1667 }, { "epoch": 0.09297140627612731, "grad_norm": 0.5299728512763977, "learning_rate": 9.852355758317523e-05, "loss": 1.6414, "step": 1668 }, { "epoch": 0.09302714452929045, "grad_norm": 0.47446316480636597, "learning_rate": 9.85214089586798e-05, "loss": 1.561, "step": 1669 }, { "epoch": 0.0930828827824536, "grad_norm": 0.5260158181190491, "learning_rate": 9.851925879536817e-05, "loss": 1.7192, "step": 1670 }, { "epoch": 0.09313862103561675, "grad_norm": 0.5200673341751099, "learning_rate": 9.851710709330855e-05, "loss": 1.6869, "step": 1671 }, { "epoch": 0.0931943592887799, "grad_norm": 0.5707138180732727, "learning_rate": 9.851495385256915e-05, "loss": 1.7307, "step": 1672 }, { "epoch": 0.09325009754194304, "grad_norm": 0.6008026003837585, "learning_rate": 9.851279907321829e-05, "loss": 1.8593, "step": 1673 }, { "epoch": 0.09330583579510618, "grad_norm": 0.4921055734157562, "learning_rate": 9.851064275532428e-05, "loss": 1.7155, "step": 1674 }, { "epoch": 0.09336157404826932, "grad_norm": 0.48389917612075806, "learning_rate": 9.850848489895553e-05, "loss": 1.7011, "step": 1675 }, { "epoch": 0.09341731230143248, "grad_norm": 0.6712982058525085, "learning_rate": 9.850632550418046e-05, "loss": 1.8851, "step": 1676 }, { "epoch": 0.09347305055459562, "grad_norm": 0.49884751439094543, "learning_rate": 9.850416457106755e-05, "loss": 1.7392, "step": 1677 }, { "epoch": 0.09352878880775876, "grad_norm": 0.5436164736747742, "learning_rate": 9.850200209968535e-05, "loss": 1.8583, "step": 1678 }, { "epoch": 0.09358452706092191, "grad_norm": 0.543387234210968, "learning_rate": 9.849983809010242e-05, "loss": 1.9008, "step": 1679 }, { "epoch": 0.09364026531408505, "grad_norm": 0.5220986604690552, "learning_rate": 9.849767254238741e-05, "loss": 1.8536, "step": 1680 }, { "epoch": 0.0936960035672482, "grad_norm": 0.5086224675178528, "learning_rate": 9.849550545660898e-05, "loss": 1.6492, "step": 1681 }, { "epoch": 0.09375174182041135, "grad_norm": 0.5263844728469849, "learning_rate": 9.849333683283587e-05, "loss": 1.8646, "step": 1682 }, { "epoch": 0.09380748007357449, "grad_norm": 0.48118674755096436, "learning_rate": 9.849116667113684e-05, "loss": 1.6978, "step": 1683 }, { "epoch": 0.09386321832673764, "grad_norm": 0.5442405939102173, "learning_rate": 9.848899497158075e-05, "loss": 1.7446, "step": 1684 }, { "epoch": 0.09391895657990079, "grad_norm": 0.5518308877944946, "learning_rate": 9.848682173423642e-05, "loss": 1.9409, "step": 1685 }, { "epoch": 0.09397469483306393, "grad_norm": 0.5064495205879211, "learning_rate": 9.848464695917283e-05, "loss": 1.9023, "step": 1686 }, { "epoch": 0.09403043308622708, "grad_norm": 0.5437746644020081, "learning_rate": 9.84824706464589e-05, "loss": 1.8456, "step": 1687 }, { "epoch": 0.09408617133939022, "grad_norm": 0.4933926463127136, "learning_rate": 9.848029279616369e-05, "loss": 1.6156, "step": 1688 }, { "epoch": 0.09414190959255336, "grad_norm": 0.5288189649581909, "learning_rate": 9.847811340835625e-05, "loss": 1.8053, "step": 1689 }, { "epoch": 0.09419764784571652, "grad_norm": 0.5238629579544067, "learning_rate": 9.847593248310569e-05, "loss": 1.8396, "step": 1690 }, { "epoch": 0.09425338609887966, "grad_norm": 0.5135747790336609, "learning_rate": 9.847375002048119e-05, "loss": 1.702, "step": 1691 }, { "epoch": 0.0943091243520428, "grad_norm": 0.48049938678741455, "learning_rate": 9.847156602055196e-05, "loss": 1.7258, "step": 1692 }, { "epoch": 0.09436486260520595, "grad_norm": 0.5790214538574219, "learning_rate": 9.846938048338728e-05, "loss": 1.9521, "step": 1693 }, { "epoch": 0.0944206008583691, "grad_norm": 0.49259278178215027, "learning_rate": 9.846719340905643e-05, "loss": 1.7358, "step": 1694 }, { "epoch": 0.09447633911153225, "grad_norm": 0.5396574139595032, "learning_rate": 9.846500479762879e-05, "loss": 1.9847, "step": 1695 }, { "epoch": 0.09453207736469539, "grad_norm": 0.5003666877746582, "learning_rate": 9.846281464917377e-05, "loss": 1.777, "step": 1696 }, { "epoch": 0.09458781561785853, "grad_norm": 0.5158617496490479, "learning_rate": 9.846062296376083e-05, "loss": 1.6861, "step": 1697 }, { "epoch": 0.09464355387102168, "grad_norm": 0.5154086351394653, "learning_rate": 9.845842974145947e-05, "loss": 1.8176, "step": 1698 }, { "epoch": 0.09469929212418483, "grad_norm": 0.5052759051322937, "learning_rate": 9.845623498233926e-05, "loss": 1.6658, "step": 1699 }, { "epoch": 0.09475503037734798, "grad_norm": 0.6677058339118958, "learning_rate": 9.845403868646979e-05, "loss": 1.7287, "step": 1700 }, { "epoch": 0.09481076863051112, "grad_norm": 0.5167236924171448, "learning_rate": 9.845184085392072e-05, "loss": 1.6861, "step": 1701 }, { "epoch": 0.09486650688367426, "grad_norm": 0.57721346616745, "learning_rate": 9.844964148476175e-05, "loss": 1.9309, "step": 1702 }, { "epoch": 0.0949222451368374, "grad_norm": 0.4876415729522705, "learning_rate": 9.844744057906263e-05, "loss": 1.738, "step": 1703 }, { "epoch": 0.09497798339000056, "grad_norm": 0.5089074373245239, "learning_rate": 9.844523813689316e-05, "loss": 1.8729, "step": 1704 }, { "epoch": 0.0950337216431637, "grad_norm": 0.5102959871292114, "learning_rate": 9.844303415832322e-05, "loss": 1.901, "step": 1705 }, { "epoch": 0.09508945989632685, "grad_norm": 0.5445943474769592, "learning_rate": 9.844082864342265e-05, "loss": 1.7838, "step": 1706 }, { "epoch": 0.09514519814948999, "grad_norm": 0.5227236151695251, "learning_rate": 9.843862159226142e-05, "loss": 1.7044, "step": 1707 }, { "epoch": 0.09520093640265315, "grad_norm": 0.5036524534225464, "learning_rate": 9.843641300490956e-05, "loss": 1.6637, "step": 1708 }, { "epoch": 0.09525667465581629, "grad_norm": 0.5071728825569153, "learning_rate": 9.843420288143706e-05, "loss": 1.5714, "step": 1709 }, { "epoch": 0.09531241290897943, "grad_norm": 0.563736081123352, "learning_rate": 9.843199122191404e-05, "loss": 2.0123, "step": 1710 }, { "epoch": 0.09536815116214258, "grad_norm": 0.5531306266784668, "learning_rate": 9.842977802641065e-05, "loss": 1.74, "step": 1711 }, { "epoch": 0.09542388941530572, "grad_norm": 0.5610520243644714, "learning_rate": 9.842756329499704e-05, "loss": 1.8003, "step": 1712 }, { "epoch": 0.09547962766846887, "grad_norm": 0.498121440410614, "learning_rate": 9.842534702774349e-05, "loss": 1.6448, "step": 1713 }, { "epoch": 0.09553536592163202, "grad_norm": 0.5231457948684692, "learning_rate": 9.842312922472028e-05, "loss": 1.8862, "step": 1714 }, { "epoch": 0.09559110417479516, "grad_norm": 0.520879864692688, "learning_rate": 9.842090988599772e-05, "loss": 1.7858, "step": 1715 }, { "epoch": 0.0956468424279583, "grad_norm": 0.5959715247154236, "learning_rate": 9.841868901164622e-05, "loss": 1.8487, "step": 1716 }, { "epoch": 0.09570258068112146, "grad_norm": 0.5337534546852112, "learning_rate": 9.84164666017362e-05, "loss": 1.5147, "step": 1717 }, { "epoch": 0.0957583189342846, "grad_norm": 0.5244635939598083, "learning_rate": 9.841424265633816e-05, "loss": 1.9583, "step": 1718 }, { "epoch": 0.09581405718744775, "grad_norm": 0.5573442578315735, "learning_rate": 9.84120171755226e-05, "loss": 1.7111, "step": 1719 }, { "epoch": 0.09586979544061089, "grad_norm": 0.5416032671928406, "learning_rate": 9.840979015936014e-05, "loss": 1.9152, "step": 1720 }, { "epoch": 0.09592553369377403, "grad_norm": 0.5546048283576965, "learning_rate": 9.840756160792138e-05, "loss": 1.7902, "step": 1721 }, { "epoch": 0.09598127194693719, "grad_norm": 0.5208713412284851, "learning_rate": 9.840533152127697e-05, "loss": 1.864, "step": 1722 }, { "epoch": 0.09603701020010033, "grad_norm": 0.5275363326072693, "learning_rate": 9.840309989949769e-05, "loss": 1.7866, "step": 1723 }, { "epoch": 0.09609274845326347, "grad_norm": 0.5389683246612549, "learning_rate": 9.84008667426543e-05, "loss": 1.8186, "step": 1724 }, { "epoch": 0.09614848670642662, "grad_norm": 0.5352590680122375, "learning_rate": 9.839863205081761e-05, "loss": 1.8207, "step": 1725 }, { "epoch": 0.09620422495958976, "grad_norm": 0.5303811430931091, "learning_rate": 9.839639582405849e-05, "loss": 1.8912, "step": 1726 }, { "epoch": 0.09625996321275292, "grad_norm": 0.4606251120567322, "learning_rate": 9.839415806244785e-05, "loss": 1.6001, "step": 1727 }, { "epoch": 0.09631570146591606, "grad_norm": 0.48041149973869324, "learning_rate": 9.839191876605668e-05, "loss": 1.6385, "step": 1728 }, { "epoch": 0.0963714397190792, "grad_norm": 0.5307428240776062, "learning_rate": 9.838967793495601e-05, "loss": 1.8683, "step": 1729 }, { "epoch": 0.09642717797224234, "grad_norm": 0.48561206459999084, "learning_rate": 9.838743556921688e-05, "loss": 1.7169, "step": 1730 }, { "epoch": 0.0964829162254055, "grad_norm": 0.5501610040664673, "learning_rate": 9.83851916689104e-05, "loss": 1.7714, "step": 1731 }, { "epoch": 0.09653865447856864, "grad_norm": 0.5766540765762329, "learning_rate": 9.838294623410776e-05, "loss": 1.961, "step": 1732 }, { "epoch": 0.09659439273173179, "grad_norm": 0.5572078824043274, "learning_rate": 9.838069926488016e-05, "loss": 1.9466, "step": 1733 }, { "epoch": 0.09665013098489493, "grad_norm": 0.5235105156898499, "learning_rate": 9.837845076129885e-05, "loss": 1.6369, "step": 1734 }, { "epoch": 0.09670586923805807, "grad_norm": 0.49561917781829834, "learning_rate": 9.837620072343514e-05, "loss": 1.6879, "step": 1735 }, { "epoch": 0.09676160749122123, "grad_norm": 0.577617883682251, "learning_rate": 9.83739491513604e-05, "loss": 2.0888, "step": 1736 }, { "epoch": 0.09681734574438437, "grad_norm": 0.559758722782135, "learning_rate": 9.837169604514605e-05, "loss": 2.0155, "step": 1737 }, { "epoch": 0.09687308399754752, "grad_norm": 0.4803854525089264, "learning_rate": 9.83694414048635e-05, "loss": 1.7143, "step": 1738 }, { "epoch": 0.09692882225071066, "grad_norm": 0.5286114811897278, "learning_rate": 9.83671852305843e-05, "loss": 1.7708, "step": 1739 }, { "epoch": 0.09698456050387382, "grad_norm": 0.5186529159545898, "learning_rate": 9.836492752237998e-05, "loss": 1.8367, "step": 1740 }, { "epoch": 0.09704029875703696, "grad_norm": 0.5168614983558655, "learning_rate": 9.836266828032214e-05, "loss": 1.6913, "step": 1741 }, { "epoch": 0.0970960370102001, "grad_norm": 0.5508823990821838, "learning_rate": 9.836040750448246e-05, "loss": 1.8108, "step": 1742 }, { "epoch": 0.09715177526336324, "grad_norm": 0.5152462720870972, "learning_rate": 9.835814519493258e-05, "loss": 1.7643, "step": 1743 }, { "epoch": 0.09720751351652639, "grad_norm": 0.5197470188140869, "learning_rate": 9.835588135174432e-05, "loss": 1.753, "step": 1744 }, { "epoch": 0.09726325176968954, "grad_norm": 0.5595375895500183, "learning_rate": 9.83536159749894e-05, "loss": 1.9646, "step": 1745 }, { "epoch": 0.09731899002285269, "grad_norm": 0.5276100635528564, "learning_rate": 9.835134906473973e-05, "loss": 1.8053, "step": 1746 }, { "epoch": 0.09737472827601583, "grad_norm": 0.543694257736206, "learning_rate": 9.834908062106716e-05, "loss": 1.9073, "step": 1747 }, { "epoch": 0.09743046652917897, "grad_norm": 0.5280660390853882, "learning_rate": 9.834681064404366e-05, "loss": 1.8642, "step": 1748 }, { "epoch": 0.09748620478234211, "grad_norm": 0.5228556394577026, "learning_rate": 9.83445391337412e-05, "loss": 1.7084, "step": 1749 }, { "epoch": 0.09754194303550527, "grad_norm": 0.5147905349731445, "learning_rate": 9.834226609023183e-05, "loss": 1.7273, "step": 1750 }, { "epoch": 0.09759768128866841, "grad_norm": 0.6363779306411743, "learning_rate": 9.833999151358763e-05, "loss": 2.3455, "step": 1751 }, { "epoch": 0.09765341954183156, "grad_norm": 0.4779658317565918, "learning_rate": 9.833771540388074e-05, "loss": 1.5965, "step": 1752 }, { "epoch": 0.0977091577949947, "grad_norm": 0.5493218302726746, "learning_rate": 9.833543776118334e-05, "loss": 1.7655, "step": 1753 }, { "epoch": 0.09776489604815786, "grad_norm": 0.5027639865875244, "learning_rate": 9.833315858556769e-05, "loss": 1.6425, "step": 1754 }, { "epoch": 0.097820634301321, "grad_norm": 0.5259470343589783, "learning_rate": 9.833087787710604e-05, "loss": 1.8848, "step": 1755 }, { "epoch": 0.09787637255448414, "grad_norm": 0.5296250581741333, "learning_rate": 9.832859563587073e-05, "loss": 1.6713, "step": 1756 }, { "epoch": 0.09793211080764729, "grad_norm": 0.5273899435997009, "learning_rate": 9.832631186193414e-05, "loss": 1.7833, "step": 1757 }, { "epoch": 0.09798784906081043, "grad_norm": 0.5987624526023865, "learning_rate": 9.832402655536869e-05, "loss": 2.0934, "step": 1758 }, { "epoch": 0.09804358731397358, "grad_norm": 0.5442295074462891, "learning_rate": 9.83217397162469e-05, "loss": 1.6506, "step": 1759 }, { "epoch": 0.09809932556713673, "grad_norm": 0.6511545181274414, "learning_rate": 9.831945134464123e-05, "loss": 2.1311, "step": 1760 }, { "epoch": 0.09815506382029987, "grad_norm": 0.5505144596099854, "learning_rate": 9.831716144062431e-05, "loss": 1.7606, "step": 1761 }, { "epoch": 0.09821080207346301, "grad_norm": 0.5241886973381042, "learning_rate": 9.831487000426871e-05, "loss": 1.7404, "step": 1762 }, { "epoch": 0.09826654032662617, "grad_norm": 0.5306397080421448, "learning_rate": 9.831257703564715e-05, "loss": 1.7232, "step": 1763 }, { "epoch": 0.09832227857978931, "grad_norm": 0.5829235315322876, "learning_rate": 9.831028253483232e-05, "loss": 1.8867, "step": 1764 }, { "epoch": 0.09837801683295246, "grad_norm": 0.5258575677871704, "learning_rate": 9.8307986501897e-05, "loss": 1.6442, "step": 1765 }, { "epoch": 0.0984337550861156, "grad_norm": 0.5493606328964233, "learning_rate": 9.8305688936914e-05, "loss": 2.025, "step": 1766 }, { "epoch": 0.09848949333927874, "grad_norm": 0.5285725593566895, "learning_rate": 9.83033898399562e-05, "loss": 1.683, "step": 1767 }, { "epoch": 0.0985452315924419, "grad_norm": 0.590203046798706, "learning_rate": 9.830108921109648e-05, "loss": 2.0356, "step": 1768 }, { "epoch": 0.09860096984560504, "grad_norm": 0.47736695408821106, "learning_rate": 9.829878705040784e-05, "loss": 1.2685, "step": 1769 }, { "epoch": 0.09865670809876818, "grad_norm": 0.5433778762817383, "learning_rate": 9.829648335796327e-05, "loss": 1.5734, "step": 1770 }, { "epoch": 0.09871244635193133, "grad_norm": 0.533301591873169, "learning_rate": 9.829417813383584e-05, "loss": 1.6253, "step": 1771 }, { "epoch": 0.09876818460509447, "grad_norm": 0.5619016289710999, "learning_rate": 9.829187137809865e-05, "loss": 1.9336, "step": 1772 }, { "epoch": 0.09882392285825763, "grad_norm": 0.5166584849357605, "learning_rate": 9.828956309082487e-05, "loss": 1.6934, "step": 1773 }, { "epoch": 0.09887966111142077, "grad_norm": 0.550294041633606, "learning_rate": 9.828725327208769e-05, "loss": 1.7357, "step": 1774 }, { "epoch": 0.09893539936458391, "grad_norm": 0.5708268880844116, "learning_rate": 9.828494192196037e-05, "loss": 1.75, "step": 1775 }, { "epoch": 0.09899113761774705, "grad_norm": 0.5142853856086731, "learning_rate": 9.828262904051621e-05, "loss": 1.8905, "step": 1776 }, { "epoch": 0.09904687587091021, "grad_norm": 0.5133590698242188, "learning_rate": 9.828031462782858e-05, "loss": 1.7111, "step": 1777 }, { "epoch": 0.09910261412407335, "grad_norm": 0.491804838180542, "learning_rate": 9.827799868397086e-05, "loss": 1.7898, "step": 1778 }, { "epoch": 0.0991583523772365, "grad_norm": 0.5558345913887024, "learning_rate": 9.827568120901649e-05, "loss": 1.8621, "step": 1779 }, { "epoch": 0.09921409063039964, "grad_norm": 0.5390424132347107, "learning_rate": 9.827336220303898e-05, "loss": 1.5574, "step": 1780 }, { "epoch": 0.09926982888356278, "grad_norm": 0.5201495885848999, "learning_rate": 9.827104166611188e-05, "loss": 1.7218, "step": 1781 }, { "epoch": 0.09932556713672594, "grad_norm": 0.49533358216285706, "learning_rate": 9.826871959830877e-05, "loss": 1.6587, "step": 1782 }, { "epoch": 0.09938130538988908, "grad_norm": 0.5522517561912537, "learning_rate": 9.826639599970331e-05, "loss": 1.9942, "step": 1783 }, { "epoch": 0.09943704364305223, "grad_norm": 0.5211175680160522, "learning_rate": 9.826407087036918e-05, "loss": 1.7953, "step": 1784 }, { "epoch": 0.09949278189621537, "grad_norm": 0.5591548681259155, "learning_rate": 9.82617442103801e-05, "loss": 1.7257, "step": 1785 }, { "epoch": 0.09954852014937852, "grad_norm": 0.5057593584060669, "learning_rate": 9.82594160198099e-05, "loss": 1.6209, "step": 1786 }, { "epoch": 0.09960425840254167, "grad_norm": 0.4974839389324188, "learning_rate": 9.82570862987324e-05, "loss": 1.7242, "step": 1787 }, { "epoch": 0.09965999665570481, "grad_norm": 0.580697238445282, "learning_rate": 9.825475504722147e-05, "loss": 1.8402, "step": 1788 }, { "epoch": 0.09971573490886795, "grad_norm": 0.5298492908477783, "learning_rate": 9.825242226535106e-05, "loss": 1.5434, "step": 1789 }, { "epoch": 0.0997714731620311, "grad_norm": 0.5714828372001648, "learning_rate": 9.825008795319514e-05, "loss": 1.8505, "step": 1790 }, { "epoch": 0.09982721141519425, "grad_norm": 0.5840202569961548, "learning_rate": 9.824775211082776e-05, "loss": 1.9345, "step": 1791 }, { "epoch": 0.0998829496683574, "grad_norm": 0.495969295501709, "learning_rate": 9.824541473832298e-05, "loss": 1.6482, "step": 1792 }, { "epoch": 0.09993868792152054, "grad_norm": 0.537111759185791, "learning_rate": 9.824307583575494e-05, "loss": 1.6791, "step": 1793 }, { "epoch": 0.09999442617468368, "grad_norm": 0.5053449869155884, "learning_rate": 9.82407354031978e-05, "loss": 1.6764, "step": 1794 }, { "epoch": 0.10005016442784682, "grad_norm": 0.5327693223953247, "learning_rate": 9.82383934407258e-05, "loss": 1.7993, "step": 1795 }, { "epoch": 0.10010590268100998, "grad_norm": 0.49914291501045227, "learning_rate": 9.823604994841322e-05, "loss": 1.9674, "step": 1796 }, { "epoch": 0.10016164093417312, "grad_norm": 0.5144324898719788, "learning_rate": 9.823370492633435e-05, "loss": 1.7585, "step": 1797 }, { "epoch": 0.10021737918733627, "grad_norm": 0.5108045935630798, "learning_rate": 9.823135837456362e-05, "loss": 1.7215, "step": 1798 }, { "epoch": 0.10027311744049941, "grad_norm": 0.5693103671073914, "learning_rate": 9.822901029317537e-05, "loss": 1.7812, "step": 1799 }, { "epoch": 0.10032885569366257, "grad_norm": 0.49847400188446045, "learning_rate": 9.822666068224412e-05, "loss": 1.6675, "step": 1800 }, { "epoch": 0.10038459394682571, "grad_norm": 0.5565662384033203, "learning_rate": 9.822430954184439e-05, "loss": 1.8071, "step": 1801 }, { "epoch": 0.10044033219998885, "grad_norm": 0.5412677526473999, "learning_rate": 9.82219568720507e-05, "loss": 1.7311, "step": 1802 }, { "epoch": 0.100496070453152, "grad_norm": 0.5256420373916626, "learning_rate": 9.821960267293771e-05, "loss": 1.8179, "step": 1803 }, { "epoch": 0.10055180870631514, "grad_norm": 0.486968457698822, "learning_rate": 9.821724694458006e-05, "loss": 1.7443, "step": 1804 }, { "epoch": 0.1006075469594783, "grad_norm": 0.5230684280395508, "learning_rate": 9.821488968705246e-05, "loss": 1.8426, "step": 1805 }, { "epoch": 0.10066328521264144, "grad_norm": 0.5057176351547241, "learning_rate": 9.821253090042967e-05, "loss": 1.6857, "step": 1806 }, { "epoch": 0.10071902346580458, "grad_norm": 0.5477109551429749, "learning_rate": 9.821017058478653e-05, "loss": 1.904, "step": 1807 }, { "epoch": 0.10077476171896772, "grad_norm": 0.5054430961608887, "learning_rate": 9.820780874019782e-05, "loss": 1.8538, "step": 1808 }, { "epoch": 0.10083049997213088, "grad_norm": 0.5614181160926819, "learning_rate": 9.82054453667385e-05, "loss": 1.9318, "step": 1809 }, { "epoch": 0.10088623822529402, "grad_norm": 0.49829983711242676, "learning_rate": 9.820308046448353e-05, "loss": 1.6044, "step": 1810 }, { "epoch": 0.10094197647845717, "grad_norm": 0.53876793384552, "learning_rate": 9.820071403350787e-05, "loss": 1.7234, "step": 1811 }, { "epoch": 0.10099771473162031, "grad_norm": 0.5352075695991516, "learning_rate": 9.81983460738866e-05, "loss": 1.7911, "step": 1812 }, { "epoch": 0.10105345298478345, "grad_norm": 0.5328055024147034, "learning_rate": 9.819597658569479e-05, "loss": 1.8147, "step": 1813 }, { "epoch": 0.10110919123794661, "grad_norm": 0.5261515378952026, "learning_rate": 9.819360556900763e-05, "loss": 1.8057, "step": 1814 }, { "epoch": 0.10116492949110975, "grad_norm": 0.5476046204566956, "learning_rate": 9.819123302390027e-05, "loss": 1.7813, "step": 1815 }, { "epoch": 0.1012206677442729, "grad_norm": 0.5293675661087036, "learning_rate": 9.818885895044799e-05, "loss": 1.7398, "step": 1816 }, { "epoch": 0.10127640599743604, "grad_norm": 0.6075041890144348, "learning_rate": 9.818648334872607e-05, "loss": 1.985, "step": 1817 }, { "epoch": 0.10133214425059918, "grad_norm": 0.5815473794937134, "learning_rate": 9.818410621880982e-05, "loss": 1.7932, "step": 1818 }, { "epoch": 0.10138788250376234, "grad_norm": 0.546378493309021, "learning_rate": 9.818172756077466e-05, "loss": 1.8672, "step": 1819 }, { "epoch": 0.10144362075692548, "grad_norm": 0.5089141130447388, "learning_rate": 9.817934737469603e-05, "loss": 1.4847, "step": 1820 }, { "epoch": 0.10149935901008862, "grad_norm": 0.5070534348487854, "learning_rate": 9.81769656606494e-05, "loss": 1.6301, "step": 1821 }, { "epoch": 0.10155509726325176, "grad_norm": 0.5128391981124878, "learning_rate": 9.817458241871032e-05, "loss": 1.8199, "step": 1822 }, { "epoch": 0.10161083551641492, "grad_norm": 0.5569765567779541, "learning_rate": 9.817219764895435e-05, "loss": 1.7238, "step": 1823 }, { "epoch": 0.10166657376957806, "grad_norm": 0.5038780570030212, "learning_rate": 9.816981135145714e-05, "loss": 1.7099, "step": 1824 }, { "epoch": 0.10172231202274121, "grad_norm": 0.5122333765029907, "learning_rate": 9.816742352629437e-05, "loss": 1.7679, "step": 1825 }, { "epoch": 0.10177805027590435, "grad_norm": 0.5544700026512146, "learning_rate": 9.816503417354174e-05, "loss": 2.0049, "step": 1826 }, { "epoch": 0.10183378852906749, "grad_norm": 0.5663131475448608, "learning_rate": 9.816264329327507e-05, "loss": 1.7042, "step": 1827 }, { "epoch": 0.10188952678223065, "grad_norm": 0.5186511278152466, "learning_rate": 9.816025088557015e-05, "loss": 1.7472, "step": 1828 }, { "epoch": 0.10194526503539379, "grad_norm": 0.5595180988311768, "learning_rate": 9.815785695050288e-05, "loss": 1.6525, "step": 1829 }, { "epoch": 0.10200100328855694, "grad_norm": 0.49748462438583374, "learning_rate": 9.815546148814915e-05, "loss": 1.6744, "step": 1830 }, { "epoch": 0.10205674154172008, "grad_norm": 0.47154897451400757, "learning_rate": 9.815306449858497e-05, "loss": 1.6183, "step": 1831 }, { "epoch": 0.10211247979488323, "grad_norm": 0.5415584444999695, "learning_rate": 9.815066598188631e-05, "loss": 1.842, "step": 1832 }, { "epoch": 0.10216821804804638, "grad_norm": 0.5106571912765503, "learning_rate": 9.814826593812928e-05, "loss": 1.6504, "step": 1833 }, { "epoch": 0.10222395630120952, "grad_norm": 0.5451028347015381, "learning_rate": 9.814586436738998e-05, "loss": 1.8817, "step": 1834 }, { "epoch": 0.10227969455437266, "grad_norm": 0.5032516121864319, "learning_rate": 9.814346126974455e-05, "loss": 1.8143, "step": 1835 }, { "epoch": 0.1023354328075358, "grad_norm": 0.4844000041484833, "learning_rate": 9.814105664526925e-05, "loss": 1.8255, "step": 1836 }, { "epoch": 0.10239117106069896, "grad_norm": 0.8231089115142822, "learning_rate": 9.81386504940403e-05, "loss": 1.5754, "step": 1837 }, { "epoch": 0.1024469093138621, "grad_norm": 0.5142394304275513, "learning_rate": 9.813624281613403e-05, "loss": 1.7516, "step": 1838 }, { "epoch": 0.10250264756702525, "grad_norm": 0.5010998249053955, "learning_rate": 9.813383361162678e-05, "loss": 1.7164, "step": 1839 }, { "epoch": 0.10255838582018839, "grad_norm": 0.5169504284858704, "learning_rate": 9.813142288059497e-05, "loss": 1.4974, "step": 1840 }, { "epoch": 0.10261412407335155, "grad_norm": 0.5264306664466858, "learning_rate": 9.812901062311507e-05, "loss": 1.6087, "step": 1841 }, { "epoch": 0.10266986232651469, "grad_norm": 0.5117889642715454, "learning_rate": 9.812659683926355e-05, "loss": 1.734, "step": 1842 }, { "epoch": 0.10272560057967783, "grad_norm": 0.5216721296310425, "learning_rate": 9.812418152911697e-05, "loss": 1.7643, "step": 1843 }, { "epoch": 0.10278133883284098, "grad_norm": 0.5514086484909058, "learning_rate": 9.812176469275196e-05, "loss": 1.7052, "step": 1844 }, { "epoch": 0.10283707708600412, "grad_norm": 0.5310468077659607, "learning_rate": 9.811934633024514e-05, "loss": 1.8478, "step": 1845 }, { "epoch": 0.10289281533916728, "grad_norm": 0.5535829067230225, "learning_rate": 9.811692644167318e-05, "loss": 1.7884, "step": 1846 }, { "epoch": 0.10294855359233042, "grad_norm": 0.5332193374633789, "learning_rate": 9.811450502711288e-05, "loss": 1.7511, "step": 1847 }, { "epoch": 0.10300429184549356, "grad_norm": 0.5547590851783752, "learning_rate": 9.8112082086641e-05, "loss": 1.7348, "step": 1848 }, { "epoch": 0.1030600300986567, "grad_norm": 0.5098549127578735, "learning_rate": 9.810965762033439e-05, "loss": 1.8117, "step": 1849 }, { "epoch": 0.10311576835181985, "grad_norm": 0.4965379238128662, "learning_rate": 9.810723162826994e-05, "loss": 1.6535, "step": 1850 }, { "epoch": 0.103171506604983, "grad_norm": 0.5498190522193909, "learning_rate": 9.810480411052458e-05, "loss": 1.8094, "step": 1851 }, { "epoch": 0.10322724485814615, "grad_norm": 0.5419559478759766, "learning_rate": 9.81023750671753e-05, "loss": 1.8347, "step": 1852 }, { "epoch": 0.10328298311130929, "grad_norm": 0.5136609077453613, "learning_rate": 9.809994449829916e-05, "loss": 1.8038, "step": 1853 }, { "epoch": 0.10333872136447243, "grad_norm": 0.4600328207015991, "learning_rate": 9.809751240397321e-05, "loss": 1.5616, "step": 1854 }, { "epoch": 0.10339445961763559, "grad_norm": 0.5725501775741577, "learning_rate": 9.80950787842746e-05, "loss": 2.0217, "step": 1855 }, { "epoch": 0.10345019787079873, "grad_norm": 0.4968816936016083, "learning_rate": 9.809264363928049e-05, "loss": 1.6151, "step": 1856 }, { "epoch": 0.10350593612396188, "grad_norm": 0.5521273016929626, "learning_rate": 9.809020696906815e-05, "loss": 1.5242, "step": 1857 }, { "epoch": 0.10356167437712502, "grad_norm": 0.526759684085846, "learning_rate": 9.80877687737148e-05, "loss": 1.6917, "step": 1858 }, { "epoch": 0.10361741263028816, "grad_norm": 0.5235029458999634, "learning_rate": 9.808532905329781e-05, "loss": 1.785, "step": 1859 }, { "epoch": 0.10367315088345132, "grad_norm": 0.5284624099731445, "learning_rate": 9.808288780789454e-05, "loss": 1.8857, "step": 1860 }, { "epoch": 0.10372888913661446, "grad_norm": 0.5086808800697327, "learning_rate": 9.80804450375824e-05, "loss": 1.5768, "step": 1861 }, { "epoch": 0.1037846273897776, "grad_norm": 0.6029835343360901, "learning_rate": 9.807800074243888e-05, "loss": 2.1482, "step": 1862 }, { "epoch": 0.10384036564294075, "grad_norm": 0.5451070666313171, "learning_rate": 9.80755549225415e-05, "loss": 1.6884, "step": 1863 }, { "epoch": 0.1038961038961039, "grad_norm": 0.5617519021034241, "learning_rate": 9.807310757796781e-05, "loss": 1.9665, "step": 1864 }, { "epoch": 0.10395184214926705, "grad_norm": 0.6114406585693359, "learning_rate": 9.807065870879544e-05, "loss": 1.9696, "step": 1865 }, { "epoch": 0.10400758040243019, "grad_norm": 0.5124810338020325, "learning_rate": 9.806820831510204e-05, "loss": 1.6848, "step": 1866 }, { "epoch": 0.10406331865559333, "grad_norm": 0.5385152697563171, "learning_rate": 9.806575639696533e-05, "loss": 1.6808, "step": 1867 }, { "epoch": 0.10411905690875647, "grad_norm": 0.49392756819725037, "learning_rate": 9.806330295446307e-05, "loss": 1.8179, "step": 1868 }, { "epoch": 0.10417479516191963, "grad_norm": 0.49383312463760376, "learning_rate": 9.806084798767307e-05, "loss": 1.5517, "step": 1869 }, { "epoch": 0.10423053341508277, "grad_norm": 0.5276709198951721, "learning_rate": 9.805839149667319e-05, "loss": 1.7125, "step": 1870 }, { "epoch": 0.10428627166824592, "grad_norm": 0.5694584250450134, "learning_rate": 9.805593348154131e-05, "loss": 1.9891, "step": 1871 }, { "epoch": 0.10434200992140906, "grad_norm": 0.5705782771110535, "learning_rate": 9.805347394235543e-05, "loss": 1.779, "step": 1872 }, { "epoch": 0.1043977481745722, "grad_norm": 0.543282151222229, "learning_rate": 9.805101287919352e-05, "loss": 1.898, "step": 1873 }, { "epoch": 0.10445348642773536, "grad_norm": 0.5607357025146484, "learning_rate": 9.804855029213365e-05, "loss": 1.9422, "step": 1874 }, { "epoch": 0.1045092246808985, "grad_norm": 0.548055112361908, "learning_rate": 9.804608618125388e-05, "loss": 1.776, "step": 1875 }, { "epoch": 0.10456496293406164, "grad_norm": 0.528634250164032, "learning_rate": 9.804362054663241e-05, "loss": 1.7196, "step": 1876 }, { "epoch": 0.10462070118722479, "grad_norm": 0.5074811577796936, "learning_rate": 9.80411533883474e-05, "loss": 1.6667, "step": 1877 }, { "epoch": 0.10467643944038794, "grad_norm": 0.5272465944290161, "learning_rate": 9.80386847064771e-05, "loss": 1.8897, "step": 1878 }, { "epoch": 0.10473217769355109, "grad_norm": 0.5819423198699951, "learning_rate": 9.80362145010998e-05, "loss": 1.868, "step": 1879 }, { "epoch": 0.10478791594671423, "grad_norm": 0.4952581226825714, "learning_rate": 9.803374277229387e-05, "loss": 1.7449, "step": 1880 }, { "epoch": 0.10484365419987737, "grad_norm": 0.5459893345832825, "learning_rate": 9.803126952013766e-05, "loss": 1.7454, "step": 1881 }, { "epoch": 0.10489939245304052, "grad_norm": 0.4974026381969452, "learning_rate": 9.802879474470964e-05, "loss": 1.5892, "step": 1882 }, { "epoch": 0.10495513070620367, "grad_norm": 0.503982424736023, "learning_rate": 9.802631844608825e-05, "loss": 1.608, "step": 1883 }, { "epoch": 0.10501086895936682, "grad_norm": 0.5444994568824768, "learning_rate": 9.802384062435206e-05, "loss": 1.8286, "step": 1884 }, { "epoch": 0.10506660721252996, "grad_norm": 0.5099791288375854, "learning_rate": 9.802136127957965e-05, "loss": 1.7811, "step": 1885 }, { "epoch": 0.1051223454656931, "grad_norm": 0.5670564770698547, "learning_rate": 9.801888041184963e-05, "loss": 2.0036, "step": 1886 }, { "epoch": 0.10517808371885626, "grad_norm": 0.5026718378067017, "learning_rate": 9.801639802124071e-05, "loss": 1.6716, "step": 1887 }, { "epoch": 0.1052338219720194, "grad_norm": 0.519005298614502, "learning_rate": 9.801391410783161e-05, "loss": 1.6815, "step": 1888 }, { "epoch": 0.10528956022518254, "grad_norm": 0.46930474042892456, "learning_rate": 9.801142867170106e-05, "loss": 1.7429, "step": 1889 }, { "epoch": 0.10534529847834569, "grad_norm": 0.5434656143188477, "learning_rate": 9.800894171292793e-05, "loss": 1.8671, "step": 1890 }, { "epoch": 0.10540103673150883, "grad_norm": 0.5062917470932007, "learning_rate": 9.80064532315911e-05, "loss": 1.6347, "step": 1891 }, { "epoch": 0.10545677498467199, "grad_norm": 0.5208712220191956, "learning_rate": 9.800396322776945e-05, "loss": 1.601, "step": 1892 }, { "epoch": 0.10551251323783513, "grad_norm": 0.49505361914634705, "learning_rate": 9.800147170154199e-05, "loss": 1.7157, "step": 1893 }, { "epoch": 0.10556825149099827, "grad_norm": 0.5282744765281677, "learning_rate": 9.79989786529877e-05, "loss": 1.7322, "step": 1894 }, { "epoch": 0.10562398974416141, "grad_norm": 0.5821601748466492, "learning_rate": 9.799648408218567e-05, "loss": 2.0407, "step": 1895 }, { "epoch": 0.10567972799732456, "grad_norm": 0.5044925212860107, "learning_rate": 9.7993987989215e-05, "loss": 1.6443, "step": 1896 }, { "epoch": 0.10573546625048771, "grad_norm": 0.5207780599594116, "learning_rate": 9.799149037415485e-05, "loss": 1.6341, "step": 1897 }, { "epoch": 0.10579120450365086, "grad_norm": 0.5176671743392944, "learning_rate": 9.798899123708444e-05, "loss": 1.7532, "step": 1898 }, { "epoch": 0.105846942756814, "grad_norm": 0.585341215133667, "learning_rate": 9.798649057808302e-05, "loss": 1.7511, "step": 1899 }, { "epoch": 0.10590268100997714, "grad_norm": 0.5633143782615662, "learning_rate": 9.798398839722991e-05, "loss": 1.8548, "step": 1900 }, { "epoch": 0.1059584192631403, "grad_norm": 0.5425167083740234, "learning_rate": 9.798148469460444e-05, "loss": 1.7457, "step": 1901 }, { "epoch": 0.10601415751630344, "grad_norm": 0.5065333247184753, "learning_rate": 9.797897947028602e-05, "loss": 1.6342, "step": 1902 }, { "epoch": 0.10606989576946659, "grad_norm": 0.4805918037891388, "learning_rate": 9.797647272435413e-05, "loss": 1.6272, "step": 1903 }, { "epoch": 0.10612563402262973, "grad_norm": 0.49736079573631287, "learning_rate": 9.797396445688825e-05, "loss": 1.6666, "step": 1904 }, { "epoch": 0.10618137227579287, "grad_norm": 0.5496745705604553, "learning_rate": 9.797145466796791e-05, "loss": 1.7214, "step": 1905 }, { "epoch": 0.10623711052895603, "grad_norm": 0.5134656429290771, "learning_rate": 9.796894335767272e-05, "loss": 1.7156, "step": 1906 }, { "epoch": 0.10629284878211917, "grad_norm": 0.5449696183204651, "learning_rate": 9.796643052608232e-05, "loss": 1.7284, "step": 1907 }, { "epoch": 0.10634858703528231, "grad_norm": 0.5344961881637573, "learning_rate": 9.796391617327643e-05, "loss": 1.514, "step": 1908 }, { "epoch": 0.10640432528844546, "grad_norm": 0.5717931389808655, "learning_rate": 9.796140029933474e-05, "loss": 1.9562, "step": 1909 }, { "epoch": 0.10646006354160861, "grad_norm": 0.5507314205169678, "learning_rate": 9.795888290433708e-05, "loss": 1.8475, "step": 1910 }, { "epoch": 0.10651580179477176, "grad_norm": 0.4807168245315552, "learning_rate": 9.795636398836328e-05, "loss": 1.4198, "step": 1911 }, { "epoch": 0.1065715400479349, "grad_norm": 0.5163860321044922, "learning_rate": 9.795384355149321e-05, "loss": 1.7098, "step": 1912 }, { "epoch": 0.10662727830109804, "grad_norm": 0.5876139998435974, "learning_rate": 9.795132159380683e-05, "loss": 1.8379, "step": 1913 }, { "epoch": 0.10668301655426118, "grad_norm": 0.5147418975830078, "learning_rate": 9.794879811538409e-05, "loss": 1.8069, "step": 1914 }, { "epoch": 0.10673875480742434, "grad_norm": 0.5539793372154236, "learning_rate": 9.794627311630503e-05, "loss": 1.9336, "step": 1915 }, { "epoch": 0.10679449306058748, "grad_norm": 0.5565729737281799, "learning_rate": 9.794374659664975e-05, "loss": 1.8024, "step": 1916 }, { "epoch": 0.10685023131375063, "grad_norm": 0.509848952293396, "learning_rate": 9.794121855649834e-05, "loss": 1.6553, "step": 1917 }, { "epoch": 0.10690596956691377, "grad_norm": 0.5031093955039978, "learning_rate": 9.793868899593101e-05, "loss": 1.6452, "step": 1918 }, { "epoch": 0.10696170782007691, "grad_norm": 0.5101149082183838, "learning_rate": 9.793615791502794e-05, "loss": 1.5787, "step": 1919 }, { "epoch": 0.10701744607324007, "grad_norm": 0.5462785363197327, "learning_rate": 9.793362531386946e-05, "loss": 1.7273, "step": 1920 }, { "epoch": 0.10707318432640321, "grad_norm": 0.5313560366630554, "learning_rate": 9.793109119253584e-05, "loss": 1.7061, "step": 1921 }, { "epoch": 0.10712892257956635, "grad_norm": 0.49144747853279114, "learning_rate": 9.792855555110747e-05, "loss": 1.6418, "step": 1922 }, { "epoch": 0.1071846608327295, "grad_norm": 0.5435053110122681, "learning_rate": 9.792601838966477e-05, "loss": 1.8774, "step": 1923 }, { "epoch": 0.10724039908589265, "grad_norm": 0.5598286390304565, "learning_rate": 9.792347970828819e-05, "loss": 1.8705, "step": 1924 }, { "epoch": 0.1072961373390558, "grad_norm": 0.5478824377059937, "learning_rate": 9.792093950705824e-05, "loss": 1.6882, "step": 1925 }, { "epoch": 0.10735187559221894, "grad_norm": 0.5779083967208862, "learning_rate": 9.79183977860555e-05, "loss": 1.993, "step": 1926 }, { "epoch": 0.10740761384538208, "grad_norm": 0.5614520907402039, "learning_rate": 9.791585454536054e-05, "loss": 1.7984, "step": 1927 }, { "epoch": 0.10746335209854523, "grad_norm": 0.5752551555633545, "learning_rate": 9.791330978505406e-05, "loss": 1.781, "step": 1928 }, { "epoch": 0.10751909035170838, "grad_norm": 0.5250864624977112, "learning_rate": 9.791076350521675e-05, "loss": 1.8367, "step": 1929 }, { "epoch": 0.10757482860487153, "grad_norm": 0.5408803224563599, "learning_rate": 9.790821570592937e-05, "loss": 1.9812, "step": 1930 }, { "epoch": 0.10763056685803467, "grad_norm": 0.5511845350265503, "learning_rate": 9.790566638727268e-05, "loss": 1.9631, "step": 1931 }, { "epoch": 0.10768630511119781, "grad_norm": 0.5966324806213379, "learning_rate": 9.790311554932758e-05, "loss": 1.6961, "step": 1932 }, { "epoch": 0.10774204336436097, "grad_norm": 0.5062892436981201, "learning_rate": 9.790056319217495e-05, "loss": 1.4829, "step": 1933 }, { "epoch": 0.10779778161752411, "grad_norm": 0.5916358232498169, "learning_rate": 9.789800931589574e-05, "loss": 1.7646, "step": 1934 }, { "epoch": 0.10785351987068725, "grad_norm": 0.5008646845817566, "learning_rate": 9.789545392057093e-05, "loss": 1.6985, "step": 1935 }, { "epoch": 0.1079092581238504, "grad_norm": 0.557442843914032, "learning_rate": 9.789289700628158e-05, "loss": 1.6734, "step": 1936 }, { "epoch": 0.10796499637701354, "grad_norm": 0.5303389430046082, "learning_rate": 9.789033857310876e-05, "loss": 1.8051, "step": 1937 }, { "epoch": 0.1080207346301767, "grad_norm": 0.5422589182853699, "learning_rate": 9.788777862113363e-05, "loss": 1.7073, "step": 1938 }, { "epoch": 0.10807647288333984, "grad_norm": 0.49321499466896057, "learning_rate": 9.788521715043736e-05, "loss": 1.6106, "step": 1939 }, { "epoch": 0.10813221113650298, "grad_norm": 0.5515221953392029, "learning_rate": 9.78826541611012e-05, "loss": 1.9005, "step": 1940 }, { "epoch": 0.10818794938966612, "grad_norm": 0.5055232048034668, "learning_rate": 9.788008965320643e-05, "loss": 1.6169, "step": 1941 }, { "epoch": 0.10824368764282927, "grad_norm": 0.5074330568313599, "learning_rate": 9.787752362683438e-05, "loss": 1.6712, "step": 1942 }, { "epoch": 0.10829942589599242, "grad_norm": 0.5290434956550598, "learning_rate": 9.78749560820664e-05, "loss": 1.6697, "step": 1943 }, { "epoch": 0.10835516414915557, "grad_norm": 0.5382573008537292, "learning_rate": 9.787238701898397e-05, "loss": 1.6955, "step": 1944 }, { "epoch": 0.10841090240231871, "grad_norm": 0.5350417494773865, "learning_rate": 9.786981643766852e-05, "loss": 1.695, "step": 1945 }, { "epoch": 0.10846664065548185, "grad_norm": 0.5305573344230652, "learning_rate": 9.78672443382016e-05, "loss": 1.8205, "step": 1946 }, { "epoch": 0.10852237890864501, "grad_norm": 0.5057222247123718, "learning_rate": 9.786467072066478e-05, "loss": 1.7815, "step": 1947 }, { "epoch": 0.10857811716180815, "grad_norm": 0.5606647729873657, "learning_rate": 9.786209558513968e-05, "loss": 2.0612, "step": 1948 }, { "epoch": 0.1086338554149713, "grad_norm": 0.5300911068916321, "learning_rate": 9.785951893170795e-05, "loss": 1.8648, "step": 1949 }, { "epoch": 0.10868959366813444, "grad_norm": 0.5408658385276794, "learning_rate": 9.785694076045133e-05, "loss": 1.7291, "step": 1950 }, { "epoch": 0.10874533192129758, "grad_norm": 0.5921101570129395, "learning_rate": 9.785436107145156e-05, "loss": 1.9079, "step": 1951 }, { "epoch": 0.10880107017446074, "grad_norm": 0.5365302562713623, "learning_rate": 9.785177986479048e-05, "loss": 1.888, "step": 1952 }, { "epoch": 0.10885680842762388, "grad_norm": 0.5375866293907166, "learning_rate": 9.784919714054993e-05, "loss": 1.7309, "step": 1953 }, { "epoch": 0.10891254668078702, "grad_norm": 0.5292702317237854, "learning_rate": 9.784661289881183e-05, "loss": 1.7366, "step": 1954 }, { "epoch": 0.10896828493395017, "grad_norm": 0.5953987240791321, "learning_rate": 9.784402713965815e-05, "loss": 1.6749, "step": 1955 }, { "epoch": 0.10902402318711332, "grad_norm": 0.5666269659996033, "learning_rate": 9.784143986317084e-05, "loss": 1.8123, "step": 1956 }, { "epoch": 0.10907976144027647, "grad_norm": 0.4942094683647156, "learning_rate": 9.783885106943203e-05, "loss": 1.5919, "step": 1957 }, { "epoch": 0.10913549969343961, "grad_norm": 0.5365981459617615, "learning_rate": 9.783626075852377e-05, "loss": 1.8938, "step": 1958 }, { "epoch": 0.10919123794660275, "grad_norm": 0.4730222523212433, "learning_rate": 9.783366893052822e-05, "loss": 1.6972, "step": 1959 }, { "epoch": 0.1092469761997659, "grad_norm": 0.5012983679771423, "learning_rate": 9.783107558552759e-05, "loss": 1.5967, "step": 1960 }, { "epoch": 0.10930271445292905, "grad_norm": 0.47032400965690613, "learning_rate": 9.782848072360411e-05, "loss": 1.4359, "step": 1961 }, { "epoch": 0.1093584527060922, "grad_norm": 0.6051558256149292, "learning_rate": 9.782588434484008e-05, "loss": 1.8727, "step": 1962 }, { "epoch": 0.10941419095925534, "grad_norm": 0.5087974667549133, "learning_rate": 9.782328644931784e-05, "loss": 1.6863, "step": 1963 }, { "epoch": 0.10946992921241848, "grad_norm": 0.5419572591781616, "learning_rate": 9.782068703711979e-05, "loss": 1.8686, "step": 1964 }, { "epoch": 0.10952566746558162, "grad_norm": 0.5740787386894226, "learning_rate": 9.781808610832837e-05, "loss": 1.8671, "step": 1965 }, { "epoch": 0.10958140571874478, "grad_norm": 0.5375397801399231, "learning_rate": 9.781548366302604e-05, "loss": 1.855, "step": 1966 }, { "epoch": 0.10963714397190792, "grad_norm": 0.5186393857002258, "learning_rate": 9.781287970129536e-05, "loss": 1.8296, "step": 1967 }, { "epoch": 0.10969288222507106, "grad_norm": 0.5058977007865906, "learning_rate": 9.781027422321891e-05, "loss": 1.6181, "step": 1968 }, { "epoch": 0.10974862047823421, "grad_norm": 0.5131574273109436, "learning_rate": 9.78076672288793e-05, "loss": 1.8194, "step": 1969 }, { "epoch": 0.10980435873139736, "grad_norm": 0.5668989419937134, "learning_rate": 9.780505871835924e-05, "loss": 1.857, "step": 1970 }, { "epoch": 0.1098600969845605, "grad_norm": 0.5090118646621704, "learning_rate": 9.780244869174142e-05, "loss": 1.5722, "step": 1971 }, { "epoch": 0.10991583523772365, "grad_norm": 0.5472584962844849, "learning_rate": 9.779983714910865e-05, "loss": 1.7926, "step": 1972 }, { "epoch": 0.10997157349088679, "grad_norm": 0.5904543399810791, "learning_rate": 9.779722409054374e-05, "loss": 1.9054, "step": 1973 }, { "epoch": 0.11002731174404994, "grad_norm": 0.4884478747844696, "learning_rate": 9.779460951612955e-05, "loss": 1.5573, "step": 1974 }, { "epoch": 0.11008304999721309, "grad_norm": 0.6380166411399841, "learning_rate": 9.779199342594902e-05, "loss": 2.0516, "step": 1975 }, { "epoch": 0.11013878825037623, "grad_norm": 0.5148760080337524, "learning_rate": 9.778937582008509e-05, "loss": 1.7119, "step": 1976 }, { "epoch": 0.11019452650353938, "grad_norm": 0.5153675079345703, "learning_rate": 9.77867566986208e-05, "loss": 1.6784, "step": 1977 }, { "epoch": 0.11025026475670252, "grad_norm": 0.5181575417518616, "learning_rate": 9.77841360616392e-05, "loss": 1.4993, "step": 1978 }, { "epoch": 0.11030600300986568, "grad_norm": 0.557270348072052, "learning_rate": 9.778151390922341e-05, "loss": 1.8278, "step": 1979 }, { "epoch": 0.11036174126302882, "grad_norm": 0.570976972579956, "learning_rate": 9.777889024145657e-05, "loss": 1.9032, "step": 1980 }, { "epoch": 0.11041747951619196, "grad_norm": 0.5794844031333923, "learning_rate": 9.777626505842193e-05, "loss": 1.8758, "step": 1981 }, { "epoch": 0.1104732177693551, "grad_norm": 0.5161063075065613, "learning_rate": 9.777363836020268e-05, "loss": 1.8698, "step": 1982 }, { "epoch": 0.11052895602251825, "grad_norm": 0.5546018481254578, "learning_rate": 9.777101014688219e-05, "loss": 1.87, "step": 1983 }, { "epoch": 0.1105846942756814, "grad_norm": 0.5865330696105957, "learning_rate": 9.776838041854377e-05, "loss": 1.9022, "step": 1984 }, { "epoch": 0.11064043252884455, "grad_norm": 0.5667337775230408, "learning_rate": 9.776574917527083e-05, "loss": 2.0603, "step": 1985 }, { "epoch": 0.11069617078200769, "grad_norm": 0.5092570185661316, "learning_rate": 9.776311641714683e-05, "loss": 1.7887, "step": 1986 }, { "epoch": 0.11075190903517083, "grad_norm": 0.5329071879386902, "learning_rate": 9.776048214425525e-05, "loss": 1.7294, "step": 1987 }, { "epoch": 0.11080764728833398, "grad_norm": 0.5048893690109253, "learning_rate": 9.775784635667964e-05, "loss": 1.7357, "step": 1988 }, { "epoch": 0.11086338554149713, "grad_norm": 0.4852405786514282, "learning_rate": 9.77552090545036e-05, "loss": 1.7027, "step": 1989 }, { "epoch": 0.11091912379466028, "grad_norm": 0.5363536477088928, "learning_rate": 9.775257023781074e-05, "loss": 1.9082, "step": 1990 }, { "epoch": 0.11097486204782342, "grad_norm": 0.5514358878135681, "learning_rate": 9.774992990668479e-05, "loss": 1.8572, "step": 1991 }, { "epoch": 0.11103060030098656, "grad_norm": 0.5773457884788513, "learning_rate": 9.774728806120945e-05, "loss": 1.9287, "step": 1992 }, { "epoch": 0.11108633855414972, "grad_norm": 0.5018163323402405, "learning_rate": 9.774464470146851e-05, "loss": 1.6721, "step": 1993 }, { "epoch": 0.11114207680731286, "grad_norm": 0.5004386305809021, "learning_rate": 9.774199982754584e-05, "loss": 1.6999, "step": 1994 }, { "epoch": 0.111197815060476, "grad_norm": 0.5078005194664001, "learning_rate": 9.773935343952527e-05, "loss": 1.6968, "step": 1995 }, { "epoch": 0.11125355331363915, "grad_norm": 0.5355806946754456, "learning_rate": 9.773670553749075e-05, "loss": 1.8122, "step": 1996 }, { "epoch": 0.11130929156680229, "grad_norm": 0.5051989555358887, "learning_rate": 9.773405612152626e-05, "loss": 1.6712, "step": 1997 }, { "epoch": 0.11136502981996545, "grad_norm": 0.5549625754356384, "learning_rate": 9.773140519171582e-05, "loss": 1.8872, "step": 1998 }, { "epoch": 0.11142076807312859, "grad_norm": 0.5879496335983276, "learning_rate": 9.77287527481435e-05, "loss": 1.7659, "step": 1999 }, { "epoch": 0.11147650632629173, "grad_norm": 0.6350980401039124, "learning_rate": 9.772609879089341e-05, "loss": 1.9805, "step": 2000 }, { "epoch": 0.11153224457945488, "grad_norm": 0.5255335569381714, "learning_rate": 9.772344332004975e-05, "loss": 1.7215, "step": 2001 }, { "epoch": 0.11158798283261803, "grad_norm": 0.5538710355758667, "learning_rate": 9.77207863356967e-05, "loss": 1.8071, "step": 2002 }, { "epoch": 0.11164372108578118, "grad_norm": 0.5447118878364563, "learning_rate": 9.771812783791854e-05, "loss": 1.6401, "step": 2003 }, { "epoch": 0.11169945933894432, "grad_norm": 0.5420034527778625, "learning_rate": 9.771546782679959e-05, "loss": 1.7649, "step": 2004 }, { "epoch": 0.11175519759210746, "grad_norm": 0.5717622637748718, "learning_rate": 9.771280630242419e-05, "loss": 1.9245, "step": 2005 }, { "epoch": 0.1118109358452706, "grad_norm": 0.533752977848053, "learning_rate": 9.771014326487675e-05, "loss": 1.6562, "step": 2006 }, { "epoch": 0.11186667409843376, "grad_norm": 0.5668651461601257, "learning_rate": 9.770747871424175e-05, "loss": 1.8504, "step": 2007 }, { "epoch": 0.1119224123515969, "grad_norm": 0.5300382375717163, "learning_rate": 9.770481265060368e-05, "loss": 1.5858, "step": 2008 }, { "epoch": 0.11197815060476005, "grad_norm": 0.5205538272857666, "learning_rate": 9.770214507404709e-05, "loss": 1.8421, "step": 2009 }, { "epoch": 0.11203388885792319, "grad_norm": 0.5641254186630249, "learning_rate": 9.769947598465657e-05, "loss": 1.7521, "step": 2010 }, { "epoch": 0.11208962711108633, "grad_norm": 0.5881509184837341, "learning_rate": 9.76968053825168e-05, "loss": 1.8359, "step": 2011 }, { "epoch": 0.11214536536424949, "grad_norm": 0.5264688730239868, "learning_rate": 9.769413326771243e-05, "loss": 1.7792, "step": 2012 }, { "epoch": 0.11220110361741263, "grad_norm": 0.5596029758453369, "learning_rate": 9.769145964032824e-05, "loss": 1.8502, "step": 2013 }, { "epoch": 0.11225684187057577, "grad_norm": 0.5555474758148193, "learning_rate": 9.768878450044902e-05, "loss": 1.9158, "step": 2014 }, { "epoch": 0.11231258012373892, "grad_norm": 0.5508490800857544, "learning_rate": 9.768610784815959e-05, "loss": 1.5545, "step": 2015 }, { "epoch": 0.11236831837690207, "grad_norm": 0.5072826743125916, "learning_rate": 9.768342968354484e-05, "loss": 1.6679, "step": 2016 }, { "epoch": 0.11242405663006522, "grad_norm": 0.4995681941509247, "learning_rate": 9.768075000668974e-05, "loss": 1.7114, "step": 2017 }, { "epoch": 0.11247979488322836, "grad_norm": 0.5590416789054871, "learning_rate": 9.767806881767923e-05, "loss": 1.8553, "step": 2018 }, { "epoch": 0.1125355331363915, "grad_norm": 0.542676568031311, "learning_rate": 9.767538611659837e-05, "loss": 1.9799, "step": 2019 }, { "epoch": 0.11259127138955465, "grad_norm": 0.6015095710754395, "learning_rate": 9.767270190353221e-05, "loss": 2.0631, "step": 2020 }, { "epoch": 0.1126470096427178, "grad_norm": 0.5182809829711914, "learning_rate": 9.767001617856591e-05, "loss": 1.8081, "step": 2021 }, { "epoch": 0.11270274789588094, "grad_norm": 0.539851725101471, "learning_rate": 9.766732894178463e-05, "loss": 1.6224, "step": 2022 }, { "epoch": 0.11275848614904409, "grad_norm": 0.5738646388053894, "learning_rate": 9.766464019327359e-05, "loss": 1.8425, "step": 2023 }, { "epoch": 0.11281422440220723, "grad_norm": 0.5035516619682312, "learning_rate": 9.766194993311809e-05, "loss": 1.8101, "step": 2024 }, { "epoch": 0.11286996265537039, "grad_norm": 0.4765785038471222, "learning_rate": 9.76592581614034e-05, "loss": 1.7461, "step": 2025 }, { "epoch": 0.11292570090853353, "grad_norm": 0.5692024230957031, "learning_rate": 9.765656487821492e-05, "loss": 1.9905, "step": 2026 }, { "epoch": 0.11298143916169667, "grad_norm": 0.5034509301185608, "learning_rate": 9.765387008363807e-05, "loss": 1.7689, "step": 2027 }, { "epoch": 0.11303717741485982, "grad_norm": 0.5591553449630737, "learning_rate": 9.76511737777583e-05, "loss": 1.7994, "step": 2028 }, { "epoch": 0.11309291566802296, "grad_norm": 0.533530592918396, "learning_rate": 9.764847596066111e-05, "loss": 1.5192, "step": 2029 }, { "epoch": 0.11314865392118612, "grad_norm": 0.5049347281455994, "learning_rate": 9.764577663243209e-05, "loss": 1.5906, "step": 2030 }, { "epoch": 0.11320439217434926, "grad_norm": 0.4710226058959961, "learning_rate": 9.764307579315681e-05, "loss": 1.4503, "step": 2031 }, { "epoch": 0.1132601304275124, "grad_norm": 0.5490729212760925, "learning_rate": 9.764037344292096e-05, "loss": 1.7865, "step": 2032 }, { "epoch": 0.11331586868067554, "grad_norm": 0.5714886784553528, "learning_rate": 9.763766958181022e-05, "loss": 1.6803, "step": 2033 }, { "epoch": 0.11337160693383869, "grad_norm": 0.5637816786766052, "learning_rate": 9.763496420991037e-05, "loss": 1.902, "step": 2034 }, { "epoch": 0.11342734518700184, "grad_norm": 0.5324851870536804, "learning_rate": 9.763225732730716e-05, "loss": 1.7774, "step": 2035 }, { "epoch": 0.11348308344016499, "grad_norm": 0.542209267616272, "learning_rate": 9.762954893408646e-05, "loss": 1.7369, "step": 2036 }, { "epoch": 0.11353882169332813, "grad_norm": 0.5353888273239136, "learning_rate": 9.762683903033419e-05, "loss": 1.914, "step": 2037 }, { "epoch": 0.11359455994649127, "grad_norm": 0.5152493119239807, "learning_rate": 9.762412761613624e-05, "loss": 1.8155, "step": 2038 }, { "epoch": 0.11365029819965443, "grad_norm": 0.4723453223705292, "learning_rate": 9.762141469157865e-05, "loss": 1.6183, "step": 2039 }, { "epoch": 0.11370603645281757, "grad_norm": 0.5671008229255676, "learning_rate": 9.761870025674743e-05, "loss": 1.887, "step": 2040 }, { "epoch": 0.11376177470598071, "grad_norm": 0.5240710377693176, "learning_rate": 9.761598431172868e-05, "loss": 1.7928, "step": 2041 }, { "epoch": 0.11381751295914386, "grad_norm": 0.4852540194988251, "learning_rate": 9.761326685660852e-05, "loss": 1.6132, "step": 2042 }, { "epoch": 0.113873251212307, "grad_norm": 0.46512627601623535, "learning_rate": 9.761054789147315e-05, "loss": 1.4053, "step": 2043 }, { "epoch": 0.11392898946547016, "grad_norm": 0.5127692222595215, "learning_rate": 9.760782741640879e-05, "loss": 1.649, "step": 2044 }, { "epoch": 0.1139847277186333, "grad_norm": 0.5368222594261169, "learning_rate": 9.76051054315017e-05, "loss": 1.7286, "step": 2045 }, { "epoch": 0.11404046597179644, "grad_norm": 0.5699864625930786, "learning_rate": 9.760238193683824e-05, "loss": 1.7911, "step": 2046 }, { "epoch": 0.11409620422495959, "grad_norm": 0.59310382604599, "learning_rate": 9.759965693250477e-05, "loss": 1.7731, "step": 2047 }, { "epoch": 0.11415194247812274, "grad_norm": 0.5524492859840393, "learning_rate": 9.75969304185877e-05, "loss": 1.7917, "step": 2048 }, { "epoch": 0.11420768073128588, "grad_norm": 0.529346227645874, "learning_rate": 9.75942023951735e-05, "loss": 1.7298, "step": 2049 }, { "epoch": 0.11426341898444903, "grad_norm": 0.5188475847244263, "learning_rate": 9.75914728623487e-05, "loss": 1.8422, "step": 2050 }, { "epoch": 0.11431915723761217, "grad_norm": 0.5141621232032776, "learning_rate": 9.758874182019986e-05, "loss": 1.7194, "step": 2051 }, { "epoch": 0.11437489549077531, "grad_norm": 0.5103389024734497, "learning_rate": 9.758600926881358e-05, "loss": 1.782, "step": 2052 }, { "epoch": 0.11443063374393847, "grad_norm": 0.5371511578559875, "learning_rate": 9.758327520827654e-05, "loss": 1.8925, "step": 2053 }, { "epoch": 0.11448637199710161, "grad_norm": 0.528293788433075, "learning_rate": 9.758053963867544e-05, "loss": 1.5632, "step": 2054 }, { "epoch": 0.11454211025026476, "grad_norm": 0.5670381784439087, "learning_rate": 9.757780256009704e-05, "loss": 2.0612, "step": 2055 }, { "epoch": 0.1145978485034279, "grad_norm": 0.4997304677963257, "learning_rate": 9.757506397262814e-05, "loss": 1.4963, "step": 2056 }, { "epoch": 0.11465358675659104, "grad_norm": 0.5154783129692078, "learning_rate": 9.757232387635559e-05, "loss": 1.7024, "step": 2057 }, { "epoch": 0.1147093250097542, "grad_norm": 0.5076404213905334, "learning_rate": 9.75695822713663e-05, "loss": 1.7356, "step": 2058 }, { "epoch": 0.11476506326291734, "grad_norm": 0.5490261912345886, "learning_rate": 9.75668391577472e-05, "loss": 1.9454, "step": 2059 }, { "epoch": 0.11482080151608048, "grad_norm": 0.49244236946105957, "learning_rate": 9.756409453558531e-05, "loss": 1.7741, "step": 2060 }, { "epoch": 0.11487653976924363, "grad_norm": 0.5007554292678833, "learning_rate": 9.756134840496763e-05, "loss": 1.6877, "step": 2061 }, { "epoch": 0.11493227802240678, "grad_norm": 0.5688347816467285, "learning_rate": 9.75586007659813e-05, "loss": 1.8947, "step": 2062 }, { "epoch": 0.11498801627556993, "grad_norm": 0.49076688289642334, "learning_rate": 9.755585161871344e-05, "loss": 1.632, "step": 2063 }, { "epoch": 0.11504375452873307, "grad_norm": 0.5263219475746155, "learning_rate": 9.755310096325123e-05, "loss": 1.8176, "step": 2064 }, { "epoch": 0.11509949278189621, "grad_norm": 0.5379471778869629, "learning_rate": 9.755034879968193e-05, "loss": 1.9844, "step": 2065 }, { "epoch": 0.11515523103505935, "grad_norm": 0.6128638982772827, "learning_rate": 9.754759512809277e-05, "loss": 2.0891, "step": 2066 }, { "epoch": 0.11521096928822251, "grad_norm": 0.513877272605896, "learning_rate": 9.754483994857115e-05, "loss": 1.7906, "step": 2067 }, { "epoch": 0.11526670754138565, "grad_norm": 0.5699423551559448, "learning_rate": 9.75420832612044e-05, "loss": 1.9245, "step": 2068 }, { "epoch": 0.1153224457945488, "grad_norm": 0.49974846839904785, "learning_rate": 9.753932506607995e-05, "loss": 1.5529, "step": 2069 }, { "epoch": 0.11537818404771194, "grad_norm": 0.5551686882972717, "learning_rate": 9.753656536328528e-05, "loss": 1.7138, "step": 2070 }, { "epoch": 0.1154339223008751, "grad_norm": 0.5302468538284302, "learning_rate": 9.753380415290792e-05, "loss": 1.7991, "step": 2071 }, { "epoch": 0.11548966055403824, "grad_norm": 0.5461943745613098, "learning_rate": 9.753104143503544e-05, "loss": 1.6249, "step": 2072 }, { "epoch": 0.11554539880720138, "grad_norm": 0.5242646336555481, "learning_rate": 9.752827720975544e-05, "loss": 1.7194, "step": 2073 }, { "epoch": 0.11560113706036453, "grad_norm": 0.5647328495979309, "learning_rate": 9.75255114771556e-05, "loss": 1.6221, "step": 2074 }, { "epoch": 0.11565687531352767, "grad_norm": 0.5108300447463989, "learning_rate": 9.752274423732364e-05, "loss": 1.5454, "step": 2075 }, { "epoch": 0.11571261356669083, "grad_norm": 0.5370137691497803, "learning_rate": 9.75199754903473e-05, "loss": 1.8162, "step": 2076 }, { "epoch": 0.11576835181985397, "grad_norm": 0.5308608412742615, "learning_rate": 9.75172052363144e-05, "loss": 1.8913, "step": 2077 }, { "epoch": 0.11582409007301711, "grad_norm": 0.5060725808143616, "learning_rate": 9.751443347531279e-05, "loss": 1.6392, "step": 2078 }, { "epoch": 0.11587982832618025, "grad_norm": 0.5402329564094543, "learning_rate": 9.751166020743037e-05, "loss": 1.6481, "step": 2079 }, { "epoch": 0.1159355665793434, "grad_norm": 0.5728126168251038, "learning_rate": 9.750888543275511e-05, "loss": 1.7507, "step": 2080 }, { "epoch": 0.11599130483250655, "grad_norm": 0.5055838227272034, "learning_rate": 9.750610915137502e-05, "loss": 1.7667, "step": 2081 }, { "epoch": 0.1160470430856697, "grad_norm": 0.5178690552711487, "learning_rate": 9.750333136337811e-05, "loss": 1.7303, "step": 2082 }, { "epoch": 0.11610278133883284, "grad_norm": 0.5922085642814636, "learning_rate": 9.750055206885249e-05, "loss": 1.9936, "step": 2083 }, { "epoch": 0.11615851959199598, "grad_norm": 0.5285540223121643, "learning_rate": 9.74977712678863e-05, "loss": 1.8642, "step": 2084 }, { "epoch": 0.11621425784515914, "grad_norm": 0.5517610907554626, "learning_rate": 9.749498896056775e-05, "loss": 1.8, "step": 2085 }, { "epoch": 0.11626999609832228, "grad_norm": 0.519136905670166, "learning_rate": 9.749220514698505e-05, "loss": 1.8553, "step": 2086 }, { "epoch": 0.11632573435148542, "grad_norm": 0.47392770648002625, "learning_rate": 9.748941982722652e-05, "loss": 1.5635, "step": 2087 }, { "epoch": 0.11638147260464857, "grad_norm": 0.5580193400382996, "learning_rate": 9.748663300138046e-05, "loss": 2.0887, "step": 2088 }, { "epoch": 0.11643721085781171, "grad_norm": 0.5110911726951599, "learning_rate": 9.748384466953529e-05, "loss": 1.7254, "step": 2089 }, { "epoch": 0.11649294911097487, "grad_norm": 0.5411677360534668, "learning_rate": 9.748105483177939e-05, "loss": 2.0895, "step": 2090 }, { "epoch": 0.11654868736413801, "grad_norm": 0.5149423480033875, "learning_rate": 9.747826348820129e-05, "loss": 1.6339, "step": 2091 }, { "epoch": 0.11660442561730115, "grad_norm": 0.48806729912757874, "learning_rate": 9.747547063888947e-05, "loss": 1.8714, "step": 2092 }, { "epoch": 0.1166601638704643, "grad_norm": 0.5147302746772766, "learning_rate": 9.747267628393252e-05, "loss": 1.8269, "step": 2093 }, { "epoch": 0.11671590212362745, "grad_norm": 0.512217104434967, "learning_rate": 9.746988042341906e-05, "loss": 1.7604, "step": 2094 }, { "epoch": 0.1167716403767906, "grad_norm": 0.66917484998703, "learning_rate": 9.746708305743778e-05, "loss": 2.2348, "step": 2095 }, { "epoch": 0.11682737862995374, "grad_norm": 0.5376080870628357, "learning_rate": 9.746428418607737e-05, "loss": 1.811, "step": 2096 }, { "epoch": 0.11688311688311688, "grad_norm": 0.5490595102310181, "learning_rate": 9.746148380942661e-05, "loss": 1.7822, "step": 2097 }, { "epoch": 0.11693885513628002, "grad_norm": 0.5195513367652893, "learning_rate": 9.745868192757429e-05, "loss": 1.815, "step": 2098 }, { "epoch": 0.11699459338944318, "grad_norm": 0.4978055953979492, "learning_rate": 9.745587854060929e-05, "loss": 1.6799, "step": 2099 }, { "epoch": 0.11705033164260632, "grad_norm": 0.47539737820625305, "learning_rate": 9.74530736486205e-05, "loss": 1.3444, "step": 2100 }, { "epoch": 0.11710606989576947, "grad_norm": 0.49834421277046204, "learning_rate": 9.74502672516969e-05, "loss": 1.6343, "step": 2101 }, { "epoch": 0.11716180814893261, "grad_norm": 0.5414234399795532, "learning_rate": 9.744745934992747e-05, "loss": 1.8732, "step": 2102 }, { "epoch": 0.11721754640209577, "grad_norm": 0.55171799659729, "learning_rate": 9.744464994340126e-05, "loss": 1.823, "step": 2103 }, { "epoch": 0.11727328465525891, "grad_norm": 0.545732319355011, "learning_rate": 9.744183903220738e-05, "loss": 1.6152, "step": 2104 }, { "epoch": 0.11732902290842205, "grad_norm": 0.5116435885429382, "learning_rate": 9.743902661643498e-05, "loss": 1.8159, "step": 2105 }, { "epoch": 0.1173847611615852, "grad_norm": 0.5736915469169617, "learning_rate": 9.743621269617324e-05, "loss": 2.0891, "step": 2106 }, { "epoch": 0.11744049941474834, "grad_norm": 0.5401880741119385, "learning_rate": 9.74333972715114e-05, "loss": 1.6851, "step": 2107 }, { "epoch": 0.1174962376679115, "grad_norm": 0.4980708658695221, "learning_rate": 9.743058034253876e-05, "loss": 1.7487, "step": 2108 }, { "epoch": 0.11755197592107464, "grad_norm": 0.5513383150100708, "learning_rate": 9.742776190934464e-05, "loss": 1.7077, "step": 2109 }, { "epoch": 0.11760771417423778, "grad_norm": 0.48612821102142334, "learning_rate": 9.742494197201845e-05, "loss": 1.7193, "step": 2110 }, { "epoch": 0.11766345242740092, "grad_norm": 0.5319970846176147, "learning_rate": 9.742212053064959e-05, "loss": 1.8341, "step": 2111 }, { "epoch": 0.11771919068056406, "grad_norm": 0.5188704133033752, "learning_rate": 9.741929758532758e-05, "loss": 1.7452, "step": 2112 }, { "epoch": 0.11777492893372722, "grad_norm": 0.569303035736084, "learning_rate": 9.741647313614191e-05, "loss": 1.7242, "step": 2113 }, { "epoch": 0.11783066718689036, "grad_norm": 0.5230869650840759, "learning_rate": 9.741364718318216e-05, "loss": 1.7484, "step": 2114 }, { "epoch": 0.11788640544005351, "grad_norm": 0.5458916425704956, "learning_rate": 9.741081972653798e-05, "loss": 1.8975, "step": 2115 }, { "epoch": 0.11794214369321665, "grad_norm": 0.5454350113868713, "learning_rate": 9.740799076629902e-05, "loss": 1.7848, "step": 2116 }, { "epoch": 0.1179978819463798, "grad_norm": 0.5229981541633606, "learning_rate": 9.7405160302555e-05, "loss": 1.7087, "step": 2117 }, { "epoch": 0.11805362019954295, "grad_norm": 0.5540334582328796, "learning_rate": 9.740232833539567e-05, "loss": 1.712, "step": 2118 }, { "epoch": 0.11810935845270609, "grad_norm": 0.5371966361999512, "learning_rate": 9.739949486491088e-05, "loss": 1.6682, "step": 2119 }, { "epoch": 0.11816509670586924, "grad_norm": 0.5578680038452148, "learning_rate": 9.739665989119047e-05, "loss": 1.7035, "step": 2120 }, { "epoch": 0.11822083495903238, "grad_norm": 0.49404215812683105, "learning_rate": 9.739382341432434e-05, "loss": 1.6535, "step": 2121 }, { "epoch": 0.11827657321219553, "grad_norm": 0.5198866724967957, "learning_rate": 9.739098543440246e-05, "loss": 1.9483, "step": 2122 }, { "epoch": 0.11833231146535868, "grad_norm": 0.5561308860778809, "learning_rate": 9.738814595151481e-05, "loss": 1.6287, "step": 2123 }, { "epoch": 0.11838804971852182, "grad_norm": 0.5929575562477112, "learning_rate": 9.73853049657515e-05, "loss": 1.8991, "step": 2124 }, { "epoch": 0.11844378797168496, "grad_norm": 0.5198292136192322, "learning_rate": 9.738246247720257e-05, "loss": 1.7004, "step": 2125 }, { "epoch": 0.11849952622484812, "grad_norm": 0.4800911247730255, "learning_rate": 9.73796184859582e-05, "loss": 1.8126, "step": 2126 }, { "epoch": 0.11855526447801126, "grad_norm": 0.5122108459472656, "learning_rate": 9.737677299210857e-05, "loss": 1.6761, "step": 2127 }, { "epoch": 0.1186110027311744, "grad_norm": 0.5015464425086975, "learning_rate": 9.737392599574391e-05, "loss": 1.6405, "step": 2128 }, { "epoch": 0.11866674098433755, "grad_norm": 0.560658872127533, "learning_rate": 9.737107749695456e-05, "loss": 1.8458, "step": 2129 }, { "epoch": 0.11872247923750069, "grad_norm": 0.5312667489051819, "learning_rate": 9.73682274958308e-05, "loss": 1.9419, "step": 2130 }, { "epoch": 0.11877821749066385, "grad_norm": 0.5537664294242859, "learning_rate": 9.736537599246305e-05, "loss": 2.0495, "step": 2131 }, { "epoch": 0.11883395574382699, "grad_norm": 0.5166563391685486, "learning_rate": 9.736252298694172e-05, "loss": 1.7997, "step": 2132 }, { "epoch": 0.11888969399699013, "grad_norm": 0.5567119121551514, "learning_rate": 9.735966847935732e-05, "loss": 2.0086, "step": 2133 }, { "epoch": 0.11894543225015328, "grad_norm": 0.5614973306655884, "learning_rate": 9.735681246980035e-05, "loss": 1.8669, "step": 2134 }, { "epoch": 0.11900117050331642, "grad_norm": 0.4755729138851166, "learning_rate": 9.73539549583614e-05, "loss": 1.4678, "step": 2135 }, { "epoch": 0.11905690875647958, "grad_norm": 0.5338446497917175, "learning_rate": 9.73510959451311e-05, "loss": 1.758, "step": 2136 }, { "epoch": 0.11911264700964272, "grad_norm": 0.5301800966262817, "learning_rate": 9.734823543020009e-05, "loss": 1.6377, "step": 2137 }, { "epoch": 0.11916838526280586, "grad_norm": 0.5584478378295898, "learning_rate": 9.734537341365914e-05, "loss": 1.8973, "step": 2138 }, { "epoch": 0.119224123515969, "grad_norm": 0.5499609112739563, "learning_rate": 9.734250989559896e-05, "loss": 1.8316, "step": 2139 }, { "epoch": 0.11927986176913216, "grad_norm": 0.5567249655723572, "learning_rate": 9.733964487611042e-05, "loss": 1.9231, "step": 2140 }, { "epoch": 0.1193356000222953, "grad_norm": 0.5121795535087585, "learning_rate": 9.733677835528434e-05, "loss": 1.7316, "step": 2141 }, { "epoch": 0.11939133827545845, "grad_norm": 0.5235653519630432, "learning_rate": 9.733391033321164e-05, "loss": 1.7328, "step": 2142 }, { "epoch": 0.11944707652862159, "grad_norm": 0.5482314229011536, "learning_rate": 9.733104080998329e-05, "loss": 1.9832, "step": 2143 }, { "epoch": 0.11950281478178473, "grad_norm": 0.4945628345012665, "learning_rate": 9.732816978569028e-05, "loss": 1.6102, "step": 2144 }, { "epoch": 0.11955855303494789, "grad_norm": 0.532642126083374, "learning_rate": 9.732529726042365e-05, "loss": 1.6543, "step": 2145 }, { "epoch": 0.11961429128811103, "grad_norm": 0.5531574487686157, "learning_rate": 9.732242323427455e-05, "loss": 1.8017, "step": 2146 }, { "epoch": 0.11967002954127418, "grad_norm": 0.595876932144165, "learning_rate": 9.731954770733407e-05, "loss": 2.0041, "step": 2147 }, { "epoch": 0.11972576779443732, "grad_norm": 0.5025404095649719, "learning_rate": 9.731667067969344e-05, "loss": 1.716, "step": 2148 }, { "epoch": 0.11978150604760048, "grad_norm": 0.5070561766624451, "learning_rate": 9.731379215144388e-05, "loss": 1.8201, "step": 2149 }, { "epoch": 0.11983724430076362, "grad_norm": 0.5182836651802063, "learning_rate": 9.73109121226767e-05, "loss": 1.51, "step": 2150 }, { "epoch": 0.11989298255392676, "grad_norm": 0.5657908320426941, "learning_rate": 9.730803059348323e-05, "loss": 2.0817, "step": 2151 }, { "epoch": 0.1199487208070899, "grad_norm": 0.5556692481040955, "learning_rate": 9.730514756395485e-05, "loss": 1.854, "step": 2152 }, { "epoch": 0.12000445906025305, "grad_norm": 0.4503386616706848, "learning_rate": 9.7302263034183e-05, "loss": 1.4719, "step": 2153 }, { "epoch": 0.1200601973134162, "grad_norm": 0.5425733327865601, "learning_rate": 9.729937700425916e-05, "loss": 1.8686, "step": 2154 }, { "epoch": 0.12011593556657935, "grad_norm": 0.5144285559654236, "learning_rate": 9.729648947427484e-05, "loss": 1.8232, "step": 2155 }, { "epoch": 0.12017167381974249, "grad_norm": 0.5346119999885559, "learning_rate": 9.729360044432166e-05, "loss": 1.7735, "step": 2156 }, { "epoch": 0.12022741207290563, "grad_norm": 0.5558546185493469, "learning_rate": 9.729070991449119e-05, "loss": 1.9485, "step": 2157 }, { "epoch": 0.12028315032606877, "grad_norm": 0.495919406414032, "learning_rate": 9.728781788487513e-05, "loss": 1.6713, "step": 2158 }, { "epoch": 0.12033888857923193, "grad_norm": 0.5348759889602661, "learning_rate": 9.72849243555652e-05, "loss": 1.6913, "step": 2159 }, { "epoch": 0.12039462683239507, "grad_norm": 0.5228710174560547, "learning_rate": 9.728202932665316e-05, "loss": 1.6557, "step": 2160 }, { "epoch": 0.12045036508555822, "grad_norm": 0.49766623973846436, "learning_rate": 9.727913279823081e-05, "loss": 1.6087, "step": 2161 }, { "epoch": 0.12050610333872136, "grad_norm": 0.5042500495910645, "learning_rate": 9.727623477039005e-05, "loss": 1.8017, "step": 2162 }, { "epoch": 0.12056184159188452, "grad_norm": 0.5221708416938782, "learning_rate": 9.727333524322274e-05, "loss": 1.7577, "step": 2163 }, { "epoch": 0.12061757984504766, "grad_norm": 0.5310743451118469, "learning_rate": 9.727043421682087e-05, "loss": 1.7025, "step": 2164 }, { "epoch": 0.1206733180982108, "grad_norm": 0.5771050453186035, "learning_rate": 9.726753169127643e-05, "loss": 1.8185, "step": 2165 }, { "epoch": 0.12072905635137395, "grad_norm": 0.4827874004840851, "learning_rate": 9.726462766668147e-05, "loss": 1.5869, "step": 2166 }, { "epoch": 0.12078479460453709, "grad_norm": 0.5001873970031738, "learning_rate": 9.72617221431281e-05, "loss": 1.6207, "step": 2167 }, { "epoch": 0.12084053285770024, "grad_norm": 0.47895923256874084, "learning_rate": 9.725881512070845e-05, "loss": 1.5611, "step": 2168 }, { "epoch": 0.12089627111086339, "grad_norm": 0.5227773785591125, "learning_rate": 9.725590659951473e-05, "loss": 1.7524, "step": 2169 }, { "epoch": 0.12095200936402653, "grad_norm": 0.5513851046562195, "learning_rate": 9.725299657963916e-05, "loss": 1.9093, "step": 2170 }, { "epoch": 0.12100774761718967, "grad_norm": 0.5206924080848694, "learning_rate": 9.725008506117405e-05, "loss": 1.6196, "step": 2171 }, { "epoch": 0.12106348587035283, "grad_norm": 0.5124804377555847, "learning_rate": 9.724717204421175e-05, "loss": 1.5592, "step": 2172 }, { "epoch": 0.12111922412351597, "grad_norm": 0.49579185247421265, "learning_rate": 9.724425752884458e-05, "loss": 1.7796, "step": 2173 }, { "epoch": 0.12117496237667912, "grad_norm": 0.4806743562221527, "learning_rate": 9.724134151516504e-05, "loss": 1.5684, "step": 2174 }, { "epoch": 0.12123070062984226, "grad_norm": 0.5735479593276978, "learning_rate": 9.72384240032656e-05, "loss": 1.9183, "step": 2175 }, { "epoch": 0.1212864388830054, "grad_norm": 0.49125027656555176, "learning_rate": 9.723550499323874e-05, "loss": 1.5609, "step": 2176 }, { "epoch": 0.12134217713616856, "grad_norm": 0.5535476207733154, "learning_rate": 9.723258448517707e-05, "loss": 1.8593, "step": 2177 }, { "epoch": 0.1213979153893317, "grad_norm": 0.5923840403556824, "learning_rate": 9.722966247917322e-05, "loss": 1.8673, "step": 2178 }, { "epoch": 0.12145365364249484, "grad_norm": 0.5120698809623718, "learning_rate": 9.722673897531983e-05, "loss": 1.6219, "step": 2179 }, { "epoch": 0.12150939189565799, "grad_norm": 0.5636369585990906, "learning_rate": 9.722381397370963e-05, "loss": 1.9298, "step": 2180 }, { "epoch": 0.12156513014882113, "grad_norm": 0.5421077609062195, "learning_rate": 9.722088747443539e-05, "loss": 1.4028, "step": 2181 }, { "epoch": 0.12162086840198429, "grad_norm": 0.5058643817901611, "learning_rate": 9.721795947758991e-05, "loss": 1.6988, "step": 2182 }, { "epoch": 0.12167660665514743, "grad_norm": 0.5012438297271729, "learning_rate": 9.721502998326607e-05, "loss": 1.6624, "step": 2183 }, { "epoch": 0.12173234490831057, "grad_norm": 0.47187769412994385, "learning_rate": 9.721209899155675e-05, "loss": 1.5275, "step": 2184 }, { "epoch": 0.12178808316147371, "grad_norm": 0.525303065776825, "learning_rate": 9.720916650255492e-05, "loss": 1.7458, "step": 2185 }, { "epoch": 0.12184382141463687, "grad_norm": 0.586681604385376, "learning_rate": 9.720623251635357e-05, "loss": 1.7205, "step": 2186 }, { "epoch": 0.12189955966780001, "grad_norm": 0.5550994873046875, "learning_rate": 9.720329703304577e-05, "loss": 1.6508, "step": 2187 }, { "epoch": 0.12195529792096316, "grad_norm": 0.5518259406089783, "learning_rate": 9.720036005272459e-05, "loss": 1.7847, "step": 2188 }, { "epoch": 0.1220110361741263, "grad_norm": 0.4833231270313263, "learning_rate": 9.719742157548319e-05, "loss": 1.578, "step": 2189 }, { "epoch": 0.12206677442728944, "grad_norm": 0.5002262592315674, "learning_rate": 9.719448160141476e-05, "loss": 1.7526, "step": 2190 }, { "epoch": 0.1221225126804526, "grad_norm": 0.4701862335205078, "learning_rate": 9.719154013061253e-05, "loss": 1.369, "step": 2191 }, { "epoch": 0.12217825093361574, "grad_norm": 0.5255539417266846, "learning_rate": 9.71885971631698e-05, "loss": 1.9266, "step": 2192 }, { "epoch": 0.12223398918677889, "grad_norm": 0.5181805491447449, "learning_rate": 9.71856526991799e-05, "loss": 1.8049, "step": 2193 }, { "epoch": 0.12228972743994203, "grad_norm": 0.5119277834892273, "learning_rate": 9.71827067387362e-05, "loss": 1.6141, "step": 2194 }, { "epoch": 0.12234546569310518, "grad_norm": 0.46822264790534973, "learning_rate": 9.717975928193214e-05, "loss": 1.4462, "step": 2195 }, { "epoch": 0.12240120394626833, "grad_norm": 0.5520098209381104, "learning_rate": 9.717681032886119e-05, "loss": 1.7872, "step": 2196 }, { "epoch": 0.12245694219943147, "grad_norm": 0.5204572677612305, "learning_rate": 9.717385987961686e-05, "loss": 1.7539, "step": 2197 }, { "epoch": 0.12251268045259461, "grad_norm": 0.5343250036239624, "learning_rate": 9.717090793429276e-05, "loss": 1.8575, "step": 2198 }, { "epoch": 0.12256841870575776, "grad_norm": 0.521108865737915, "learning_rate": 9.716795449298248e-05, "loss": 1.9104, "step": 2199 }, { "epoch": 0.12262415695892091, "grad_norm": 0.49352675676345825, "learning_rate": 9.71649995557797e-05, "loss": 1.6201, "step": 2200 }, { "epoch": 0.12267989521208406, "grad_norm": 0.5716384649276733, "learning_rate": 9.716204312277812e-05, "loss": 1.928, "step": 2201 }, { "epoch": 0.1227356334652472, "grad_norm": 0.5332071781158447, "learning_rate": 9.715908519407149e-05, "loss": 1.6348, "step": 2202 }, { "epoch": 0.12279137171841034, "grad_norm": 0.5008523464202881, "learning_rate": 9.715612576975366e-05, "loss": 1.8211, "step": 2203 }, { "epoch": 0.12284710997157348, "grad_norm": 0.5112088322639465, "learning_rate": 9.715316484991845e-05, "loss": 1.8334, "step": 2204 }, { "epoch": 0.12290284822473664, "grad_norm": 0.5519534349441528, "learning_rate": 9.715020243465976e-05, "loss": 1.8001, "step": 2205 }, { "epoch": 0.12295858647789978, "grad_norm": 0.46493321657180786, "learning_rate": 9.714723852407157e-05, "loss": 1.4173, "step": 2206 }, { "epoch": 0.12301432473106293, "grad_norm": 0.5702951550483704, "learning_rate": 9.714427311824786e-05, "loss": 1.7186, "step": 2207 }, { "epoch": 0.12307006298422607, "grad_norm": 0.5255847573280334, "learning_rate": 9.714130621728266e-05, "loss": 1.6884, "step": 2208 }, { "epoch": 0.12312580123738923, "grad_norm": 0.581146776676178, "learning_rate": 9.713833782127008e-05, "loss": 1.8707, "step": 2209 }, { "epoch": 0.12318153949055237, "grad_norm": 0.5044531226158142, "learning_rate": 9.713536793030429e-05, "loss": 1.555, "step": 2210 }, { "epoch": 0.12323727774371551, "grad_norm": 0.543787956237793, "learning_rate": 9.713239654447943e-05, "loss": 1.8188, "step": 2211 }, { "epoch": 0.12329301599687865, "grad_norm": 0.6438772678375244, "learning_rate": 9.712942366388975e-05, "loss": 1.8096, "step": 2212 }, { "epoch": 0.1233487542500418, "grad_norm": 0.5758397579193115, "learning_rate": 9.712644928862953e-05, "loss": 1.8329, "step": 2213 }, { "epoch": 0.12340449250320495, "grad_norm": 0.5573188066482544, "learning_rate": 9.712347341879311e-05, "loss": 1.8994, "step": 2214 }, { "epoch": 0.1234602307563681, "grad_norm": 0.5477108359336853, "learning_rate": 9.712049605447486e-05, "loss": 1.8856, "step": 2215 }, { "epoch": 0.12351596900953124, "grad_norm": 0.5133275985717773, "learning_rate": 9.711751719576922e-05, "loss": 1.7319, "step": 2216 }, { "epoch": 0.12357170726269438, "grad_norm": 0.5406665802001953, "learning_rate": 9.711453684277063e-05, "loss": 1.9889, "step": 2217 }, { "epoch": 0.12362744551585754, "grad_norm": 0.48421719670295715, "learning_rate": 9.711155499557364e-05, "loss": 1.5177, "step": 2218 }, { "epoch": 0.12368318376902068, "grad_norm": 0.5295604467391968, "learning_rate": 9.710857165427281e-05, "loss": 1.5376, "step": 2219 }, { "epoch": 0.12373892202218383, "grad_norm": 0.5241243243217468, "learning_rate": 9.710558681896274e-05, "loss": 1.7389, "step": 2220 }, { "epoch": 0.12379466027534697, "grad_norm": 0.48620593547821045, "learning_rate": 9.71026004897381e-05, "loss": 1.7281, "step": 2221 }, { "epoch": 0.12385039852851011, "grad_norm": 0.5162755846977234, "learning_rate": 9.70996126666936e-05, "loss": 1.6421, "step": 2222 }, { "epoch": 0.12390613678167327, "grad_norm": 0.5603106021881104, "learning_rate": 9.7096623349924e-05, "loss": 2.0405, "step": 2223 }, { "epoch": 0.12396187503483641, "grad_norm": 0.5636157393455505, "learning_rate": 9.70936325395241e-05, "loss": 1.7629, "step": 2224 }, { "epoch": 0.12401761328799955, "grad_norm": 0.5287961363792419, "learning_rate": 9.709064023558874e-05, "loss": 1.7357, "step": 2225 }, { "epoch": 0.1240733515411627, "grad_norm": 0.5584306120872498, "learning_rate": 9.708764643821284e-05, "loss": 1.905, "step": 2226 }, { "epoch": 0.12412908979432584, "grad_norm": 0.5021309852600098, "learning_rate": 9.708465114749132e-05, "loss": 1.7439, "step": 2227 }, { "epoch": 0.124184828047489, "grad_norm": 0.5482348799705505, "learning_rate": 9.708165436351921e-05, "loss": 1.7851, "step": 2228 }, { "epoch": 0.12424056630065214, "grad_norm": 0.498470276594162, "learning_rate": 9.707865608639152e-05, "loss": 1.494, "step": 2229 }, { "epoch": 0.12429630455381528, "grad_norm": 0.5526018142700195, "learning_rate": 9.707565631620334e-05, "loss": 1.973, "step": 2230 }, { "epoch": 0.12435204280697842, "grad_norm": 0.5773054957389832, "learning_rate": 9.707265505304982e-05, "loss": 1.9693, "step": 2231 }, { "epoch": 0.12440778106014158, "grad_norm": 0.5307757258415222, "learning_rate": 9.706965229702614e-05, "loss": 1.8978, "step": 2232 }, { "epoch": 0.12446351931330472, "grad_norm": 0.5740475654602051, "learning_rate": 9.70666480482275e-05, "loss": 2.0298, "step": 2233 }, { "epoch": 0.12451925756646787, "grad_norm": 0.5156608819961548, "learning_rate": 9.706364230674923e-05, "loss": 1.5383, "step": 2234 }, { "epoch": 0.12457499581963101, "grad_norm": 0.4921102225780487, "learning_rate": 9.706063507268661e-05, "loss": 1.6472, "step": 2235 }, { "epoch": 0.12463073407279415, "grad_norm": 0.5701449513435364, "learning_rate": 9.705762634613502e-05, "loss": 1.7692, "step": 2236 }, { "epoch": 0.12468647232595731, "grad_norm": 0.49713411927223206, "learning_rate": 9.705461612718991e-05, "loss": 1.5998, "step": 2237 }, { "epoch": 0.12474221057912045, "grad_norm": 0.5252828598022461, "learning_rate": 9.705160441594671e-05, "loss": 1.6545, "step": 2238 }, { "epoch": 0.1247979488322836, "grad_norm": 0.543063759803772, "learning_rate": 9.704859121250095e-05, "loss": 1.8984, "step": 2239 }, { "epoch": 0.12485368708544674, "grad_norm": 0.5450255274772644, "learning_rate": 9.704557651694818e-05, "loss": 1.7794, "step": 2240 }, { "epoch": 0.1249094253386099, "grad_norm": 0.4936400353908539, "learning_rate": 9.704256032938403e-05, "loss": 1.4191, "step": 2241 }, { "epoch": 0.12496516359177304, "grad_norm": 0.5075535774230957, "learning_rate": 9.703954264990414e-05, "loss": 1.7634, "step": 2242 }, { "epoch": 0.12502090184493617, "grad_norm": 0.5337166786193848, "learning_rate": 9.703652347860422e-05, "loss": 1.9257, "step": 2243 }, { "epoch": 0.12507664009809932, "grad_norm": 0.5265361666679382, "learning_rate": 9.703350281558002e-05, "loss": 1.8102, "step": 2244 }, { "epoch": 0.12513237835126248, "grad_norm": 0.5706486701965332, "learning_rate": 9.703048066092733e-05, "loss": 2.1658, "step": 2245 }, { "epoch": 0.1251881166044256, "grad_norm": 0.5012516975402832, "learning_rate": 9.7027457014742e-05, "loss": 1.6586, "step": 2246 }, { "epoch": 0.12524385485758877, "grad_norm": 0.5617608428001404, "learning_rate": 9.702443187711992e-05, "loss": 1.7678, "step": 2247 }, { "epoch": 0.12529959311075192, "grad_norm": 0.5820160508155823, "learning_rate": 9.702140524815704e-05, "loss": 1.848, "step": 2248 }, { "epoch": 0.12535533136391505, "grad_norm": 0.5511069297790527, "learning_rate": 9.701837712794932e-05, "loss": 1.8369, "step": 2249 }, { "epoch": 0.1254110696170782, "grad_norm": 0.5301650166511536, "learning_rate": 9.701534751659283e-05, "loss": 1.8621, "step": 2250 }, { "epoch": 0.12546680787024134, "grad_norm": 0.519693911075592, "learning_rate": 9.701231641418363e-05, "loss": 1.7069, "step": 2251 }, { "epoch": 0.1255225461234045, "grad_norm": 0.5177733302116394, "learning_rate": 9.700928382081786e-05, "loss": 1.7311, "step": 2252 }, { "epoch": 0.12557828437656765, "grad_norm": 0.5452710390090942, "learning_rate": 9.700624973659169e-05, "loss": 1.6022, "step": 2253 }, { "epoch": 0.12563402262973078, "grad_norm": 0.49126002192497253, "learning_rate": 9.700321416160134e-05, "loss": 1.6004, "step": 2254 }, { "epoch": 0.12568976088289394, "grad_norm": 0.4859536290168762, "learning_rate": 9.70001770959431e-05, "loss": 1.627, "step": 2255 }, { "epoch": 0.12574549913605707, "grad_norm": 0.5808461308479309, "learning_rate": 9.699713853971324e-05, "loss": 1.9893, "step": 2256 }, { "epoch": 0.12580123738922022, "grad_norm": 0.5044426321983337, "learning_rate": 9.699409849300818e-05, "loss": 1.6269, "step": 2257 }, { "epoch": 0.12585697564238338, "grad_norm": 0.5458354353904724, "learning_rate": 9.69910569559243e-05, "loss": 1.6803, "step": 2258 }, { "epoch": 0.1259127138955465, "grad_norm": 0.5350721478462219, "learning_rate": 9.698801392855808e-05, "loss": 1.7217, "step": 2259 }, { "epoch": 0.12596845214870966, "grad_norm": 0.511223554611206, "learning_rate": 9.698496941100601e-05, "loss": 1.6904, "step": 2260 }, { "epoch": 0.1260241904018728, "grad_norm": 0.46969008445739746, "learning_rate": 9.698192340336468e-05, "loss": 1.5411, "step": 2261 }, { "epoch": 0.12607992865503595, "grad_norm": 0.5638684630393982, "learning_rate": 9.697887590573063e-05, "loss": 1.6144, "step": 2262 }, { "epoch": 0.1261356669081991, "grad_norm": 0.5146279335021973, "learning_rate": 9.697582691820054e-05, "loss": 1.605, "step": 2263 }, { "epoch": 0.12619140516136224, "grad_norm": 0.46321019530296326, "learning_rate": 9.697277644087113e-05, "loss": 1.0444, "step": 2264 }, { "epoch": 0.1262471434145254, "grad_norm": 0.5038657784461975, "learning_rate": 9.69697244738391e-05, "loss": 1.7319, "step": 2265 }, { "epoch": 0.12630288166768852, "grad_norm": 0.593559205532074, "learning_rate": 9.696667101720127e-05, "loss": 1.9173, "step": 2266 }, { "epoch": 0.12635861992085168, "grad_norm": 0.5412843227386475, "learning_rate": 9.696361607105448e-05, "loss": 1.6603, "step": 2267 }, { "epoch": 0.12641435817401483, "grad_norm": 0.5422548055648804, "learning_rate": 9.69605596354956e-05, "loss": 1.7048, "step": 2268 }, { "epoch": 0.12647009642717796, "grad_norm": 0.5455138087272644, "learning_rate": 9.695750171062156e-05, "loss": 1.669, "step": 2269 }, { "epoch": 0.12652583468034112, "grad_norm": 0.5468176007270813, "learning_rate": 9.695444229652935e-05, "loss": 1.6744, "step": 2270 }, { "epoch": 0.12658157293350428, "grad_norm": 0.49385011196136475, "learning_rate": 9.6951381393316e-05, "loss": 1.6182, "step": 2271 }, { "epoch": 0.1266373111866674, "grad_norm": 0.5301021933555603, "learning_rate": 9.694831900107857e-05, "loss": 1.7818, "step": 2272 }, { "epoch": 0.12669304943983056, "grad_norm": 0.6178646087646484, "learning_rate": 9.69452551199142e-05, "loss": 1.9646, "step": 2273 }, { "epoch": 0.1267487876929937, "grad_norm": 0.5421885848045349, "learning_rate": 9.694218974992005e-05, "loss": 1.6862, "step": 2274 }, { "epoch": 0.12680452594615685, "grad_norm": 0.5251665115356445, "learning_rate": 9.693912289119332e-05, "loss": 1.7259, "step": 2275 }, { "epoch": 0.12686026419932, "grad_norm": 0.5069818496704102, "learning_rate": 9.693605454383128e-05, "loss": 1.8426, "step": 2276 }, { "epoch": 0.12691600245248313, "grad_norm": 0.5525764226913452, "learning_rate": 9.693298470793126e-05, "loss": 1.9999, "step": 2277 }, { "epoch": 0.1269717407056463, "grad_norm": 0.5717039108276367, "learning_rate": 9.69299133835906e-05, "loss": 1.736, "step": 2278 }, { "epoch": 0.12702747895880942, "grad_norm": 0.4768933057785034, "learning_rate": 9.69268405709067e-05, "loss": 1.4284, "step": 2279 }, { "epoch": 0.12708321721197258, "grad_norm": 0.5677302479743958, "learning_rate": 9.692376626997703e-05, "loss": 1.8972, "step": 2280 }, { "epoch": 0.12713895546513573, "grad_norm": 0.5202549695968628, "learning_rate": 9.692069048089907e-05, "loss": 1.6173, "step": 2281 }, { "epoch": 0.12719469371829886, "grad_norm": 0.5106683373451233, "learning_rate": 9.691761320377037e-05, "loss": 1.5599, "step": 2282 }, { "epoch": 0.12725043197146202, "grad_norm": 0.5042096376419067, "learning_rate": 9.691453443868854e-05, "loss": 1.7705, "step": 2283 }, { "epoch": 0.12730617022462515, "grad_norm": 0.5391340255737305, "learning_rate": 9.691145418575122e-05, "loss": 1.9065, "step": 2284 }, { "epoch": 0.1273619084777883, "grad_norm": 0.5074059963226318, "learning_rate": 9.690837244505607e-05, "loss": 1.7623, "step": 2285 }, { "epoch": 0.12741764673095146, "grad_norm": 0.5277912616729736, "learning_rate": 9.690528921670084e-05, "loss": 1.7758, "step": 2286 }, { "epoch": 0.1274733849841146, "grad_norm": 0.5068628787994385, "learning_rate": 9.69022045007833e-05, "loss": 1.6409, "step": 2287 }, { "epoch": 0.12752912323727775, "grad_norm": 0.5209136009216309, "learning_rate": 9.689911829740133e-05, "loss": 1.6144, "step": 2288 }, { "epoch": 0.12758486149044088, "grad_norm": 0.5280535221099854, "learning_rate": 9.689603060665273e-05, "loss": 1.8711, "step": 2289 }, { "epoch": 0.12764059974360403, "grad_norm": 0.5511658191680908, "learning_rate": 9.689294142863548e-05, "loss": 1.8228, "step": 2290 }, { "epoch": 0.1276963379967672, "grad_norm": 0.5436153411865234, "learning_rate": 9.688985076344754e-05, "loss": 1.696, "step": 2291 }, { "epoch": 0.12775207624993032, "grad_norm": 0.5065414309501648, "learning_rate": 9.68867586111869e-05, "loss": 1.6989, "step": 2292 }, { "epoch": 0.12780781450309348, "grad_norm": 0.5280441045761108, "learning_rate": 9.688366497195166e-05, "loss": 1.6764, "step": 2293 }, { "epoch": 0.12786355275625663, "grad_norm": 0.46777546405792236, "learning_rate": 9.68805698458399e-05, "loss": 1.4595, "step": 2294 }, { "epoch": 0.12791929100941976, "grad_norm": 0.5001897811889648, "learning_rate": 9.687747323294982e-05, "loss": 1.4642, "step": 2295 }, { "epoch": 0.12797502926258292, "grad_norm": 0.5615783929824829, "learning_rate": 9.687437513337961e-05, "loss": 1.7116, "step": 2296 }, { "epoch": 0.12803076751574605, "grad_norm": 0.5208621621131897, "learning_rate": 9.687127554722749e-05, "loss": 1.637, "step": 2297 }, { "epoch": 0.1280865057689092, "grad_norm": 0.5435874462127686, "learning_rate": 9.68681744745918e-05, "loss": 1.7629, "step": 2298 }, { "epoch": 0.12814224402207236, "grad_norm": 0.5296335220336914, "learning_rate": 9.686507191557089e-05, "loss": 1.827, "step": 2299 }, { "epoch": 0.1281979822752355, "grad_norm": 0.5191251635551453, "learning_rate": 9.686196787026311e-05, "loss": 1.9385, "step": 2300 }, { "epoch": 0.12825372052839865, "grad_norm": 0.5494365096092224, "learning_rate": 9.685886233876695e-05, "loss": 1.8378, "step": 2301 }, { "epoch": 0.12830945878156177, "grad_norm": 0.583207905292511, "learning_rate": 9.685575532118089e-05, "loss": 1.6812, "step": 2302 }, { "epoch": 0.12836519703472493, "grad_norm": 0.5473710894584656, "learning_rate": 9.685264681760345e-05, "loss": 1.9602, "step": 2303 }, { "epoch": 0.1284209352878881, "grad_norm": 0.567272424697876, "learning_rate": 9.684953682813322e-05, "loss": 1.8125, "step": 2304 }, { "epoch": 0.12847667354105122, "grad_norm": 0.4732169806957245, "learning_rate": 9.684642535286885e-05, "loss": 1.5566, "step": 2305 }, { "epoch": 0.12853241179421437, "grad_norm": 0.516720712184906, "learning_rate": 9.684331239190899e-05, "loss": 1.5688, "step": 2306 }, { "epoch": 0.1285881500473775, "grad_norm": 0.5574965476989746, "learning_rate": 9.684019794535237e-05, "loss": 1.7452, "step": 2307 }, { "epoch": 0.12864388830054066, "grad_norm": 0.5443317294120789, "learning_rate": 9.683708201329777e-05, "loss": 1.6624, "step": 2308 }, { "epoch": 0.12869962655370382, "grad_norm": 0.5809649229049683, "learning_rate": 9.683396459584404e-05, "loss": 1.7721, "step": 2309 }, { "epoch": 0.12875536480686695, "grad_norm": 0.5913598537445068, "learning_rate": 9.683084569308997e-05, "loss": 2.1623, "step": 2310 }, { "epoch": 0.1288111030600301, "grad_norm": 0.5404501557350159, "learning_rate": 9.682772530513453e-05, "loss": 1.7165, "step": 2311 }, { "epoch": 0.12886684131319323, "grad_norm": 0.4902174174785614, "learning_rate": 9.682460343207669e-05, "loss": 1.6391, "step": 2312 }, { "epoch": 0.1289225795663564, "grad_norm": 0.5791998505592346, "learning_rate": 9.682148007401541e-05, "loss": 1.891, "step": 2313 }, { "epoch": 0.12897831781951954, "grad_norm": 0.5695587992668152, "learning_rate": 9.681835523104978e-05, "loss": 1.9901, "step": 2314 }, { "epoch": 0.12903405607268267, "grad_norm": 0.6025593876838684, "learning_rate": 9.681522890327889e-05, "loss": 1.7748, "step": 2315 }, { "epoch": 0.12908979432584583, "grad_norm": 0.5111005902290344, "learning_rate": 9.681210109080189e-05, "loss": 1.6, "step": 2316 }, { "epoch": 0.129145532579009, "grad_norm": 0.533204972743988, "learning_rate": 9.680897179371798e-05, "loss": 1.6863, "step": 2317 }, { "epoch": 0.12920127083217212, "grad_norm": 0.5172824859619141, "learning_rate": 9.68058410121264e-05, "loss": 1.7456, "step": 2318 }, { "epoch": 0.12925700908533527, "grad_norm": 0.5905986428260803, "learning_rate": 9.680270874612643e-05, "loss": 1.572, "step": 2319 }, { "epoch": 0.1293127473384984, "grad_norm": 0.5090576410293579, "learning_rate": 9.679957499581742e-05, "loss": 1.7946, "step": 2320 }, { "epoch": 0.12936848559166156, "grad_norm": 0.5587893724441528, "learning_rate": 9.679643976129876e-05, "loss": 1.7792, "step": 2321 }, { "epoch": 0.12942422384482472, "grad_norm": 0.6383116841316223, "learning_rate": 9.679330304266988e-05, "loss": 2.0051, "step": 2322 }, { "epoch": 0.12947996209798784, "grad_norm": 0.5700294375419617, "learning_rate": 9.679016484003023e-05, "loss": 1.8419, "step": 2323 }, { "epoch": 0.129535700351151, "grad_norm": 0.6416967511177063, "learning_rate": 9.678702515347938e-05, "loss": 1.7893, "step": 2324 }, { "epoch": 0.12959143860431413, "grad_norm": 0.5761459469795227, "learning_rate": 9.678388398311686e-05, "loss": 1.8868, "step": 2325 }, { "epoch": 0.1296471768574773, "grad_norm": 0.5779362320899963, "learning_rate": 9.678074132904231e-05, "loss": 1.6472, "step": 2326 }, { "epoch": 0.12970291511064044, "grad_norm": 0.5250251293182373, "learning_rate": 9.677759719135542e-05, "loss": 1.8353, "step": 2327 }, { "epoch": 0.12975865336380357, "grad_norm": 0.5306884050369263, "learning_rate": 9.677445157015585e-05, "loss": 1.8419, "step": 2328 }, { "epoch": 0.12981439161696673, "grad_norm": 0.5761096477508545, "learning_rate": 9.67713044655434e-05, "loss": 1.846, "step": 2329 }, { "epoch": 0.12987012987012986, "grad_norm": 0.5438225269317627, "learning_rate": 9.676815587761787e-05, "loss": 1.734, "step": 2330 }, { "epoch": 0.12992586812329301, "grad_norm": 0.5154998898506165, "learning_rate": 9.676500580647912e-05, "loss": 1.8124, "step": 2331 }, { "epoch": 0.12998160637645617, "grad_norm": 0.5288179516792297, "learning_rate": 9.676185425222704e-05, "loss": 2.0132, "step": 2332 }, { "epoch": 0.1300373446296193, "grad_norm": 0.5507707595825195, "learning_rate": 9.675870121496158e-05, "loss": 1.7686, "step": 2333 }, { "epoch": 0.13009308288278246, "grad_norm": 0.4893222451210022, "learning_rate": 9.675554669478272e-05, "loss": 1.8113, "step": 2334 }, { "epoch": 0.13014882113594559, "grad_norm": 0.5455611944198608, "learning_rate": 9.675239069179056e-05, "loss": 1.7593, "step": 2335 }, { "epoch": 0.13020455938910874, "grad_norm": 0.5068415403366089, "learning_rate": 9.674923320608513e-05, "loss": 1.5302, "step": 2336 }, { "epoch": 0.1302602976422719, "grad_norm": 0.5160056948661804, "learning_rate": 9.674607423776661e-05, "loss": 1.5793, "step": 2337 }, { "epoch": 0.13031603589543503, "grad_norm": 0.5414824485778809, "learning_rate": 9.674291378693515e-05, "loss": 1.6392, "step": 2338 }, { "epoch": 0.13037177414859819, "grad_norm": 0.5210713744163513, "learning_rate": 9.673975185369098e-05, "loss": 1.9403, "step": 2339 }, { "epoch": 0.13042751240176134, "grad_norm": 0.5296798944473267, "learning_rate": 9.673658843813442e-05, "loss": 1.7093, "step": 2340 }, { "epoch": 0.13048325065492447, "grad_norm": 0.5705276131629944, "learning_rate": 9.673342354036574e-05, "loss": 1.7645, "step": 2341 }, { "epoch": 0.13053898890808763, "grad_norm": 0.5289913415908813, "learning_rate": 9.673025716048536e-05, "loss": 1.81, "step": 2342 }, { "epoch": 0.13059472716125076, "grad_norm": 0.5237072706222534, "learning_rate": 9.672708929859368e-05, "loss": 2.0053, "step": 2343 }, { "epoch": 0.1306504654144139, "grad_norm": 0.5144554376602173, "learning_rate": 9.672391995479115e-05, "loss": 1.7236, "step": 2344 }, { "epoch": 0.13070620366757707, "grad_norm": 0.5384603142738342, "learning_rate": 9.672074912917831e-05, "loss": 1.7492, "step": 2345 }, { "epoch": 0.1307619419207402, "grad_norm": 0.5475570559501648, "learning_rate": 9.67175768218557e-05, "loss": 1.9068, "step": 2346 }, { "epoch": 0.13081768017390336, "grad_norm": 0.512937068939209, "learning_rate": 9.671440303292395e-05, "loss": 1.7364, "step": 2347 }, { "epoch": 0.13087341842706648, "grad_norm": 0.48609036207199097, "learning_rate": 9.67112277624837e-05, "loss": 1.5916, "step": 2348 }, { "epoch": 0.13092915668022964, "grad_norm": 0.5132019519805908, "learning_rate": 9.670805101063563e-05, "loss": 1.7222, "step": 2349 }, { "epoch": 0.1309848949333928, "grad_norm": 0.5112780928611755, "learning_rate": 9.670487277748052e-05, "loss": 1.6418, "step": 2350 }, { "epoch": 0.13104063318655593, "grad_norm": 0.531306803226471, "learning_rate": 9.670169306311916e-05, "loss": 1.7323, "step": 2351 }, { "epoch": 0.13109637143971908, "grad_norm": 0.48118212819099426, "learning_rate": 9.669851186765238e-05, "loss": 1.4822, "step": 2352 }, { "epoch": 0.1311521096928822, "grad_norm": 0.5309464931488037, "learning_rate": 9.669532919118108e-05, "loss": 1.767, "step": 2353 }, { "epoch": 0.13120784794604537, "grad_norm": 0.532576322555542, "learning_rate": 9.669214503380617e-05, "loss": 1.7228, "step": 2354 }, { "epoch": 0.13126358619920853, "grad_norm": 0.49597617983818054, "learning_rate": 9.668895939562868e-05, "loss": 1.4792, "step": 2355 }, { "epoch": 0.13131932445237166, "grad_norm": 0.5480032563209534, "learning_rate": 9.66857722767496e-05, "loss": 1.7285, "step": 2356 }, { "epoch": 0.1313750627055348, "grad_norm": 0.5191400647163391, "learning_rate": 9.668258367727002e-05, "loss": 1.5942, "step": 2357 }, { "epoch": 0.13143080095869794, "grad_norm": 0.5335458517074585, "learning_rate": 9.667939359729109e-05, "loss": 1.8991, "step": 2358 }, { "epoch": 0.1314865392118611, "grad_norm": 0.5872248411178589, "learning_rate": 9.667620203691393e-05, "loss": 1.8247, "step": 2359 }, { "epoch": 0.13154227746502425, "grad_norm": 0.5811527967453003, "learning_rate": 9.667300899623976e-05, "loss": 2.0837, "step": 2360 }, { "epoch": 0.13159801571818738, "grad_norm": 0.5214108824729919, "learning_rate": 9.66698144753699e-05, "loss": 1.681, "step": 2361 }, { "epoch": 0.13165375397135054, "grad_norm": 0.5067755579948425, "learning_rate": 9.666661847440563e-05, "loss": 1.7168, "step": 2362 }, { "epoch": 0.1317094922245137, "grad_norm": 0.5883169770240784, "learning_rate": 9.666342099344829e-05, "loss": 1.8355, "step": 2363 }, { "epoch": 0.13176523047767683, "grad_norm": 0.5047624111175537, "learning_rate": 9.666022203259931e-05, "loss": 1.6872, "step": 2364 }, { "epoch": 0.13182096873083998, "grad_norm": 0.5165308117866516, "learning_rate": 9.665702159196013e-05, "loss": 1.6867, "step": 2365 }, { "epoch": 0.1318767069840031, "grad_norm": 0.5131801962852478, "learning_rate": 9.665381967163227e-05, "loss": 1.5836, "step": 2366 }, { "epoch": 0.13193244523716627, "grad_norm": 0.5561967492103577, "learning_rate": 9.665061627171726e-05, "loss": 1.6933, "step": 2367 }, { "epoch": 0.13198818349032942, "grad_norm": 0.6118646860122681, "learning_rate": 9.664741139231668e-05, "loss": 2.0988, "step": 2368 }, { "epoch": 0.13204392174349255, "grad_norm": 0.5255211591720581, "learning_rate": 9.664420503353218e-05, "loss": 1.7087, "step": 2369 }, { "epoch": 0.1320996599966557, "grad_norm": 0.555664598941803, "learning_rate": 9.664099719546547e-05, "loss": 1.8029, "step": 2370 }, { "epoch": 0.13215539824981884, "grad_norm": 0.5417226552963257, "learning_rate": 9.663778787821825e-05, "loss": 1.7483, "step": 2371 }, { "epoch": 0.132211136502982, "grad_norm": 0.5773631930351257, "learning_rate": 9.663457708189232e-05, "loss": 1.7137, "step": 2372 }, { "epoch": 0.13226687475614515, "grad_norm": 0.5354270935058594, "learning_rate": 9.66313648065895e-05, "loss": 1.8748, "step": 2373 }, { "epoch": 0.13232261300930828, "grad_norm": 0.5149551033973694, "learning_rate": 9.662815105241168e-05, "loss": 1.5948, "step": 2374 }, { "epoch": 0.13237835126247144, "grad_norm": 0.5566468238830566, "learning_rate": 9.662493581946074e-05, "loss": 1.7724, "step": 2375 }, { "epoch": 0.13243408951563457, "grad_norm": 0.5304192304611206, "learning_rate": 9.66217191078387e-05, "loss": 1.8068, "step": 2376 }, { "epoch": 0.13248982776879772, "grad_norm": 0.5885264873504639, "learning_rate": 9.661850091764756e-05, "loss": 1.9129, "step": 2377 }, { "epoch": 0.13254556602196088, "grad_norm": 0.4796747863292694, "learning_rate": 9.661528124898937e-05, "loss": 1.6931, "step": 2378 }, { "epoch": 0.132601304275124, "grad_norm": 0.49771320819854736, "learning_rate": 9.661206010196624e-05, "loss": 1.5938, "step": 2379 }, { "epoch": 0.13265704252828717, "grad_norm": 0.530432939529419, "learning_rate": 9.660883747668034e-05, "loss": 2.0283, "step": 2380 }, { "epoch": 0.1327127807814503, "grad_norm": 0.515631914138794, "learning_rate": 9.660561337323385e-05, "loss": 1.8549, "step": 2381 }, { "epoch": 0.13276851903461345, "grad_norm": 0.6954619288444519, "learning_rate": 9.660238779172905e-05, "loss": 2.0152, "step": 2382 }, { "epoch": 0.1328242572877766, "grad_norm": 0.5233824253082275, "learning_rate": 9.65991607322682e-05, "loss": 1.7353, "step": 2383 }, { "epoch": 0.13287999554093974, "grad_norm": 0.5527575016021729, "learning_rate": 9.659593219495368e-05, "loss": 1.6361, "step": 2384 }, { "epoch": 0.1329357337941029, "grad_norm": 0.48741617798805237, "learning_rate": 9.659270217988786e-05, "loss": 1.682, "step": 2385 }, { "epoch": 0.13299147204726605, "grad_norm": 0.5804024338722229, "learning_rate": 9.658947068717316e-05, "loss": 1.5736, "step": 2386 }, { "epoch": 0.13304721030042918, "grad_norm": 0.5614018440246582, "learning_rate": 9.658623771691211e-05, "loss": 1.9172, "step": 2387 }, { "epoch": 0.13310294855359234, "grad_norm": 0.5239617824554443, "learning_rate": 9.658300326920722e-05, "loss": 1.7751, "step": 2388 }, { "epoch": 0.13315868680675547, "grad_norm": 0.5195541381835938, "learning_rate": 9.657976734416106e-05, "loss": 1.875, "step": 2389 }, { "epoch": 0.13321442505991862, "grad_norm": 0.531480610370636, "learning_rate": 9.657652994187625e-05, "loss": 1.7631, "step": 2390 }, { "epoch": 0.13327016331308178, "grad_norm": 0.5037621259689331, "learning_rate": 9.657329106245547e-05, "loss": 1.6134, "step": 2391 }, { "epoch": 0.1333259015662449, "grad_norm": 0.4974221885204315, "learning_rate": 9.657005070600144e-05, "loss": 1.7501, "step": 2392 }, { "epoch": 0.13338163981940807, "grad_norm": 0.5308098196983337, "learning_rate": 9.656680887261693e-05, "loss": 1.7283, "step": 2393 }, { "epoch": 0.1334373780725712, "grad_norm": 0.4996281862258911, "learning_rate": 9.656356556240473e-05, "loss": 1.7897, "step": 2394 }, { "epoch": 0.13349311632573435, "grad_norm": 0.6450517773628235, "learning_rate": 9.656032077546772e-05, "loss": 1.7089, "step": 2395 }, { "epoch": 0.1335488545788975, "grad_norm": 0.5968025326728821, "learning_rate": 9.655707451190883e-05, "loss": 1.8664, "step": 2396 }, { "epoch": 0.13360459283206064, "grad_norm": 0.470813512802124, "learning_rate": 9.655382677183095e-05, "loss": 1.5199, "step": 2397 }, { "epoch": 0.1336603310852238, "grad_norm": 0.5651730298995972, "learning_rate": 9.655057755533712e-05, "loss": 1.9733, "step": 2398 }, { "epoch": 0.13371606933838692, "grad_norm": 0.5370044112205505, "learning_rate": 9.654732686253039e-05, "loss": 1.8281, "step": 2399 }, { "epoch": 0.13377180759155008, "grad_norm": 0.5285357236862183, "learning_rate": 9.654407469351383e-05, "loss": 1.592, "step": 2400 }, { "epoch": 0.13382754584471324, "grad_norm": 0.5265277624130249, "learning_rate": 9.654082104839059e-05, "loss": 1.8503, "step": 2401 }, { "epoch": 0.13388328409787636, "grad_norm": 0.5449655652046204, "learning_rate": 9.653756592726386e-05, "loss": 1.8579, "step": 2402 }, { "epoch": 0.13393902235103952, "grad_norm": 0.5737154483795166, "learning_rate": 9.653430933023689e-05, "loss": 1.8618, "step": 2403 }, { "epoch": 0.13399476060420265, "grad_norm": 0.5164530873298645, "learning_rate": 9.653105125741292e-05, "loss": 1.6213, "step": 2404 }, { "epoch": 0.1340504988573658, "grad_norm": 0.5017974376678467, "learning_rate": 9.65277917088953e-05, "loss": 1.6255, "step": 2405 }, { "epoch": 0.13410623711052896, "grad_norm": 0.5122340321540833, "learning_rate": 9.652453068478741e-05, "loss": 1.5653, "step": 2406 }, { "epoch": 0.1341619753636921, "grad_norm": 0.6067832708358765, "learning_rate": 9.652126818519266e-05, "loss": 2.0985, "step": 2407 }, { "epoch": 0.13421771361685525, "grad_norm": 0.5796366333961487, "learning_rate": 9.651800421021453e-05, "loss": 1.9636, "step": 2408 }, { "epoch": 0.1342734518700184, "grad_norm": 0.5619643926620483, "learning_rate": 9.651473875995651e-05, "loss": 1.7129, "step": 2409 }, { "epoch": 0.13432919012318154, "grad_norm": 0.5060097575187683, "learning_rate": 9.651147183452219e-05, "loss": 1.5304, "step": 2410 }, { "epoch": 0.1343849283763447, "grad_norm": 0.532145619392395, "learning_rate": 9.650820343401515e-05, "loss": 1.7844, "step": 2411 }, { "epoch": 0.13444066662950782, "grad_norm": 0.5342923402786255, "learning_rate": 9.650493355853906e-05, "loss": 1.8585, "step": 2412 }, { "epoch": 0.13449640488267098, "grad_norm": 0.49805736541748047, "learning_rate": 9.650166220819764e-05, "loss": 1.4576, "step": 2413 }, { "epoch": 0.13455214313583413, "grad_norm": 0.5234712362289429, "learning_rate": 9.64983893830946e-05, "loss": 1.6994, "step": 2414 }, { "epoch": 0.13460788138899726, "grad_norm": 0.5124284029006958, "learning_rate": 9.649511508333375e-05, "loss": 1.6614, "step": 2415 }, { "epoch": 0.13466361964216042, "grad_norm": 0.4958679676055908, "learning_rate": 9.649183930901895e-05, "loss": 1.56, "step": 2416 }, { "epoch": 0.13471935789532355, "grad_norm": 0.5191091895103455, "learning_rate": 9.648856206025407e-05, "loss": 1.7004, "step": 2417 }, { "epoch": 0.1347750961484867, "grad_norm": 0.5366125702857971, "learning_rate": 9.648528333714304e-05, "loss": 1.7206, "step": 2418 }, { "epoch": 0.13483083440164986, "grad_norm": 0.5979599952697754, "learning_rate": 9.648200313978986e-05, "loss": 1.757, "step": 2419 }, { "epoch": 0.134886572654813, "grad_norm": 0.5878745317459106, "learning_rate": 9.647872146829855e-05, "loss": 1.7236, "step": 2420 }, { "epoch": 0.13494231090797615, "grad_norm": 0.5160901546478271, "learning_rate": 9.647543832277317e-05, "loss": 1.7274, "step": 2421 }, { "epoch": 0.13499804916113928, "grad_norm": 0.5626492500305176, "learning_rate": 9.647215370331786e-05, "loss": 1.9507, "step": 2422 }, { "epoch": 0.13505378741430243, "grad_norm": 0.5624846816062927, "learning_rate": 9.646886761003679e-05, "loss": 1.9476, "step": 2423 }, { "epoch": 0.1351095256674656, "grad_norm": 0.5468912720680237, "learning_rate": 9.646558004303419e-05, "loss": 1.7836, "step": 2424 }, { "epoch": 0.13516526392062872, "grad_norm": 0.5446691513061523, "learning_rate": 9.646229100241429e-05, "loss": 1.7664, "step": 2425 }, { "epoch": 0.13522100217379188, "grad_norm": 0.5568925738334656, "learning_rate": 9.64590004882814e-05, "loss": 2.0063, "step": 2426 }, { "epoch": 0.135276740426955, "grad_norm": 0.560264527797699, "learning_rate": 9.64557085007399e-05, "loss": 1.8132, "step": 2427 }, { "epoch": 0.13533247868011816, "grad_norm": 0.5093153715133667, "learning_rate": 9.64524150398942e-05, "loss": 1.4198, "step": 2428 }, { "epoch": 0.13538821693328132, "grad_norm": 0.5184745192527771, "learning_rate": 9.64491201058487e-05, "loss": 1.6062, "step": 2429 }, { "epoch": 0.13544395518644445, "grad_norm": 0.5188031792640686, "learning_rate": 9.644582369870794e-05, "loss": 1.8179, "step": 2430 }, { "epoch": 0.1354996934396076, "grad_norm": 0.537381112575531, "learning_rate": 9.644252581857647e-05, "loss": 1.9697, "step": 2431 }, { "epoch": 0.13555543169277076, "grad_norm": 0.5132935047149658, "learning_rate": 9.643922646555883e-05, "loss": 1.6746, "step": 2432 }, { "epoch": 0.1356111699459339, "grad_norm": 0.5265336036682129, "learning_rate": 9.64359256397597e-05, "loss": 1.6561, "step": 2433 }, { "epoch": 0.13566690819909705, "grad_norm": 0.5241510272026062, "learning_rate": 9.643262334128374e-05, "loss": 1.577, "step": 2434 }, { "epoch": 0.13572264645226018, "grad_norm": 0.5073732137680054, "learning_rate": 9.642931957023569e-05, "loss": 1.6821, "step": 2435 }, { "epoch": 0.13577838470542333, "grad_norm": 0.4868320822715759, "learning_rate": 9.642601432672034e-05, "loss": 1.4476, "step": 2436 }, { "epoch": 0.1358341229585865, "grad_norm": 0.5248389840126038, "learning_rate": 9.642270761084249e-05, "loss": 1.9406, "step": 2437 }, { "epoch": 0.13588986121174962, "grad_norm": 0.492227166891098, "learning_rate": 9.641939942270701e-05, "loss": 1.6538, "step": 2438 }, { "epoch": 0.13594559946491278, "grad_norm": 0.5446291565895081, "learning_rate": 9.641608976241883e-05, "loss": 1.8208, "step": 2439 }, { "epoch": 0.1360013377180759, "grad_norm": 0.5214070677757263, "learning_rate": 9.64127786300829e-05, "loss": 1.6889, "step": 2440 }, { "epoch": 0.13605707597123906, "grad_norm": 0.5892273187637329, "learning_rate": 9.640946602580426e-05, "loss": 2.0888, "step": 2441 }, { "epoch": 0.13611281422440222, "grad_norm": 0.5230244994163513, "learning_rate": 9.640615194968791e-05, "loss": 1.7068, "step": 2442 }, { "epoch": 0.13616855247756535, "grad_norm": 0.5090706944465637, "learning_rate": 9.640283640183903e-05, "loss": 1.7328, "step": 2443 }, { "epoch": 0.1362242907307285, "grad_norm": 0.5167303681373596, "learning_rate": 9.639951938236269e-05, "loss": 1.7062, "step": 2444 }, { "epoch": 0.13628002898389163, "grad_norm": 0.5717843770980835, "learning_rate": 9.639620089136413e-05, "loss": 1.8633, "step": 2445 }, { "epoch": 0.1363357672370548, "grad_norm": 0.514242947101593, "learning_rate": 9.63928809289486e-05, "loss": 1.9126, "step": 2446 }, { "epoch": 0.13639150549021795, "grad_norm": 0.5159420371055603, "learning_rate": 9.638955949522137e-05, "loss": 1.6795, "step": 2447 }, { "epoch": 0.13644724374338107, "grad_norm": 0.4026312828063965, "learning_rate": 9.638623659028779e-05, "loss": 1.008, "step": 2448 }, { "epoch": 0.13650298199654423, "grad_norm": 0.5365085601806641, "learning_rate": 9.63829122142532e-05, "loss": 1.9597, "step": 2449 }, { "epoch": 0.13655872024970736, "grad_norm": 0.528103768825531, "learning_rate": 9.637958636722311e-05, "loss": 1.8801, "step": 2450 }, { "epoch": 0.13661445850287052, "grad_norm": 0.5581492185592651, "learning_rate": 9.637625904930292e-05, "loss": 1.6802, "step": 2451 }, { "epoch": 0.13667019675603367, "grad_norm": 0.5182628631591797, "learning_rate": 9.63729302605982e-05, "loss": 1.8041, "step": 2452 }, { "epoch": 0.1367259350091968, "grad_norm": 0.48804765939712524, "learning_rate": 9.636960000121451e-05, "loss": 1.7381, "step": 2453 }, { "epoch": 0.13678167326235996, "grad_norm": 0.5185055136680603, "learning_rate": 9.636626827125745e-05, "loss": 1.8356, "step": 2454 }, { "epoch": 0.13683741151552312, "grad_norm": 0.5890060663223267, "learning_rate": 9.63629350708327e-05, "loss": 1.8636, "step": 2455 }, { "epoch": 0.13689314976868625, "grad_norm": 0.5501379370689392, "learning_rate": 9.635960040004597e-05, "loss": 2.0967, "step": 2456 }, { "epoch": 0.1369488880218494, "grad_norm": 0.5753256678581238, "learning_rate": 9.635626425900301e-05, "loss": 1.8931, "step": 2457 }, { "epoch": 0.13700462627501253, "grad_norm": 0.5230208039283752, "learning_rate": 9.635292664780962e-05, "loss": 1.6546, "step": 2458 }, { "epoch": 0.1370603645281757, "grad_norm": 0.507422149181366, "learning_rate": 9.634958756657165e-05, "loss": 1.7135, "step": 2459 }, { "epoch": 0.13711610278133884, "grad_norm": 0.48532143235206604, "learning_rate": 9.634624701539498e-05, "loss": 1.5297, "step": 2460 }, { "epoch": 0.13717184103450197, "grad_norm": 0.5039069652557373, "learning_rate": 9.63429049943856e-05, "loss": 1.9089, "step": 2461 }, { "epoch": 0.13722757928766513, "grad_norm": 0.5480893850326538, "learning_rate": 9.633956150364947e-05, "loss": 1.7987, "step": 2462 }, { "epoch": 0.13728331754082826, "grad_norm": 0.5339971780776978, "learning_rate": 9.633621654329261e-05, "loss": 1.7035, "step": 2463 }, { "epoch": 0.13733905579399142, "grad_norm": 0.5058174133300781, "learning_rate": 9.633287011342113e-05, "loss": 1.6676, "step": 2464 }, { "epoch": 0.13739479404715457, "grad_norm": 0.5697671175003052, "learning_rate": 9.632952221414116e-05, "loss": 1.9683, "step": 2465 }, { "epoch": 0.1374505323003177, "grad_norm": 0.5071194767951965, "learning_rate": 9.632617284555886e-05, "loss": 1.9232, "step": 2466 }, { "epoch": 0.13750627055348086, "grad_norm": 0.5929427742958069, "learning_rate": 9.632282200778045e-05, "loss": 1.8352, "step": 2467 }, { "epoch": 0.137562008806644, "grad_norm": 0.528889000415802, "learning_rate": 9.631946970091221e-05, "loss": 1.7636, "step": 2468 }, { "epoch": 0.13761774705980714, "grad_norm": 1.3195804357528687, "learning_rate": 9.631611592506046e-05, "loss": 1.7929, "step": 2469 }, { "epoch": 0.1376734853129703, "grad_norm": 0.5272727608680725, "learning_rate": 9.631276068033154e-05, "loss": 1.89, "step": 2470 }, { "epoch": 0.13772922356613343, "grad_norm": 0.5453211665153503, "learning_rate": 9.630940396683188e-05, "loss": 1.6766, "step": 2471 }, { "epoch": 0.1377849618192966, "grad_norm": 0.5383656620979309, "learning_rate": 9.630604578466794e-05, "loss": 1.6168, "step": 2472 }, { "epoch": 0.13784070007245972, "grad_norm": 0.5008901953697205, "learning_rate": 9.63026861339462e-05, "loss": 1.5592, "step": 2473 }, { "epoch": 0.13789643832562287, "grad_norm": 0.5986757874488831, "learning_rate": 9.629932501477321e-05, "loss": 2.0793, "step": 2474 }, { "epoch": 0.13795217657878603, "grad_norm": 0.5368151664733887, "learning_rate": 9.629596242725558e-05, "loss": 1.6693, "step": 2475 }, { "epoch": 0.13800791483194916, "grad_norm": 0.5330533385276794, "learning_rate": 9.629259837149995e-05, "loss": 1.7398, "step": 2476 }, { "epoch": 0.13806365308511231, "grad_norm": 0.5093852877616882, "learning_rate": 9.6289232847613e-05, "loss": 1.6665, "step": 2477 }, { "epoch": 0.13811939133827547, "grad_norm": 0.5469667911529541, "learning_rate": 9.628586585570149e-05, "loss": 1.8411, "step": 2478 }, { "epoch": 0.1381751295914386, "grad_norm": 0.5832191705703735, "learning_rate": 9.628249739587217e-05, "loss": 1.8821, "step": 2479 }, { "epoch": 0.13823086784460176, "grad_norm": 0.5154137015342712, "learning_rate": 9.627912746823187e-05, "loss": 1.6075, "step": 2480 }, { "epoch": 0.13828660609776489, "grad_norm": 0.5499826669692993, "learning_rate": 9.627575607288745e-05, "loss": 1.735, "step": 2481 }, { "epoch": 0.13834234435092804, "grad_norm": 0.6152673959732056, "learning_rate": 9.627238320994589e-05, "loss": 2.0207, "step": 2482 }, { "epoch": 0.1383980826040912, "grad_norm": 0.49340128898620605, "learning_rate": 9.626900887951412e-05, "loss": 1.64, "step": 2483 }, { "epoch": 0.13845382085725433, "grad_norm": 0.5563956499099731, "learning_rate": 9.626563308169914e-05, "loss": 1.9062, "step": 2484 }, { "epoch": 0.13850955911041749, "grad_norm": 0.4945386052131653, "learning_rate": 9.626225581660803e-05, "loss": 1.4852, "step": 2485 }, { "epoch": 0.13856529736358061, "grad_norm": 0.5170808434486389, "learning_rate": 9.625887708434788e-05, "loss": 1.7517, "step": 2486 }, { "epoch": 0.13862103561674377, "grad_norm": 0.5459514260292053, "learning_rate": 9.625549688502589e-05, "loss": 1.6785, "step": 2487 }, { "epoch": 0.13867677386990693, "grad_norm": 0.5073458552360535, "learning_rate": 9.62521152187492e-05, "loss": 1.7213, "step": 2488 }, { "epoch": 0.13873251212307006, "grad_norm": 0.4946017563343048, "learning_rate": 9.624873208562509e-05, "loss": 1.6256, "step": 2489 }, { "epoch": 0.1387882503762332, "grad_norm": 0.5971960425376892, "learning_rate": 9.624534748576085e-05, "loss": 1.9997, "step": 2490 }, { "epoch": 0.13884398862939634, "grad_norm": 0.5135798454284668, "learning_rate": 9.624196141926381e-05, "loss": 1.6544, "step": 2491 }, { "epoch": 0.1388997268825595, "grad_norm": 0.5550069212913513, "learning_rate": 9.623857388624138e-05, "loss": 1.8297, "step": 2492 }, { "epoch": 0.13895546513572266, "grad_norm": 0.5476080179214478, "learning_rate": 9.623518488680095e-05, "loss": 1.9136, "step": 2493 }, { "epoch": 0.13901120338888578, "grad_norm": 0.5327604413032532, "learning_rate": 9.623179442105004e-05, "loss": 1.7471, "step": 2494 }, { "epoch": 0.13906694164204894, "grad_norm": 0.5192773938179016, "learning_rate": 9.622840248909617e-05, "loss": 1.6395, "step": 2495 }, { "epoch": 0.13912267989521207, "grad_norm": 0.5261735916137695, "learning_rate": 9.622500909104689e-05, "loss": 1.6751, "step": 2496 }, { "epoch": 0.13917841814837523, "grad_norm": 0.5256398916244507, "learning_rate": 9.622161422700984e-05, "loss": 1.7681, "step": 2497 }, { "epoch": 0.13923415640153838, "grad_norm": 0.5021438002586365, "learning_rate": 9.621821789709267e-05, "loss": 1.6317, "step": 2498 }, { "epoch": 0.1392898946547015, "grad_norm": 0.5900087952613831, "learning_rate": 9.62148201014031e-05, "loss": 1.8691, "step": 2499 }, { "epoch": 0.13934563290786467, "grad_norm": 0.492544025182724, "learning_rate": 9.621142084004889e-05, "loss": 1.6061, "step": 2500 }, { "epoch": 0.13940137116102783, "grad_norm": 0.5590608716011047, "learning_rate": 9.620802011313785e-05, "loss": 1.9551, "step": 2501 }, { "epoch": 0.13945710941419096, "grad_norm": 0.5163889527320862, "learning_rate": 9.620461792077782e-05, "loss": 1.8419, "step": 2502 }, { "epoch": 0.1395128476673541, "grad_norm": 0.5565062165260315, "learning_rate": 9.620121426307669e-05, "loss": 1.9454, "step": 2503 }, { "epoch": 0.13956858592051724, "grad_norm": 0.5010280013084412, "learning_rate": 9.619780914014242e-05, "loss": 1.6189, "step": 2504 }, { "epoch": 0.1396243241736804, "grad_norm": 0.5342069268226624, "learning_rate": 9.619440255208301e-05, "loss": 1.7667, "step": 2505 }, { "epoch": 0.13968006242684355, "grad_norm": 0.5092571377754211, "learning_rate": 9.619099449900646e-05, "loss": 1.6797, "step": 2506 }, { "epoch": 0.13973580068000668, "grad_norm": 0.5784452557563782, "learning_rate": 9.618758498102089e-05, "loss": 1.9559, "step": 2507 }, { "epoch": 0.13979153893316984, "grad_norm": 0.5389965176582336, "learning_rate": 9.618417399823441e-05, "loss": 1.7971, "step": 2508 }, { "epoch": 0.13984727718633297, "grad_norm": 0.5197558999061584, "learning_rate": 9.618076155075521e-05, "loss": 1.8631, "step": 2509 }, { "epoch": 0.13990301543949613, "grad_norm": 0.5198122262954712, "learning_rate": 9.617734763869151e-05, "loss": 1.7487, "step": 2510 }, { "epoch": 0.13995875369265928, "grad_norm": 0.515998363494873, "learning_rate": 9.617393226215157e-05, "loss": 1.6849, "step": 2511 }, { "epoch": 0.1400144919458224, "grad_norm": 0.5627748370170593, "learning_rate": 9.617051542124371e-05, "loss": 1.7637, "step": 2512 }, { "epoch": 0.14007023019898557, "grad_norm": 0.49436190724372864, "learning_rate": 9.61670971160763e-05, "loss": 1.6303, "step": 2513 }, { "epoch": 0.1401259684521487, "grad_norm": 0.5101426839828491, "learning_rate": 9.616367734675772e-05, "loss": 1.5709, "step": 2514 }, { "epoch": 0.14018170670531185, "grad_norm": 0.5416966080665588, "learning_rate": 9.616025611339647e-05, "loss": 1.8456, "step": 2515 }, { "epoch": 0.140237444958475, "grad_norm": 0.5797568559646606, "learning_rate": 9.615683341610103e-05, "loss": 1.7499, "step": 2516 }, { "epoch": 0.14029318321163814, "grad_norm": 0.5696927905082703, "learning_rate": 9.615340925497995e-05, "loss": 1.6875, "step": 2517 }, { "epoch": 0.1403489214648013, "grad_norm": 0.49985361099243164, "learning_rate": 9.61499836301418e-05, "loss": 1.6336, "step": 2518 }, { "epoch": 0.14040465971796443, "grad_norm": 0.5426433086395264, "learning_rate": 9.614655654169527e-05, "loss": 1.8164, "step": 2519 }, { "epoch": 0.14046039797112758, "grad_norm": 0.562021017074585, "learning_rate": 9.6143127989749e-05, "loss": 1.626, "step": 2520 }, { "epoch": 0.14051613622429074, "grad_norm": 0.5873587727546692, "learning_rate": 9.613969797441173e-05, "loss": 2.0087, "step": 2521 }, { "epoch": 0.14057187447745387, "grad_norm": 0.5239251852035522, "learning_rate": 9.613626649579229e-05, "loss": 1.74, "step": 2522 }, { "epoch": 0.14062761273061702, "grad_norm": 0.613498330116272, "learning_rate": 9.613283355399945e-05, "loss": 1.7088, "step": 2523 }, { "epoch": 0.14068335098378018, "grad_norm": 0.5224273800849915, "learning_rate": 9.61293991491421e-05, "loss": 1.5665, "step": 2524 }, { "epoch": 0.1407390892369433, "grad_norm": 0.5063479542732239, "learning_rate": 9.612596328132915e-05, "loss": 1.3456, "step": 2525 }, { "epoch": 0.14079482749010647, "grad_norm": 0.5042296648025513, "learning_rate": 9.61225259506696e-05, "loss": 1.6111, "step": 2526 }, { "epoch": 0.1408505657432696, "grad_norm": 0.5116347670555115, "learning_rate": 9.611908715727244e-05, "loss": 1.9546, "step": 2527 }, { "epoch": 0.14090630399643275, "grad_norm": 0.5643008351325989, "learning_rate": 9.611564690124672e-05, "loss": 1.8488, "step": 2528 }, { "epoch": 0.1409620422495959, "grad_norm": 0.5275754332542419, "learning_rate": 9.611220518270155e-05, "loss": 1.7367, "step": 2529 }, { "epoch": 0.14101778050275904, "grad_norm": 0.523114800453186, "learning_rate": 9.61087620017461e-05, "loss": 1.5207, "step": 2530 }, { "epoch": 0.1410735187559222, "grad_norm": 0.5141943693161011, "learning_rate": 9.610531735848953e-05, "loss": 1.6592, "step": 2531 }, { "epoch": 0.14112925700908532, "grad_norm": 0.5485236048698425, "learning_rate": 9.610187125304111e-05, "loss": 1.7567, "step": 2532 }, { "epoch": 0.14118499526224848, "grad_norm": 0.537264347076416, "learning_rate": 9.609842368551014e-05, "loss": 1.7151, "step": 2533 }, { "epoch": 0.14124073351541164, "grad_norm": 0.588664174079895, "learning_rate": 9.609497465600595e-05, "loss": 1.9591, "step": 2534 }, { "epoch": 0.14129647176857477, "grad_norm": 0.5192539691925049, "learning_rate": 9.60915241646379e-05, "loss": 1.7296, "step": 2535 }, { "epoch": 0.14135221002173792, "grad_norm": 0.543268620967865, "learning_rate": 9.608807221151543e-05, "loss": 1.7645, "step": 2536 }, { "epoch": 0.14140794827490105, "grad_norm": 0.534324049949646, "learning_rate": 9.608461879674802e-05, "loss": 1.8227, "step": 2537 }, { "epoch": 0.1414636865280642, "grad_norm": 0.5177492499351501, "learning_rate": 9.608116392044521e-05, "loss": 1.6495, "step": 2538 }, { "epoch": 0.14151942478122737, "grad_norm": 0.5617666840553284, "learning_rate": 9.607770758271655e-05, "loss": 1.9329, "step": 2539 }, { "epoch": 0.1415751630343905, "grad_norm": 0.5591059327125549, "learning_rate": 9.607424978367165e-05, "loss": 1.8535, "step": 2540 }, { "epoch": 0.14163090128755365, "grad_norm": 0.5114865899085999, "learning_rate": 9.607079052342018e-05, "loss": 1.6956, "step": 2541 }, { "epoch": 0.1416866395407168, "grad_norm": 0.5444316864013672, "learning_rate": 9.606732980207184e-05, "loss": 1.6842, "step": 2542 }, { "epoch": 0.14174237779387994, "grad_norm": 0.5291377305984497, "learning_rate": 9.606386761973641e-05, "loss": 1.778, "step": 2543 }, { "epoch": 0.1417981160470431, "grad_norm": 0.5469574332237244, "learning_rate": 9.606040397652365e-05, "loss": 1.8492, "step": 2544 }, { "epoch": 0.14185385430020622, "grad_norm": 0.5374149084091187, "learning_rate": 9.605693887254343e-05, "loss": 1.8428, "step": 2545 }, { "epoch": 0.14190959255336938, "grad_norm": 0.5556001663208008, "learning_rate": 9.605347230790565e-05, "loss": 1.786, "step": 2546 }, { "epoch": 0.14196533080653254, "grad_norm": 0.5268534421920776, "learning_rate": 9.605000428272023e-05, "loss": 1.5936, "step": 2547 }, { "epoch": 0.14202106905969566, "grad_norm": 0.5348252058029175, "learning_rate": 9.604653479709717e-05, "loss": 1.8033, "step": 2548 }, { "epoch": 0.14207680731285882, "grad_norm": 0.47919270396232605, "learning_rate": 9.60430638511465e-05, "loss": 1.5892, "step": 2549 }, { "epoch": 0.14213254556602195, "grad_norm": 0.5066027045249939, "learning_rate": 9.603959144497827e-05, "loss": 1.6489, "step": 2550 }, { "epoch": 0.1421882838191851, "grad_norm": 0.512729823589325, "learning_rate": 9.603611757870266e-05, "loss": 1.4806, "step": 2551 }, { "epoch": 0.14224402207234826, "grad_norm": 0.5020458102226257, "learning_rate": 9.603264225242978e-05, "loss": 1.7944, "step": 2552 }, { "epoch": 0.1422997603255114, "grad_norm": 0.5788121819496155, "learning_rate": 9.60291654662699e-05, "loss": 1.828, "step": 2553 }, { "epoch": 0.14235549857867455, "grad_norm": 0.5426775217056274, "learning_rate": 9.602568722033326e-05, "loss": 1.8621, "step": 2554 }, { "epoch": 0.14241123683183768, "grad_norm": 0.5158776044845581, "learning_rate": 9.602220751473015e-05, "loss": 1.8829, "step": 2555 }, { "epoch": 0.14246697508500084, "grad_norm": 0.48226305842399597, "learning_rate": 9.601872634957096e-05, "loss": 1.6547, "step": 2556 }, { "epoch": 0.142522713338164, "grad_norm": 0.5081673860549927, "learning_rate": 9.601524372496608e-05, "loss": 1.6629, "step": 2557 }, { "epoch": 0.14257845159132712, "grad_norm": 0.5080944299697876, "learning_rate": 9.601175964102596e-05, "loss": 1.8285, "step": 2558 }, { "epoch": 0.14263418984449028, "grad_norm": 0.5221143364906311, "learning_rate": 9.600827409786107e-05, "loss": 1.9544, "step": 2559 }, { "epoch": 0.1426899280976534, "grad_norm": 0.5045720338821411, "learning_rate": 9.600478709558199e-05, "loss": 1.5243, "step": 2560 }, { "epoch": 0.14274566635081656, "grad_norm": 0.5300230383872986, "learning_rate": 9.600129863429929e-05, "loss": 1.6888, "step": 2561 }, { "epoch": 0.14280140460397972, "grad_norm": 0.5262769460678101, "learning_rate": 9.599780871412359e-05, "loss": 1.8205, "step": 2562 }, { "epoch": 0.14285714285714285, "grad_norm": 0.5437910556793213, "learning_rate": 9.59943173351656e-05, "loss": 1.69, "step": 2563 }, { "epoch": 0.142912881110306, "grad_norm": 0.5781261324882507, "learning_rate": 9.599082449753602e-05, "loss": 1.918, "step": 2564 }, { "epoch": 0.14296861936346916, "grad_norm": 0.5519402623176575, "learning_rate": 9.598733020134562e-05, "loss": 1.7039, "step": 2565 }, { "epoch": 0.1430243576166323, "grad_norm": 0.5874602198600769, "learning_rate": 9.598383444670526e-05, "loss": 1.6948, "step": 2566 }, { "epoch": 0.14308009586979545, "grad_norm": 0.5131939649581909, "learning_rate": 9.598033723372575e-05, "loss": 1.6666, "step": 2567 }, { "epoch": 0.14313583412295858, "grad_norm": 0.6441419124603271, "learning_rate": 9.597683856251804e-05, "loss": 1.9023, "step": 2568 }, { "epoch": 0.14319157237612173, "grad_norm": 0.48139771819114685, "learning_rate": 9.597333843319309e-05, "loss": 1.6297, "step": 2569 }, { "epoch": 0.1432473106292849, "grad_norm": 0.4975999891757965, "learning_rate": 9.596983684586186e-05, "loss": 1.6558, "step": 2570 }, { "epoch": 0.14330304888244802, "grad_norm": 0.5479779839515686, "learning_rate": 9.596633380063544e-05, "loss": 1.78, "step": 2571 }, { "epoch": 0.14335878713561118, "grad_norm": 0.5358686447143555, "learning_rate": 9.596282929762492e-05, "loss": 1.848, "step": 2572 }, { "epoch": 0.1434145253887743, "grad_norm": 0.5355905890464783, "learning_rate": 9.595932333694142e-05, "loss": 1.847, "step": 2573 }, { "epoch": 0.14347026364193746, "grad_norm": 0.5640880465507507, "learning_rate": 9.595581591869616e-05, "loss": 1.713, "step": 2574 }, { "epoch": 0.14352600189510062, "grad_norm": 0.5763548016548157, "learning_rate": 9.595230704300035e-05, "loss": 1.9647, "step": 2575 }, { "epoch": 0.14358174014826375, "grad_norm": 0.5426276922225952, "learning_rate": 9.594879670996528e-05, "loss": 1.7378, "step": 2576 }, { "epoch": 0.1436374784014269, "grad_norm": 0.5128087997436523, "learning_rate": 9.594528491970228e-05, "loss": 1.7663, "step": 2577 }, { "epoch": 0.14369321665459003, "grad_norm": 0.5331497192382812, "learning_rate": 9.594177167232273e-05, "loss": 1.6068, "step": 2578 }, { "epoch": 0.1437489549077532, "grad_norm": 0.5513312220573425, "learning_rate": 9.593825696793803e-05, "loss": 1.6527, "step": 2579 }, { "epoch": 0.14380469316091635, "grad_norm": 0.5069592595100403, "learning_rate": 9.593474080665968e-05, "loss": 1.5839, "step": 2580 }, { "epoch": 0.14386043141407948, "grad_norm": 0.5478212237358093, "learning_rate": 9.593122318859915e-05, "loss": 1.8217, "step": 2581 }, { "epoch": 0.14391616966724263, "grad_norm": 0.5398098230361938, "learning_rate": 9.592770411386802e-05, "loss": 1.8395, "step": 2582 }, { "epoch": 0.14397190792040576, "grad_norm": 0.535152792930603, "learning_rate": 9.592418358257789e-05, "loss": 1.8477, "step": 2583 }, { "epoch": 0.14402764617356892, "grad_norm": 0.5321324467658997, "learning_rate": 9.592066159484043e-05, "loss": 1.6152, "step": 2584 }, { "epoch": 0.14408338442673208, "grad_norm": 0.525637686252594, "learning_rate": 9.59171381507673e-05, "loss": 1.8558, "step": 2585 }, { "epoch": 0.1441391226798952, "grad_norm": 0.5971347689628601, "learning_rate": 9.591361325047028e-05, "loss": 1.8752, "step": 2586 }, { "epoch": 0.14419486093305836, "grad_norm": 0.5029361844062805, "learning_rate": 9.591008689406114e-05, "loss": 1.6977, "step": 2587 }, { "epoch": 0.14425059918622152, "grad_norm": 0.5642208456993103, "learning_rate": 9.59065590816517e-05, "loss": 1.8379, "step": 2588 }, { "epoch": 0.14430633743938465, "grad_norm": 0.5269021391868591, "learning_rate": 9.590302981335387e-05, "loss": 1.98, "step": 2589 }, { "epoch": 0.1443620756925478, "grad_norm": 0.5572815537452698, "learning_rate": 9.589949908927957e-05, "loss": 1.7123, "step": 2590 }, { "epoch": 0.14441781394571093, "grad_norm": 0.5520729422569275, "learning_rate": 9.589596690954077e-05, "loss": 1.8578, "step": 2591 }, { "epoch": 0.1444735521988741, "grad_norm": 0.5181688070297241, "learning_rate": 9.589243327424951e-05, "loss": 1.7641, "step": 2592 }, { "epoch": 0.14452929045203725, "grad_norm": 0.5066071152687073, "learning_rate": 9.588889818351781e-05, "loss": 1.6991, "step": 2593 }, { "epoch": 0.14458502870520037, "grad_norm": 0.5530059933662415, "learning_rate": 9.588536163745782e-05, "loss": 1.7019, "step": 2594 }, { "epoch": 0.14464076695836353, "grad_norm": 0.5519603490829468, "learning_rate": 9.58818236361817e-05, "loss": 1.6645, "step": 2595 }, { "epoch": 0.14469650521152666, "grad_norm": 0.6039948463439941, "learning_rate": 9.587828417980163e-05, "loss": 2.0606, "step": 2596 }, { "epoch": 0.14475224346468982, "grad_norm": 0.5822129845619202, "learning_rate": 9.587474326842987e-05, "loss": 1.8879, "step": 2597 }, { "epoch": 0.14480798171785297, "grad_norm": 0.5391368865966797, "learning_rate": 9.587120090217874e-05, "loss": 1.6668, "step": 2598 }, { "epoch": 0.1448637199710161, "grad_norm": 0.505940854549408, "learning_rate": 9.586765708116056e-05, "loss": 1.6322, "step": 2599 }, { "epoch": 0.14491945822417926, "grad_norm": 0.5613484978675842, "learning_rate": 9.586411180548771e-05, "loss": 1.7002, "step": 2600 }, { "epoch": 0.1449751964773424, "grad_norm": 0.5343160629272461, "learning_rate": 9.586056507527266e-05, "loss": 1.8232, "step": 2601 }, { "epoch": 0.14503093473050555, "grad_norm": 0.5221366286277771, "learning_rate": 9.585701689062785e-05, "loss": 1.7799, "step": 2602 }, { "epoch": 0.1450866729836687, "grad_norm": 0.503301739692688, "learning_rate": 9.585346725166584e-05, "loss": 1.5724, "step": 2603 }, { "epoch": 0.14514241123683183, "grad_norm": 0.5650082230567932, "learning_rate": 9.584991615849921e-05, "loss": 1.898, "step": 2604 }, { "epoch": 0.145198149489995, "grad_norm": 0.4780997633934021, "learning_rate": 9.584636361124054e-05, "loss": 1.5643, "step": 2605 }, { "epoch": 0.14525388774315812, "grad_norm": 0.5057533979415894, "learning_rate": 9.584280961000253e-05, "loss": 1.575, "step": 2606 }, { "epoch": 0.14530962599632127, "grad_norm": 0.530737578868866, "learning_rate": 9.583925415489787e-05, "loss": 1.7932, "step": 2607 }, { "epoch": 0.14536536424948443, "grad_norm": 0.603374719619751, "learning_rate": 9.583569724603934e-05, "loss": 2.0627, "step": 2608 }, { "epoch": 0.14542110250264756, "grad_norm": 0.5549886226654053, "learning_rate": 9.583213888353972e-05, "loss": 1.7767, "step": 2609 }, { "epoch": 0.14547684075581072, "grad_norm": 0.6217805743217468, "learning_rate": 9.582857906751191e-05, "loss": 2.05, "step": 2610 }, { "epoch": 0.14553257900897387, "grad_norm": 0.5606620907783508, "learning_rate": 9.582501779806874e-05, "loss": 1.7722, "step": 2611 }, { "epoch": 0.145588317262137, "grad_norm": 0.5387722253799438, "learning_rate": 9.582145507532319e-05, "loss": 1.6958, "step": 2612 }, { "epoch": 0.14564405551530016, "grad_norm": 0.557847797870636, "learning_rate": 9.581789089938825e-05, "loss": 1.8401, "step": 2613 }, { "epoch": 0.1456997937684633, "grad_norm": 0.5201898217201233, "learning_rate": 9.581432527037693e-05, "loss": 1.7684, "step": 2614 }, { "epoch": 0.14575553202162644, "grad_norm": 0.5138794183731079, "learning_rate": 9.581075818840234e-05, "loss": 1.7435, "step": 2615 }, { "epoch": 0.1458112702747896, "grad_norm": 0.5721390247344971, "learning_rate": 9.58071896535776e-05, "loss": 1.8191, "step": 2616 }, { "epoch": 0.14586700852795273, "grad_norm": 0.5593292117118835, "learning_rate": 9.580361966601588e-05, "loss": 1.877, "step": 2617 }, { "epoch": 0.1459227467811159, "grad_norm": 0.5009481906890869, "learning_rate": 9.580004822583038e-05, "loss": 1.6282, "step": 2618 }, { "epoch": 0.14597848503427902, "grad_norm": 0.4969474673271179, "learning_rate": 9.579647533313439e-05, "loss": 1.7076, "step": 2619 }, { "epoch": 0.14603422328744217, "grad_norm": 0.5316969156265259, "learning_rate": 9.579290098804122e-05, "loss": 1.6271, "step": 2620 }, { "epoch": 0.14608996154060533, "grad_norm": 0.5574962496757507, "learning_rate": 9.578932519066422e-05, "loss": 1.8687, "step": 2621 }, { "epoch": 0.14614569979376846, "grad_norm": 0.499491423368454, "learning_rate": 9.57857479411168e-05, "loss": 1.6985, "step": 2622 }, { "epoch": 0.14620143804693161, "grad_norm": 0.654602587223053, "learning_rate": 9.57821692395124e-05, "loss": 1.7291, "step": 2623 }, { "epoch": 0.14625717630009474, "grad_norm": 0.5459001660346985, "learning_rate": 9.577858908596451e-05, "loss": 1.729, "step": 2624 }, { "epoch": 0.1463129145532579, "grad_norm": 0.5157297849655151, "learning_rate": 9.57750074805867e-05, "loss": 1.4164, "step": 2625 }, { "epoch": 0.14636865280642106, "grad_norm": 0.5205078125, "learning_rate": 9.577142442349254e-05, "loss": 1.7282, "step": 2626 }, { "epoch": 0.14642439105958419, "grad_norm": 0.563706636428833, "learning_rate": 9.576783991479565e-05, "loss": 1.8092, "step": 2627 }, { "epoch": 0.14648012931274734, "grad_norm": 0.5385141968727112, "learning_rate": 9.576425395460973e-05, "loss": 1.8241, "step": 2628 }, { "epoch": 0.14653586756591047, "grad_norm": 0.6100838780403137, "learning_rate": 9.576066654304849e-05, "loss": 1.9425, "step": 2629 }, { "epoch": 0.14659160581907363, "grad_norm": 0.5153439044952393, "learning_rate": 9.575707768022572e-05, "loss": 1.4287, "step": 2630 }, { "epoch": 0.14664734407223678, "grad_norm": 0.5562304258346558, "learning_rate": 9.575348736625523e-05, "loss": 1.9308, "step": 2631 }, { "epoch": 0.14670308232539991, "grad_norm": 0.5785409808158875, "learning_rate": 9.574989560125087e-05, "loss": 1.8831, "step": 2632 }, { "epoch": 0.14675882057856307, "grad_norm": 0.5315858721733093, "learning_rate": 9.574630238532658e-05, "loss": 1.5871, "step": 2633 }, { "epoch": 0.14681455883172623, "grad_norm": 0.5748802423477173, "learning_rate": 9.574270771859628e-05, "loss": 1.8394, "step": 2634 }, { "epoch": 0.14687029708488936, "grad_norm": 0.5130333304405212, "learning_rate": 9.5739111601174e-05, "loss": 1.8598, "step": 2635 }, { "epoch": 0.1469260353380525, "grad_norm": 0.5098990201950073, "learning_rate": 9.573551403317378e-05, "loss": 1.5862, "step": 2636 }, { "epoch": 0.14698177359121564, "grad_norm": 0.5426929593086243, "learning_rate": 9.573191501470971e-05, "loss": 1.8026, "step": 2637 }, { "epoch": 0.1470375118443788, "grad_norm": 0.5652133226394653, "learning_rate": 9.572831454589592e-05, "loss": 1.7529, "step": 2638 }, { "epoch": 0.14709325009754196, "grad_norm": 0.5370623469352722, "learning_rate": 9.572471262684662e-05, "loss": 1.7851, "step": 2639 }, { "epoch": 0.14714898835070508, "grad_norm": 0.5871500372886658, "learning_rate": 9.572110925767601e-05, "loss": 1.7617, "step": 2640 }, { "epoch": 0.14720472660386824, "grad_norm": 0.5181992053985596, "learning_rate": 9.571750443849841e-05, "loss": 1.6418, "step": 2641 }, { "epoch": 0.14726046485703137, "grad_norm": 0.5635068416595459, "learning_rate": 9.571389816942811e-05, "loss": 2.0309, "step": 2642 }, { "epoch": 0.14731620311019453, "grad_norm": 0.5830138921737671, "learning_rate": 9.571029045057948e-05, "loss": 1.8764, "step": 2643 }, { "epoch": 0.14737194136335768, "grad_norm": 0.5109788179397583, "learning_rate": 9.570668128206697e-05, "loss": 1.6183, "step": 2644 }, { "epoch": 0.1474276796165208, "grad_norm": 0.5681736469268799, "learning_rate": 9.5703070664005e-05, "loss": 1.738, "step": 2645 }, { "epoch": 0.14748341786968397, "grad_norm": 0.5385489463806152, "learning_rate": 9.56994585965081e-05, "loss": 1.7379, "step": 2646 }, { "epoch": 0.1475391561228471, "grad_norm": 0.5935365557670593, "learning_rate": 9.569584507969082e-05, "loss": 1.6596, "step": 2647 }, { "epoch": 0.14759489437601025, "grad_norm": 0.5758340358734131, "learning_rate": 9.569223011366776e-05, "loss": 1.7998, "step": 2648 }, { "epoch": 0.1476506326291734, "grad_norm": 0.5150250196456909, "learning_rate": 9.568861369855357e-05, "loss": 1.5843, "step": 2649 }, { "epoch": 0.14770637088233654, "grad_norm": 0.549801230430603, "learning_rate": 9.568499583446293e-05, "loss": 1.6966, "step": 2650 }, { "epoch": 0.1477621091354997, "grad_norm": 0.5092233419418335, "learning_rate": 9.568137652151059e-05, "loss": 1.7318, "step": 2651 }, { "epoch": 0.14781784738866283, "grad_norm": 0.5549139976501465, "learning_rate": 9.567775575981133e-05, "loss": 1.8252, "step": 2652 }, { "epoch": 0.14787358564182598, "grad_norm": 0.5805264115333557, "learning_rate": 9.567413354947997e-05, "loss": 1.8455, "step": 2653 }, { "epoch": 0.14792932389498914, "grad_norm": 0.5241934657096863, "learning_rate": 9.56705098906314e-05, "loss": 1.8003, "step": 2654 }, { "epoch": 0.14798506214815227, "grad_norm": 0.5738681554794312, "learning_rate": 9.566688478338053e-05, "loss": 1.765, "step": 2655 }, { "epoch": 0.14804080040131543, "grad_norm": 0.5123993158340454, "learning_rate": 9.566325822784232e-05, "loss": 1.686, "step": 2656 }, { "epoch": 0.14809653865447858, "grad_norm": 0.5327409505844116, "learning_rate": 9.56596302241318e-05, "loss": 1.9386, "step": 2657 }, { "epoch": 0.1481522769076417, "grad_norm": 0.4922872483730316, "learning_rate": 9.565600077236403e-05, "loss": 1.6464, "step": 2658 }, { "epoch": 0.14820801516080487, "grad_norm": 0.5839138031005859, "learning_rate": 9.565236987265411e-05, "loss": 2.0237, "step": 2659 }, { "epoch": 0.148263753413968, "grad_norm": 0.5407429933547974, "learning_rate": 9.564873752511718e-05, "loss": 1.9181, "step": 2660 }, { "epoch": 0.14831949166713115, "grad_norm": 0.5354205369949341, "learning_rate": 9.564510372986845e-05, "loss": 1.9004, "step": 2661 }, { "epoch": 0.1483752299202943, "grad_norm": 0.517620325088501, "learning_rate": 9.564146848702316e-05, "loss": 1.4634, "step": 2662 }, { "epoch": 0.14843096817345744, "grad_norm": 0.513761579990387, "learning_rate": 9.56378317966966e-05, "loss": 1.7994, "step": 2663 }, { "epoch": 0.1484867064266206, "grad_norm": 0.520189642906189, "learning_rate": 9.56341936590041e-05, "loss": 1.493, "step": 2664 }, { "epoch": 0.14854244467978373, "grad_norm": 0.5256882905960083, "learning_rate": 9.563055407406104e-05, "loss": 1.747, "step": 2665 }, { "epoch": 0.14859818293294688, "grad_norm": 0.5171797871589661, "learning_rate": 9.562691304198286e-05, "loss": 1.7043, "step": 2666 }, { "epoch": 0.14865392118611004, "grad_norm": 0.5845912098884583, "learning_rate": 9.5623270562885e-05, "loss": 1.8348, "step": 2667 }, { "epoch": 0.14870965943927317, "grad_norm": 0.5168249011039734, "learning_rate": 9.561962663688302e-05, "loss": 1.5255, "step": 2668 }, { "epoch": 0.14876539769243632, "grad_norm": 0.5021228790283203, "learning_rate": 9.561598126409245e-05, "loss": 1.5113, "step": 2669 }, { "epoch": 0.14882113594559945, "grad_norm": 0.5029981732368469, "learning_rate": 9.561233444462894e-05, "loss": 1.5927, "step": 2670 }, { "epoch": 0.1488768741987626, "grad_norm": 0.5585193634033203, "learning_rate": 9.56086861786081e-05, "loss": 1.9007, "step": 2671 }, { "epoch": 0.14893261245192577, "grad_norm": 0.4993244409561157, "learning_rate": 9.560503646614564e-05, "loss": 1.5592, "step": 2672 }, { "epoch": 0.1489883507050889, "grad_norm": 0.4925285875797272, "learning_rate": 9.560138530735734e-05, "loss": 1.5822, "step": 2673 }, { "epoch": 0.14904408895825205, "grad_norm": 0.5714946985244751, "learning_rate": 9.559773270235896e-05, "loss": 1.703, "step": 2674 }, { "epoch": 0.14909982721141518, "grad_norm": 0.5588274598121643, "learning_rate": 9.559407865126636e-05, "loss": 1.7473, "step": 2675 }, { "epoch": 0.14915556546457834, "grad_norm": 0.5327757000923157, "learning_rate": 9.559042315419542e-05, "loss": 1.6382, "step": 2676 }, { "epoch": 0.1492113037177415, "grad_norm": 0.5377374887466431, "learning_rate": 9.558676621126206e-05, "loss": 1.7602, "step": 2677 }, { "epoch": 0.14926704197090462, "grad_norm": 0.5468077659606934, "learning_rate": 9.558310782258227e-05, "loss": 1.7686, "step": 2678 }, { "epoch": 0.14932278022406778, "grad_norm": 0.5344017744064331, "learning_rate": 9.557944798827205e-05, "loss": 1.6661, "step": 2679 }, { "epoch": 0.14937851847723094, "grad_norm": 0.5011274218559265, "learning_rate": 9.557578670844751e-05, "loss": 1.6757, "step": 2680 }, { "epoch": 0.14943425673039407, "grad_norm": 0.5330647826194763, "learning_rate": 9.557212398322473e-05, "loss": 1.8146, "step": 2681 }, { "epoch": 0.14948999498355722, "grad_norm": 0.5211254954338074, "learning_rate": 9.556845981271989e-05, "loss": 1.7437, "step": 2682 }, { "epoch": 0.14954573323672035, "grad_norm": 0.603344738483429, "learning_rate": 9.556479419704918e-05, "loss": 2.0424, "step": 2683 }, { "epoch": 0.1496014714898835, "grad_norm": 0.5117289423942566, "learning_rate": 9.556112713632885e-05, "loss": 1.6523, "step": 2684 }, { "epoch": 0.14965720974304667, "grad_norm": 0.5624164938926697, "learning_rate": 9.555745863067522e-05, "loss": 1.8348, "step": 2685 }, { "epoch": 0.1497129479962098, "grad_norm": 0.4994141459465027, "learning_rate": 9.555378868020461e-05, "loss": 1.6003, "step": 2686 }, { "epoch": 0.14976868624937295, "grad_norm": 0.5267731547355652, "learning_rate": 9.555011728503343e-05, "loss": 1.6412, "step": 2687 }, { "epoch": 0.14982442450253608, "grad_norm": 0.4905613958835602, "learning_rate": 9.554644444527812e-05, "loss": 1.6397, "step": 2688 }, { "epoch": 0.14988016275569924, "grad_norm": 0.5710086226463318, "learning_rate": 9.554277016105512e-05, "loss": 2.0408, "step": 2689 }, { "epoch": 0.1499359010088624, "grad_norm": 0.5375673770904541, "learning_rate": 9.5539094432481e-05, "loss": 1.7599, "step": 2690 }, { "epoch": 0.14999163926202552, "grad_norm": 0.5491001009941101, "learning_rate": 9.55354172596723e-05, "loss": 1.6704, "step": 2691 }, { "epoch": 0.15004737751518868, "grad_norm": 0.5431581139564514, "learning_rate": 9.553173864274567e-05, "loss": 1.7792, "step": 2692 }, { "epoch": 0.1501031157683518, "grad_norm": 0.5338147282600403, "learning_rate": 9.552805858181775e-05, "loss": 1.7461, "step": 2693 }, { "epoch": 0.15015885402151496, "grad_norm": 0.5207554697990417, "learning_rate": 9.552437707700526e-05, "loss": 1.7735, "step": 2694 }, { "epoch": 0.15021459227467812, "grad_norm": 0.515975296497345, "learning_rate": 9.552069412842495e-05, "loss": 1.6318, "step": 2695 }, { "epoch": 0.15027033052784125, "grad_norm": 0.5207625031471252, "learning_rate": 9.551700973619364e-05, "loss": 1.665, "step": 2696 }, { "epoch": 0.1503260687810044, "grad_norm": 0.5158435702323914, "learning_rate": 9.551332390042816e-05, "loss": 1.743, "step": 2697 }, { "epoch": 0.15038180703416754, "grad_norm": 0.5647339224815369, "learning_rate": 9.55096366212454e-05, "loss": 1.9245, "step": 2698 }, { "epoch": 0.1504375452873307, "grad_norm": 0.545265793800354, "learning_rate": 9.55059478987623e-05, "loss": 1.5553, "step": 2699 }, { "epoch": 0.15049328354049385, "grad_norm": 0.5328176617622375, "learning_rate": 9.550225773309586e-05, "loss": 1.4489, "step": 2700 }, { "epoch": 0.15054902179365698, "grad_norm": 0.5154641270637512, "learning_rate": 9.54985661243631e-05, "loss": 1.9052, "step": 2701 }, { "epoch": 0.15060476004682014, "grad_norm": 0.5019435286521912, "learning_rate": 9.54948730726811e-05, "loss": 1.5049, "step": 2702 }, { "epoch": 0.1506604982999833, "grad_norm": 0.557501494884491, "learning_rate": 9.549117857816697e-05, "loss": 1.8818, "step": 2703 }, { "epoch": 0.15071623655314642, "grad_norm": 0.5352375507354736, "learning_rate": 9.548748264093789e-05, "loss": 1.6683, "step": 2704 }, { "epoch": 0.15077197480630958, "grad_norm": 0.5106709599494934, "learning_rate": 9.548378526111108e-05, "loss": 1.6966, "step": 2705 }, { "epoch": 0.1508277130594727, "grad_norm": 0.5565862655639648, "learning_rate": 9.54800864388038e-05, "loss": 1.8303, "step": 2706 }, { "epoch": 0.15088345131263586, "grad_norm": 0.5492972135543823, "learning_rate": 9.547638617413333e-05, "loss": 1.8624, "step": 2707 }, { "epoch": 0.15093918956579902, "grad_norm": 0.50017249584198, "learning_rate": 9.547268446721702e-05, "loss": 1.5654, "step": 2708 }, { "epoch": 0.15099492781896215, "grad_norm": 0.48998236656188965, "learning_rate": 9.54689813181723e-05, "loss": 1.6074, "step": 2709 }, { "epoch": 0.1510506660721253, "grad_norm": 0.5397832989692688, "learning_rate": 9.54652767271166e-05, "loss": 1.8095, "step": 2710 }, { "epoch": 0.15110640432528843, "grad_norm": 0.5553854703903198, "learning_rate": 9.54615706941674e-05, "loss": 1.8065, "step": 2711 }, { "epoch": 0.1511621425784516, "grad_norm": 0.5286390781402588, "learning_rate": 9.545786321944223e-05, "loss": 1.5857, "step": 2712 }, { "epoch": 0.15121788083161475, "grad_norm": 0.4900679588317871, "learning_rate": 9.545415430305869e-05, "loss": 1.5847, "step": 2713 }, { "epoch": 0.15127361908477788, "grad_norm": 0.5456913113594055, "learning_rate": 9.545044394513439e-05, "loss": 1.7911, "step": 2714 }, { "epoch": 0.15132935733794103, "grad_norm": 0.5544347763061523, "learning_rate": 9.544673214578698e-05, "loss": 1.7341, "step": 2715 }, { "epoch": 0.15138509559110416, "grad_norm": 0.5260149836540222, "learning_rate": 9.544301890513423e-05, "loss": 1.6531, "step": 2716 }, { "epoch": 0.15144083384426732, "grad_norm": 0.5473960638046265, "learning_rate": 9.543930422329386e-05, "loss": 1.7704, "step": 2717 }, { "epoch": 0.15149657209743048, "grad_norm": 0.5335630178451538, "learning_rate": 9.543558810038368e-05, "loss": 1.6427, "step": 2718 }, { "epoch": 0.1515523103505936, "grad_norm": 0.558547854423523, "learning_rate": 9.543187053652156e-05, "loss": 1.9572, "step": 2719 }, { "epoch": 0.15160804860375676, "grad_norm": 0.5423372983932495, "learning_rate": 9.54281515318254e-05, "loss": 1.6761, "step": 2720 }, { "epoch": 0.1516637868569199, "grad_norm": 0.5132402181625366, "learning_rate": 9.542443108641312e-05, "loss": 1.8216, "step": 2721 }, { "epoch": 0.15171952511008305, "grad_norm": 0.491897314786911, "learning_rate": 9.542070920040274e-05, "loss": 1.5411, "step": 2722 }, { "epoch": 0.1517752633632462, "grad_norm": 0.5645871758460999, "learning_rate": 9.541698587391229e-05, "loss": 1.848, "step": 2723 }, { "epoch": 0.15183100161640933, "grad_norm": 0.5238233208656311, "learning_rate": 9.541326110705983e-05, "loss": 1.7717, "step": 2724 }, { "epoch": 0.1518867398695725, "grad_norm": 0.5333484411239624, "learning_rate": 9.540953489996354e-05, "loss": 1.6865, "step": 2725 }, { "epoch": 0.15194247812273565, "grad_norm": 0.5394174456596375, "learning_rate": 9.540580725274153e-05, "loss": 1.7526, "step": 2726 }, { "epoch": 0.15199821637589878, "grad_norm": 0.5119402408599854, "learning_rate": 9.540207816551206e-05, "loss": 1.7543, "step": 2727 }, { "epoch": 0.15205395462906193, "grad_norm": 0.4968518912792206, "learning_rate": 9.539834763839337e-05, "loss": 1.4261, "step": 2728 }, { "epoch": 0.15210969288222506, "grad_norm": 0.5909052491188049, "learning_rate": 9.539461567150378e-05, "loss": 1.9545, "step": 2729 }, { "epoch": 0.15216543113538822, "grad_norm": 0.5353077054023743, "learning_rate": 9.539088226496167e-05, "loss": 1.7021, "step": 2730 }, { "epoch": 0.15222116938855138, "grad_norm": 0.526706874370575, "learning_rate": 9.538714741888541e-05, "loss": 1.7132, "step": 2731 }, { "epoch": 0.1522769076417145, "grad_norm": 0.5296183228492737, "learning_rate": 9.538341113339346e-05, "loss": 1.6896, "step": 2732 }, { "epoch": 0.15233264589487766, "grad_norm": 0.5836046934127808, "learning_rate": 9.537967340860432e-05, "loss": 1.7815, "step": 2733 }, { "epoch": 0.1523883841480408, "grad_norm": 0.5508841872215271, "learning_rate": 9.537593424463651e-05, "loss": 1.8918, "step": 2734 }, { "epoch": 0.15244412240120395, "grad_norm": 0.522796630859375, "learning_rate": 9.537219364160863e-05, "loss": 1.7225, "step": 2735 }, { "epoch": 0.1524998606543671, "grad_norm": 0.48475125432014465, "learning_rate": 9.536845159963932e-05, "loss": 1.5232, "step": 2736 }, { "epoch": 0.15255559890753023, "grad_norm": 0.5141192674636841, "learning_rate": 9.536470811884723e-05, "loss": 1.8193, "step": 2737 }, { "epoch": 0.1526113371606934, "grad_norm": 0.5721970796585083, "learning_rate": 9.536096319935108e-05, "loss": 1.9167, "step": 2738 }, { "epoch": 0.15266707541385652, "grad_norm": 0.53280109167099, "learning_rate": 9.535721684126967e-05, "loss": 1.8613, "step": 2739 }, { "epoch": 0.15272281366701967, "grad_norm": 0.5099390745162964, "learning_rate": 9.535346904472177e-05, "loss": 1.6646, "step": 2740 }, { "epoch": 0.15277855192018283, "grad_norm": 0.8719338774681091, "learning_rate": 9.53497198098263e-05, "loss": 1.7495, "step": 2741 }, { "epoch": 0.15283429017334596, "grad_norm": 0.6453019380569458, "learning_rate": 9.53459691367021e-05, "loss": 1.9952, "step": 2742 }, { "epoch": 0.15289002842650912, "grad_norm": 0.5782769322395325, "learning_rate": 9.534221702546814e-05, "loss": 1.9164, "step": 2743 }, { "epoch": 0.15294576667967225, "grad_norm": 0.4970633387565613, "learning_rate": 9.533846347624343e-05, "loss": 1.7106, "step": 2744 }, { "epoch": 0.1530015049328354, "grad_norm": 0.5226539373397827, "learning_rate": 9.533470848914698e-05, "loss": 1.6197, "step": 2745 }, { "epoch": 0.15305724318599856, "grad_norm": 0.5139595866203308, "learning_rate": 9.533095206429792e-05, "loss": 1.7638, "step": 2746 }, { "epoch": 0.1531129814391617, "grad_norm": 0.5007668733596802, "learning_rate": 9.532719420181535e-05, "loss": 1.5744, "step": 2747 }, { "epoch": 0.15316871969232485, "grad_norm": 0.5414915084838867, "learning_rate": 9.532343490181845e-05, "loss": 1.748, "step": 2748 }, { "epoch": 0.153224457945488, "grad_norm": 0.6250778436660767, "learning_rate": 9.531967416442646e-05, "loss": 1.8845, "step": 2749 }, { "epoch": 0.15328019619865113, "grad_norm": 0.5204728245735168, "learning_rate": 9.531591198975863e-05, "loss": 1.7691, "step": 2750 }, { "epoch": 0.1533359344518143, "grad_norm": 0.5631746649742126, "learning_rate": 9.531214837793429e-05, "loss": 1.6964, "step": 2751 }, { "epoch": 0.15339167270497742, "grad_norm": 0.49102160334587097, "learning_rate": 9.530838332907278e-05, "loss": 1.6693, "step": 2752 }, { "epoch": 0.15344741095814057, "grad_norm": 0.5530296564102173, "learning_rate": 9.530461684329352e-05, "loss": 1.932, "step": 2753 }, { "epoch": 0.15350314921130373, "grad_norm": 0.4979936480522156, "learning_rate": 9.530084892071596e-05, "loss": 1.6084, "step": 2754 }, { "epoch": 0.15355888746446686, "grad_norm": 0.5499585270881653, "learning_rate": 9.52970795614596e-05, "loss": 1.8431, "step": 2755 }, { "epoch": 0.15361462571763002, "grad_norm": 0.5399606227874756, "learning_rate": 9.529330876564398e-05, "loss": 1.7747, "step": 2756 }, { "epoch": 0.15367036397079314, "grad_norm": 0.5473707914352417, "learning_rate": 9.528953653338867e-05, "loss": 1.7633, "step": 2757 }, { "epoch": 0.1537261022239563, "grad_norm": 0.5312392711639404, "learning_rate": 9.528576286481332e-05, "loss": 1.7155, "step": 2758 }, { "epoch": 0.15378184047711946, "grad_norm": 0.5812214016914368, "learning_rate": 9.52819877600376e-05, "loss": 1.7427, "step": 2759 }, { "epoch": 0.1538375787302826, "grad_norm": 0.5881000757217407, "learning_rate": 9.527821121918126e-05, "loss": 1.9338, "step": 2760 }, { "epoch": 0.15389331698344574, "grad_norm": 0.4990249574184418, "learning_rate": 9.527443324236403e-05, "loss": 1.6865, "step": 2761 }, { "epoch": 0.15394905523660887, "grad_norm": 0.5099406242370605, "learning_rate": 9.527065382970576e-05, "loss": 1.4843, "step": 2762 }, { "epoch": 0.15400479348977203, "grad_norm": 0.555368959903717, "learning_rate": 9.52668729813263e-05, "loss": 1.7174, "step": 2763 }, { "epoch": 0.1540605317429352, "grad_norm": 0.5384423136711121, "learning_rate": 9.526309069734553e-05, "loss": 1.8855, "step": 2764 }, { "epoch": 0.15411626999609832, "grad_norm": 0.5143032073974609, "learning_rate": 9.525930697788345e-05, "loss": 1.7095, "step": 2765 }, { "epoch": 0.15417200824926147, "grad_norm": 0.4992869794368744, "learning_rate": 9.525552182306003e-05, "loss": 1.5436, "step": 2766 }, { "epoch": 0.1542277465024246, "grad_norm": 0.5122644901275635, "learning_rate": 9.525173523299531e-05, "loss": 1.8488, "step": 2767 }, { "epoch": 0.15428348475558776, "grad_norm": 0.49027514457702637, "learning_rate": 9.524794720780938e-05, "loss": 1.6764, "step": 2768 }, { "epoch": 0.15433922300875091, "grad_norm": 0.5170779824256897, "learning_rate": 9.524415774762239e-05, "loss": 1.7393, "step": 2769 }, { "epoch": 0.15439496126191404, "grad_norm": 0.5226306319236755, "learning_rate": 9.52403668525545e-05, "loss": 1.6587, "step": 2770 }, { "epoch": 0.1544506995150772, "grad_norm": 0.5146019458770752, "learning_rate": 9.523657452272594e-05, "loss": 1.5704, "step": 2771 }, { "epoch": 0.15450643776824036, "grad_norm": 0.5141226649284363, "learning_rate": 9.5232780758257e-05, "loss": 1.6701, "step": 2772 }, { "epoch": 0.15456217602140349, "grad_norm": 0.5106475353240967, "learning_rate": 9.522898555926796e-05, "loss": 1.7997, "step": 2773 }, { "epoch": 0.15461791427456664, "grad_norm": 0.4933443069458008, "learning_rate": 9.52251889258792e-05, "loss": 1.4629, "step": 2774 }, { "epoch": 0.15467365252772977, "grad_norm": 0.547154426574707, "learning_rate": 9.522139085821113e-05, "loss": 1.7481, "step": 2775 }, { "epoch": 0.15472939078089293, "grad_norm": 0.5420608520507812, "learning_rate": 9.521759135638422e-05, "loss": 1.781, "step": 2776 }, { "epoch": 0.15478512903405608, "grad_norm": 0.5556414723396301, "learning_rate": 9.521379042051894e-05, "loss": 1.5232, "step": 2777 }, { "epoch": 0.1548408672872192, "grad_norm": 0.546357274055481, "learning_rate": 9.520998805073584e-05, "loss": 1.663, "step": 2778 }, { "epoch": 0.15489660554038237, "grad_norm": 0.5195935964584351, "learning_rate": 9.52061842471555e-05, "loss": 1.632, "step": 2779 }, { "epoch": 0.1549523437935455, "grad_norm": 0.5412857532501221, "learning_rate": 9.520237900989858e-05, "loss": 1.7983, "step": 2780 }, { "epoch": 0.15500808204670866, "grad_norm": 0.5480208992958069, "learning_rate": 9.519857233908574e-05, "loss": 2.0205, "step": 2781 }, { "epoch": 0.1550638202998718, "grad_norm": 0.5754556655883789, "learning_rate": 9.519476423483771e-05, "loss": 1.9992, "step": 2782 }, { "epoch": 0.15511955855303494, "grad_norm": 0.560160756111145, "learning_rate": 9.519095469727527e-05, "loss": 1.8583, "step": 2783 }, { "epoch": 0.1551752968061981, "grad_norm": 0.5757945775985718, "learning_rate": 9.518714372651922e-05, "loss": 1.9257, "step": 2784 }, { "epoch": 0.15523103505936123, "grad_norm": 0.861761212348938, "learning_rate": 9.518333132269043e-05, "loss": 1.8291, "step": 2785 }, { "epoch": 0.15528677331252438, "grad_norm": 0.5081753134727478, "learning_rate": 9.517951748590983e-05, "loss": 1.5859, "step": 2786 }, { "epoch": 0.15534251156568754, "grad_norm": 0.5519318580627441, "learning_rate": 9.517570221629833e-05, "loss": 1.7556, "step": 2787 }, { "epoch": 0.15539824981885067, "grad_norm": 0.5754350423812866, "learning_rate": 9.517188551397695e-05, "loss": 1.8201, "step": 2788 }, { "epoch": 0.15545398807201383, "grad_norm": 0.5522143840789795, "learning_rate": 9.516806737906674e-05, "loss": 1.7392, "step": 2789 }, { "epoch": 0.15550972632517696, "grad_norm": 0.5845313668251038, "learning_rate": 9.516424781168877e-05, "loss": 1.7216, "step": 2790 }, { "epoch": 0.1555654645783401, "grad_norm": 0.57271808385849, "learning_rate": 9.516042681196419e-05, "loss": 1.561, "step": 2791 }, { "epoch": 0.15562120283150327, "grad_norm": 0.5778896808624268, "learning_rate": 9.515660438001417e-05, "loss": 2.061, "step": 2792 }, { "epoch": 0.1556769410846664, "grad_norm": 0.5089336633682251, "learning_rate": 9.515278051595996e-05, "loss": 1.5716, "step": 2793 }, { "epoch": 0.15573267933782955, "grad_norm": 0.5174574255943298, "learning_rate": 9.514895521992278e-05, "loss": 1.5369, "step": 2794 }, { "epoch": 0.1557884175909927, "grad_norm": 0.5474531650543213, "learning_rate": 9.5145128492024e-05, "loss": 1.9497, "step": 2795 }, { "epoch": 0.15584415584415584, "grad_norm": 0.5397194027900696, "learning_rate": 9.514130033238494e-05, "loss": 1.7145, "step": 2796 }, { "epoch": 0.155899894097319, "grad_norm": 0.5489051938056946, "learning_rate": 9.513747074112705e-05, "loss": 1.599, "step": 2797 }, { "epoch": 0.15595563235048213, "grad_norm": 0.5342767834663391, "learning_rate": 9.513363971837174e-05, "loss": 1.6787, "step": 2798 }, { "epoch": 0.15601137060364528, "grad_norm": 0.5298926830291748, "learning_rate": 9.512980726424052e-05, "loss": 1.6852, "step": 2799 }, { "epoch": 0.15606710885680844, "grad_norm": 0.5444782376289368, "learning_rate": 9.512597337885496e-05, "loss": 1.6972, "step": 2800 }, { "epoch": 0.15612284710997157, "grad_norm": 0.5541877150535583, "learning_rate": 9.51221380623366e-05, "loss": 1.6794, "step": 2801 }, { "epoch": 0.15617858536313473, "grad_norm": 0.6140812039375305, "learning_rate": 9.511830131480712e-05, "loss": 1.6826, "step": 2802 }, { "epoch": 0.15623432361629785, "grad_norm": 0.5042434930801392, "learning_rate": 9.511446313638819e-05, "loss": 1.6276, "step": 2803 }, { "epoch": 0.156290061869461, "grad_norm": 0.5544094443321228, "learning_rate": 9.51106235272015e-05, "loss": 1.7685, "step": 2804 }, { "epoch": 0.15634580012262417, "grad_norm": 0.49621298909187317, "learning_rate": 9.510678248736887e-05, "loss": 1.6194, "step": 2805 }, { "epoch": 0.1564015383757873, "grad_norm": 0.5988842248916626, "learning_rate": 9.510294001701208e-05, "loss": 1.8121, "step": 2806 }, { "epoch": 0.15645727662895045, "grad_norm": 0.5324400067329407, "learning_rate": 9.509909611625298e-05, "loss": 1.7674, "step": 2807 }, { "epoch": 0.15651301488211358, "grad_norm": 0.5413124561309814, "learning_rate": 9.509525078521353e-05, "loss": 1.5738, "step": 2808 }, { "epoch": 0.15656875313527674, "grad_norm": 0.5253452658653259, "learning_rate": 9.509140402401563e-05, "loss": 1.7126, "step": 2809 }, { "epoch": 0.1566244913884399, "grad_norm": 0.5672581791877747, "learning_rate": 9.508755583278131e-05, "loss": 1.8056, "step": 2810 }, { "epoch": 0.15668022964160302, "grad_norm": 0.49362093210220337, "learning_rate": 9.508370621163259e-05, "loss": 1.7569, "step": 2811 }, { "epoch": 0.15673596789476618, "grad_norm": 0.5672383308410645, "learning_rate": 9.507985516069154e-05, "loss": 2.0115, "step": 2812 }, { "epoch": 0.1567917061479293, "grad_norm": 0.576835036277771, "learning_rate": 9.507600268008034e-05, "loss": 2.0173, "step": 2813 }, { "epoch": 0.15684744440109247, "grad_norm": 0.5514403581619263, "learning_rate": 9.507214876992116e-05, "loss": 1.711, "step": 2814 }, { "epoch": 0.15690318265425562, "grad_norm": 0.5197775363922119, "learning_rate": 9.506829343033619e-05, "loss": 1.7613, "step": 2815 }, { "epoch": 0.15695892090741875, "grad_norm": 0.5949315428733826, "learning_rate": 9.506443666144773e-05, "loss": 1.9146, "step": 2816 }, { "epoch": 0.1570146591605819, "grad_norm": 0.5169588923454285, "learning_rate": 9.506057846337808e-05, "loss": 1.5925, "step": 2817 }, { "epoch": 0.15707039741374507, "grad_norm": 0.5083977580070496, "learning_rate": 9.505671883624959e-05, "loss": 1.7269, "step": 2818 }, { "epoch": 0.1571261356669082, "grad_norm": 0.5890203714370728, "learning_rate": 9.505285778018469e-05, "loss": 1.9239, "step": 2819 }, { "epoch": 0.15718187392007135, "grad_norm": 0.5113581418991089, "learning_rate": 9.504899529530582e-05, "loss": 1.4883, "step": 2820 }, { "epoch": 0.15723761217323448, "grad_norm": 0.5035502314567566, "learning_rate": 9.504513138173547e-05, "loss": 1.5673, "step": 2821 }, { "epoch": 0.15729335042639764, "grad_norm": 0.5176184773445129, "learning_rate": 9.504126603959618e-05, "loss": 1.492, "step": 2822 }, { "epoch": 0.1573490886795608, "grad_norm": 0.5595249533653259, "learning_rate": 9.503739926901055e-05, "loss": 1.916, "step": 2823 }, { "epoch": 0.15740482693272392, "grad_norm": 0.5306408405303955, "learning_rate": 9.50335310701012e-05, "loss": 1.8255, "step": 2824 }, { "epoch": 0.15746056518588708, "grad_norm": 0.5166139602661133, "learning_rate": 9.50296614429908e-05, "loss": 1.9614, "step": 2825 }, { "epoch": 0.1575163034390502, "grad_norm": 0.5143607258796692, "learning_rate": 9.502579038780207e-05, "loss": 1.5858, "step": 2826 }, { "epoch": 0.15757204169221337, "grad_norm": 0.5186240673065186, "learning_rate": 9.50219179046578e-05, "loss": 1.6746, "step": 2827 }, { "epoch": 0.15762777994537652, "grad_norm": 0.5193765759468079, "learning_rate": 9.50180439936808e-05, "loss": 1.5768, "step": 2828 }, { "epoch": 0.15768351819853965, "grad_norm": 0.5847373604774475, "learning_rate": 9.501416865499391e-05, "loss": 2.0199, "step": 2829 }, { "epoch": 0.1577392564517028, "grad_norm": 0.5198137760162354, "learning_rate": 9.501029188872004e-05, "loss": 1.6215, "step": 2830 }, { "epoch": 0.15779499470486594, "grad_norm": 0.5044419169425964, "learning_rate": 9.500641369498214e-05, "loss": 1.6355, "step": 2831 }, { "epoch": 0.1578507329580291, "grad_norm": 0.6085756421089172, "learning_rate": 9.50025340739032e-05, "loss": 2.107, "step": 2832 }, { "epoch": 0.15790647121119225, "grad_norm": 0.5201433300971985, "learning_rate": 9.499865302560626e-05, "loss": 1.5787, "step": 2833 }, { "epoch": 0.15796220946435538, "grad_norm": 0.5003561973571777, "learning_rate": 9.49947705502144e-05, "loss": 1.6343, "step": 2834 }, { "epoch": 0.15801794771751854, "grad_norm": 0.5781692862510681, "learning_rate": 9.499088664785077e-05, "loss": 1.8281, "step": 2835 }, { "epoch": 0.15807368597068167, "grad_norm": 0.5135318636894226, "learning_rate": 9.498700131863853e-05, "loss": 1.7294, "step": 2836 }, { "epoch": 0.15812942422384482, "grad_norm": 0.5199892520904541, "learning_rate": 9.49831145627009e-05, "loss": 1.6611, "step": 2837 }, { "epoch": 0.15818516247700798, "grad_norm": 0.49417805671691895, "learning_rate": 9.497922638016114e-05, "loss": 1.4057, "step": 2838 }, { "epoch": 0.1582409007301711, "grad_norm": 0.5626333951950073, "learning_rate": 9.497533677114257e-05, "loss": 1.7803, "step": 2839 }, { "epoch": 0.15829663898333426, "grad_norm": 0.5851137042045593, "learning_rate": 9.497144573576855e-05, "loss": 1.7828, "step": 2840 }, { "epoch": 0.15835237723649742, "grad_norm": 0.5782892107963562, "learning_rate": 9.496755327416245e-05, "loss": 1.9224, "step": 2841 }, { "epoch": 0.15840811548966055, "grad_norm": 0.519010603427887, "learning_rate": 9.496365938644775e-05, "loss": 1.6932, "step": 2842 }, { "epoch": 0.1584638537428237, "grad_norm": 0.588720440864563, "learning_rate": 9.495976407274794e-05, "loss": 1.7235, "step": 2843 }, { "epoch": 0.15851959199598684, "grad_norm": 0.530684769153595, "learning_rate": 9.495586733318654e-05, "loss": 1.7368, "step": 2844 }, { "epoch": 0.15857533024915, "grad_norm": 0.5223602652549744, "learning_rate": 9.495196916788714e-05, "loss": 1.5822, "step": 2845 }, { "epoch": 0.15863106850231315, "grad_norm": 0.5282277464866638, "learning_rate": 9.494806957697337e-05, "loss": 1.7119, "step": 2846 }, { "epoch": 0.15868680675547628, "grad_norm": 0.5861890912055969, "learning_rate": 9.49441685605689e-05, "loss": 1.7597, "step": 2847 }, { "epoch": 0.15874254500863944, "grad_norm": 0.6072325110435486, "learning_rate": 9.494026611879744e-05, "loss": 2.1445, "step": 2848 }, { "epoch": 0.15879828326180256, "grad_norm": 0.5348519086837769, "learning_rate": 9.493636225178276e-05, "loss": 1.5885, "step": 2849 }, { "epoch": 0.15885402151496572, "grad_norm": 0.5133005976676941, "learning_rate": 9.493245695964866e-05, "loss": 1.7934, "step": 2850 }, { "epoch": 0.15890975976812888, "grad_norm": 0.5469639897346497, "learning_rate": 9.492855024251901e-05, "loss": 1.7025, "step": 2851 }, { "epoch": 0.158965498021292, "grad_norm": 0.5326577425003052, "learning_rate": 9.492464210051771e-05, "loss": 1.6258, "step": 2852 }, { "epoch": 0.15902123627445516, "grad_norm": 0.6941805481910706, "learning_rate": 9.492073253376865e-05, "loss": 1.9171, "step": 2853 }, { "epoch": 0.1590769745276183, "grad_norm": 0.5997553467750549, "learning_rate": 9.491682154239589e-05, "loss": 1.9891, "step": 2854 }, { "epoch": 0.15913271278078145, "grad_norm": 0.5727251172065735, "learning_rate": 9.491290912652344e-05, "loss": 1.9522, "step": 2855 }, { "epoch": 0.1591884510339446, "grad_norm": 0.5947685837745667, "learning_rate": 9.490899528627536e-05, "loss": 2.0334, "step": 2856 }, { "epoch": 0.15924418928710773, "grad_norm": 0.5425087809562683, "learning_rate": 9.490508002177579e-05, "loss": 1.8532, "step": 2857 }, { "epoch": 0.1592999275402709, "grad_norm": 0.5523599982261658, "learning_rate": 9.490116333314889e-05, "loss": 1.6041, "step": 2858 }, { "epoch": 0.15935566579343402, "grad_norm": 0.5558710098266602, "learning_rate": 9.489724522051888e-05, "loss": 1.9383, "step": 2859 }, { "epoch": 0.15941140404659718, "grad_norm": 0.5611505508422852, "learning_rate": 9.489332568401004e-05, "loss": 1.8919, "step": 2860 }, { "epoch": 0.15946714229976033, "grad_norm": 0.5016571283340454, "learning_rate": 9.488940472374663e-05, "loss": 1.8347, "step": 2861 }, { "epoch": 0.15952288055292346, "grad_norm": 0.5290272831916809, "learning_rate": 9.488548233985305e-05, "loss": 1.697, "step": 2862 }, { "epoch": 0.15957861880608662, "grad_norm": 0.5488302707672119, "learning_rate": 9.488155853245366e-05, "loss": 1.9557, "step": 2863 }, { "epoch": 0.15963435705924978, "grad_norm": 0.5422006845474243, "learning_rate": 9.487763330167291e-05, "loss": 1.6364, "step": 2864 }, { "epoch": 0.1596900953124129, "grad_norm": 0.5467256307601929, "learning_rate": 9.487370664763529e-05, "loss": 1.7917, "step": 2865 }, { "epoch": 0.15974583356557606, "grad_norm": 0.538063108921051, "learning_rate": 9.486977857046532e-05, "loss": 1.8552, "step": 2866 }, { "epoch": 0.1598015718187392, "grad_norm": 0.5502356886863708, "learning_rate": 9.486584907028758e-05, "loss": 1.6089, "step": 2867 }, { "epoch": 0.15985731007190235, "grad_norm": 0.526684582233429, "learning_rate": 9.48619181472267e-05, "loss": 1.5357, "step": 2868 }, { "epoch": 0.1599130483250655, "grad_norm": 0.5427432656288147, "learning_rate": 9.485798580140735e-05, "loss": 1.7628, "step": 2869 }, { "epoch": 0.15996878657822863, "grad_norm": 0.5465673208236694, "learning_rate": 9.485405203295421e-05, "loss": 1.6318, "step": 2870 }, { "epoch": 0.1600245248313918, "grad_norm": 0.5261492729187012, "learning_rate": 9.485011684199207e-05, "loss": 1.6422, "step": 2871 }, { "epoch": 0.16008026308455492, "grad_norm": 0.571042001247406, "learning_rate": 9.484618022864571e-05, "loss": 1.5466, "step": 2872 }, { "epoch": 0.16013600133771808, "grad_norm": 0.5928837656974792, "learning_rate": 9.484224219304e-05, "loss": 2.0925, "step": 2873 }, { "epoch": 0.16019173959088123, "grad_norm": 0.4875600337982178, "learning_rate": 9.48383027352998e-05, "loss": 1.6183, "step": 2874 }, { "epoch": 0.16024747784404436, "grad_norm": 0.5074633955955505, "learning_rate": 9.483436185555007e-05, "loss": 1.5593, "step": 2875 }, { "epoch": 0.16030321609720752, "grad_norm": 0.553817093372345, "learning_rate": 9.483041955391578e-05, "loss": 1.7093, "step": 2876 }, { "epoch": 0.16035895435037065, "grad_norm": 0.5676888823509216, "learning_rate": 9.482647583052196e-05, "loss": 1.7555, "step": 2877 }, { "epoch": 0.1604146926035338, "grad_norm": 0.5311009883880615, "learning_rate": 9.48225306854937e-05, "loss": 1.7709, "step": 2878 }, { "epoch": 0.16047043085669696, "grad_norm": 0.5391182899475098, "learning_rate": 9.481858411895608e-05, "loss": 1.7296, "step": 2879 }, { "epoch": 0.1605261691098601, "grad_norm": 0.5432226657867432, "learning_rate": 9.481463613103429e-05, "loss": 1.7808, "step": 2880 }, { "epoch": 0.16058190736302325, "grad_norm": 0.5264506936073303, "learning_rate": 9.481068672185353e-05, "loss": 1.6362, "step": 2881 }, { "epoch": 0.16063764561618638, "grad_norm": 0.5308744311332703, "learning_rate": 9.480673589153904e-05, "loss": 1.5913, "step": 2882 }, { "epoch": 0.16069338386934953, "grad_norm": 0.4966695308685303, "learning_rate": 9.480278364021614e-05, "loss": 1.6744, "step": 2883 }, { "epoch": 0.1607491221225127, "grad_norm": 0.5250310301780701, "learning_rate": 9.479882996801017e-05, "loss": 1.5185, "step": 2884 }, { "epoch": 0.16080486037567582, "grad_norm": 0.5288892388343811, "learning_rate": 9.479487487504649e-05, "loss": 1.5259, "step": 2885 }, { "epoch": 0.16086059862883897, "grad_norm": 0.5666532516479492, "learning_rate": 9.479091836145057e-05, "loss": 1.7626, "step": 2886 }, { "epoch": 0.16091633688200213, "grad_norm": 0.5458130836486816, "learning_rate": 9.478696042734785e-05, "loss": 1.6936, "step": 2887 }, { "epoch": 0.16097207513516526, "grad_norm": 0.5105459690093994, "learning_rate": 9.478300107286389e-05, "loss": 1.4811, "step": 2888 }, { "epoch": 0.16102781338832842, "grad_norm": 0.5251494646072388, "learning_rate": 9.477904029812422e-05, "loss": 1.7184, "step": 2889 }, { "epoch": 0.16108355164149155, "grad_norm": 0.5484756231307983, "learning_rate": 9.477507810325448e-05, "loss": 1.4053, "step": 2890 }, { "epoch": 0.1611392898946547, "grad_norm": 0.5894975066184998, "learning_rate": 9.477111448838031e-05, "loss": 2.0827, "step": 2891 }, { "epoch": 0.16119502814781786, "grad_norm": 0.5738565921783447, "learning_rate": 9.476714945362745e-05, "loss": 1.8864, "step": 2892 }, { "epoch": 0.161250766400981, "grad_norm": 0.6212289333343506, "learning_rate": 9.47631829991216e-05, "loss": 1.9475, "step": 2893 }, { "epoch": 0.16130650465414414, "grad_norm": 0.6506125330924988, "learning_rate": 9.475921512498857e-05, "loss": 1.9044, "step": 2894 }, { "epoch": 0.16136224290730727, "grad_norm": 0.5559994578361511, "learning_rate": 9.475524583135421e-05, "loss": 1.5211, "step": 2895 }, { "epoch": 0.16141798116047043, "grad_norm": 0.5860363841056824, "learning_rate": 9.475127511834438e-05, "loss": 1.7724, "step": 2896 }, { "epoch": 0.1614737194136336, "grad_norm": 0.5559065341949463, "learning_rate": 9.474730298608504e-05, "loss": 1.8392, "step": 2897 }, { "epoch": 0.16152945766679672, "grad_norm": 0.5526688694953918, "learning_rate": 9.474332943470213e-05, "loss": 1.7909, "step": 2898 }, { "epoch": 0.16158519591995987, "grad_norm": 0.5582461357116699, "learning_rate": 9.47393544643217e-05, "loss": 1.9106, "step": 2899 }, { "epoch": 0.161640934173123, "grad_norm": 0.5841467380523682, "learning_rate": 9.473537807506977e-05, "loss": 1.922, "step": 2900 }, { "epoch": 0.16169667242628616, "grad_norm": 0.5061233043670654, "learning_rate": 9.47314002670725e-05, "loss": 1.5719, "step": 2901 }, { "epoch": 0.16175241067944932, "grad_norm": 0.4959016442298889, "learning_rate": 9.472742104045599e-05, "loss": 1.6517, "step": 2902 }, { "epoch": 0.16180814893261244, "grad_norm": 0.5075359344482422, "learning_rate": 9.472344039534646e-05, "loss": 1.7661, "step": 2903 }, { "epoch": 0.1618638871857756, "grad_norm": 0.5135536193847656, "learning_rate": 9.471945833187018e-05, "loss": 1.6874, "step": 2904 }, { "epoch": 0.16191962543893873, "grad_norm": 0.5618202090263367, "learning_rate": 9.471547485015341e-05, "loss": 1.6745, "step": 2905 }, { "epoch": 0.1619753636921019, "grad_norm": 0.5325173139572144, "learning_rate": 9.471148995032247e-05, "loss": 1.7141, "step": 2906 }, { "epoch": 0.16203110194526504, "grad_norm": 0.521827220916748, "learning_rate": 9.470750363250378e-05, "loss": 1.595, "step": 2907 }, { "epoch": 0.16208684019842817, "grad_norm": 0.5489259362220764, "learning_rate": 9.470351589682372e-05, "loss": 1.8687, "step": 2908 }, { "epoch": 0.16214257845159133, "grad_norm": 0.5823487043380737, "learning_rate": 9.469952674340877e-05, "loss": 1.8964, "step": 2909 }, { "epoch": 0.16219831670475449, "grad_norm": 0.5378115773200989, "learning_rate": 9.469553617238546e-05, "loss": 1.6171, "step": 2910 }, { "epoch": 0.16225405495791762, "grad_norm": 0.500411331653595, "learning_rate": 9.469154418388034e-05, "loss": 1.7592, "step": 2911 }, { "epoch": 0.16230979321108077, "grad_norm": 0.49383944272994995, "learning_rate": 9.468755077801999e-05, "loss": 1.6709, "step": 2912 }, { "epoch": 0.1623655314642439, "grad_norm": 0.5428176522254944, "learning_rate": 9.468355595493109e-05, "loss": 1.7304, "step": 2913 }, { "epoch": 0.16242126971740706, "grad_norm": 0.537581205368042, "learning_rate": 9.467955971474031e-05, "loss": 1.7252, "step": 2914 }, { "epoch": 0.16247700797057021, "grad_norm": 0.5622221231460571, "learning_rate": 9.46755620575744e-05, "loss": 1.7643, "step": 2915 }, { "epoch": 0.16253274622373334, "grad_norm": 0.5474369525909424, "learning_rate": 9.467156298356015e-05, "loss": 1.7263, "step": 2916 }, { "epoch": 0.1625884844768965, "grad_norm": 0.5429725646972656, "learning_rate": 9.466756249282435e-05, "loss": 1.7771, "step": 2917 }, { "epoch": 0.16264422273005963, "grad_norm": 0.5385332107543945, "learning_rate": 9.466356058549393e-05, "loss": 1.7372, "step": 2918 }, { "epoch": 0.16269996098322279, "grad_norm": 0.5135955214500427, "learning_rate": 9.465955726169575e-05, "loss": 1.7296, "step": 2919 }, { "epoch": 0.16275569923638594, "grad_norm": 0.5584880709648132, "learning_rate": 9.46555525215568e-05, "loss": 1.7907, "step": 2920 }, { "epoch": 0.16281143748954907, "grad_norm": 0.5609123706817627, "learning_rate": 9.46515463652041e-05, "loss": 1.8558, "step": 2921 }, { "epoch": 0.16286717574271223, "grad_norm": 0.5887969732284546, "learning_rate": 9.464753879276467e-05, "loss": 1.8673, "step": 2922 }, { "epoch": 0.16292291399587536, "grad_norm": 0.5207127332687378, "learning_rate": 9.464352980436562e-05, "loss": 1.8252, "step": 2923 }, { "epoch": 0.1629786522490385, "grad_norm": 0.4879356622695923, "learning_rate": 9.463951940013411e-05, "loss": 1.564, "step": 2924 }, { "epoch": 0.16303439050220167, "grad_norm": 0.5253145098686218, "learning_rate": 9.46355075801973e-05, "loss": 1.731, "step": 2925 }, { "epoch": 0.1630901287553648, "grad_norm": 0.5216013789176941, "learning_rate": 9.463149434468244e-05, "loss": 1.7954, "step": 2926 }, { "epoch": 0.16314586700852796, "grad_norm": 0.5162796974182129, "learning_rate": 9.46274796937168e-05, "loss": 1.6639, "step": 2927 }, { "epoch": 0.16320160526169109, "grad_norm": 0.5164597630500793, "learning_rate": 9.462346362742767e-05, "loss": 1.5104, "step": 2928 }, { "epoch": 0.16325734351485424, "grad_norm": 0.5458294153213501, "learning_rate": 9.461944614594248e-05, "loss": 1.7081, "step": 2929 }, { "epoch": 0.1633130817680174, "grad_norm": 0.525484025478363, "learning_rate": 9.461542724938859e-05, "loss": 1.8709, "step": 2930 }, { "epoch": 0.16336882002118053, "grad_norm": 0.5675646662712097, "learning_rate": 9.461140693789349e-05, "loss": 1.7861, "step": 2931 }, { "epoch": 0.16342455827434368, "grad_norm": 0.5174034833908081, "learning_rate": 9.460738521158466e-05, "loss": 1.745, "step": 2932 }, { "epoch": 0.16348029652750684, "grad_norm": 0.5687560439109802, "learning_rate": 9.460336207058964e-05, "loss": 1.8071, "step": 2933 }, { "epoch": 0.16353603478066997, "grad_norm": 0.5177374482154846, "learning_rate": 9.459933751503604e-05, "loss": 1.7359, "step": 2934 }, { "epoch": 0.16359177303383313, "grad_norm": 0.5742724537849426, "learning_rate": 9.459531154505147e-05, "loss": 1.6545, "step": 2935 }, { "epoch": 0.16364751128699626, "grad_norm": 0.555439293384552, "learning_rate": 9.459128416076365e-05, "loss": 1.5666, "step": 2936 }, { "epoch": 0.1637032495401594, "grad_norm": 0.5305073857307434, "learning_rate": 9.458725536230027e-05, "loss": 1.8546, "step": 2937 }, { "epoch": 0.16375898779332257, "grad_norm": 0.517587423324585, "learning_rate": 9.458322514978912e-05, "loss": 1.6707, "step": 2938 }, { "epoch": 0.1638147260464857, "grad_norm": 0.5396296977996826, "learning_rate": 9.4579193523358e-05, "loss": 1.6807, "step": 2939 }, { "epoch": 0.16387046429964885, "grad_norm": 0.545603334903717, "learning_rate": 9.457516048313478e-05, "loss": 1.7966, "step": 2940 }, { "epoch": 0.16392620255281198, "grad_norm": 0.5535080432891846, "learning_rate": 9.457112602924735e-05, "loss": 1.8103, "step": 2941 }, { "epoch": 0.16398194080597514, "grad_norm": 0.5278719663619995, "learning_rate": 9.456709016182368e-05, "loss": 1.7992, "step": 2942 }, { "epoch": 0.1640376790591383, "grad_norm": 0.5094558000564575, "learning_rate": 9.456305288099174e-05, "loss": 1.8232, "step": 2943 }, { "epoch": 0.16409341731230143, "grad_norm": 0.5989511013031006, "learning_rate": 9.45590141868796e-05, "loss": 1.8106, "step": 2944 }, { "epoch": 0.16414915556546458, "grad_norm": 0.5221716165542603, "learning_rate": 9.455497407961532e-05, "loss": 1.6316, "step": 2945 }, { "epoch": 0.1642048938186277, "grad_norm": 0.4996791481971741, "learning_rate": 9.455093255932704e-05, "loss": 1.4846, "step": 2946 }, { "epoch": 0.16426063207179087, "grad_norm": 0.5217500329017639, "learning_rate": 9.454688962614293e-05, "loss": 1.7717, "step": 2947 }, { "epoch": 0.16431637032495403, "grad_norm": 0.5416474938392639, "learning_rate": 9.45428452801912e-05, "loss": 1.8829, "step": 2948 }, { "epoch": 0.16437210857811715, "grad_norm": 0.5558078289031982, "learning_rate": 9.453879952160013e-05, "loss": 1.8933, "step": 2949 }, { "epoch": 0.1644278468312803, "grad_norm": 0.5439289808273315, "learning_rate": 9.4534752350498e-05, "loss": 1.6009, "step": 2950 }, { "epoch": 0.16448358508444344, "grad_norm": 0.5921631455421448, "learning_rate": 9.45307037670132e-05, "loss": 1.9932, "step": 2951 }, { "epoch": 0.1645393233376066, "grad_norm": 0.5491567850112915, "learning_rate": 9.452665377127409e-05, "loss": 1.9729, "step": 2952 }, { "epoch": 0.16459506159076975, "grad_norm": 0.6129978895187378, "learning_rate": 9.452260236340915e-05, "loss": 1.8995, "step": 2953 }, { "epoch": 0.16465079984393288, "grad_norm": 0.6029583215713501, "learning_rate": 9.451854954354684e-05, "loss": 1.8313, "step": 2954 }, { "epoch": 0.16470653809709604, "grad_norm": 0.5197410583496094, "learning_rate": 9.451449531181572e-05, "loss": 1.6307, "step": 2955 }, { "epoch": 0.1647622763502592, "grad_norm": 0.5214848518371582, "learning_rate": 9.451043966834431e-05, "loss": 1.7253, "step": 2956 }, { "epoch": 0.16481801460342232, "grad_norm": 0.48953381180763245, "learning_rate": 9.450638261326128e-05, "loss": 1.5122, "step": 2957 }, { "epoch": 0.16487375285658548, "grad_norm": 0.5038783550262451, "learning_rate": 9.450232414669528e-05, "loss": 1.7602, "step": 2958 }, { "epoch": 0.1649294911097486, "grad_norm": 0.5723398327827454, "learning_rate": 9.449826426877504e-05, "loss": 1.9841, "step": 2959 }, { "epoch": 0.16498522936291177, "grad_norm": 0.5200619101524353, "learning_rate": 9.44942029796293e-05, "loss": 1.7965, "step": 2960 }, { "epoch": 0.16504096761607492, "grad_norm": 0.6376471519470215, "learning_rate": 9.449014027938685e-05, "loss": 2.1267, "step": 2961 }, { "epoch": 0.16509670586923805, "grad_norm": 0.5397600531578064, "learning_rate": 9.448607616817655e-05, "loss": 1.7952, "step": 2962 }, { "epoch": 0.1651524441224012, "grad_norm": 0.5907739996910095, "learning_rate": 9.448201064612728e-05, "loss": 1.8026, "step": 2963 }, { "epoch": 0.16520818237556434, "grad_norm": 0.5700837969779968, "learning_rate": 9.447794371336799e-05, "loss": 2.1377, "step": 2964 }, { "epoch": 0.1652639206287275, "grad_norm": 0.5404232740402222, "learning_rate": 9.447387537002765e-05, "loss": 1.9586, "step": 2965 }, { "epoch": 0.16531965888189065, "grad_norm": 0.5181935429573059, "learning_rate": 9.446980561623527e-05, "loss": 1.4828, "step": 2966 }, { "epoch": 0.16537539713505378, "grad_norm": 0.6044127941131592, "learning_rate": 9.446573445211994e-05, "loss": 1.789, "step": 2967 }, { "epoch": 0.16543113538821694, "grad_norm": 0.5353678464889526, "learning_rate": 9.446166187781077e-05, "loss": 1.709, "step": 2968 }, { "epoch": 0.16548687364138007, "grad_norm": 0.5155282020568848, "learning_rate": 9.445758789343691e-05, "loss": 1.6335, "step": 2969 }, { "epoch": 0.16554261189454322, "grad_norm": 0.5247118473052979, "learning_rate": 9.445351249912757e-05, "loss": 1.6666, "step": 2970 }, { "epoch": 0.16559835014770638, "grad_norm": 0.5768206119537354, "learning_rate": 9.4449435695012e-05, "loss": 1.9109, "step": 2971 }, { "epoch": 0.1656540884008695, "grad_norm": 0.5591040849685669, "learning_rate": 9.444535748121949e-05, "loss": 1.781, "step": 2972 }, { "epoch": 0.16570982665403267, "grad_norm": 0.5098216533660889, "learning_rate": 9.444127785787938e-05, "loss": 1.7213, "step": 2973 }, { "epoch": 0.1657655649071958, "grad_norm": 0.5072734355926514, "learning_rate": 9.443719682512102e-05, "loss": 1.8224, "step": 2974 }, { "epoch": 0.16582130316035895, "grad_norm": 0.5172891020774841, "learning_rate": 9.443311438307389e-05, "loss": 1.8449, "step": 2975 }, { "epoch": 0.1658770414135221, "grad_norm": 0.557597815990448, "learning_rate": 9.442903053186743e-05, "loss": 1.6679, "step": 2976 }, { "epoch": 0.16593277966668524, "grad_norm": 0.518157422542572, "learning_rate": 9.442494527163115e-05, "loss": 1.6812, "step": 2977 }, { "epoch": 0.1659885179198484, "grad_norm": 0.5476084351539612, "learning_rate": 9.442085860249461e-05, "loss": 1.7849, "step": 2978 }, { "epoch": 0.16604425617301155, "grad_norm": 0.5458279252052307, "learning_rate": 9.441677052458745e-05, "loss": 1.8582, "step": 2979 }, { "epoch": 0.16609999442617468, "grad_norm": 0.592612624168396, "learning_rate": 9.441268103803928e-05, "loss": 2.0226, "step": 2980 }, { "epoch": 0.16615573267933784, "grad_norm": 0.5498427748680115, "learning_rate": 9.440859014297982e-05, "loss": 1.577, "step": 2981 }, { "epoch": 0.16621147093250097, "grad_norm": 0.5673382878303528, "learning_rate": 9.440449783953883e-05, "loss": 1.7272, "step": 2982 }, { "epoch": 0.16626720918566412, "grad_norm": 0.565617024898529, "learning_rate": 9.440040412784603e-05, "loss": 1.7481, "step": 2983 }, { "epoch": 0.16632294743882728, "grad_norm": 0.6157540678977966, "learning_rate": 9.439630900803129e-05, "loss": 1.9244, "step": 2984 }, { "epoch": 0.1663786856919904, "grad_norm": 0.4916851818561554, "learning_rate": 9.439221248022447e-05, "loss": 1.5845, "step": 2985 }, { "epoch": 0.16643442394515356, "grad_norm": 0.573154091835022, "learning_rate": 9.43881145445555e-05, "loss": 1.8841, "step": 2986 }, { "epoch": 0.1664901621983167, "grad_norm": 0.5438728332519531, "learning_rate": 9.438401520115434e-05, "loss": 1.7537, "step": 2987 }, { "epoch": 0.16654590045147985, "grad_norm": 0.5793212652206421, "learning_rate": 9.4379914450151e-05, "loss": 1.9331, "step": 2988 }, { "epoch": 0.166601638704643, "grad_norm": 0.5194965600967407, "learning_rate": 9.437581229167551e-05, "loss": 1.5948, "step": 2989 }, { "epoch": 0.16665737695780614, "grad_norm": 0.5872880816459656, "learning_rate": 9.4371708725858e-05, "loss": 1.7629, "step": 2990 }, { "epoch": 0.1667131152109693, "grad_norm": 0.519842803478241, "learning_rate": 9.436760375282859e-05, "loss": 1.766, "step": 2991 }, { "epoch": 0.16676885346413242, "grad_norm": 0.5351104736328125, "learning_rate": 9.436349737271745e-05, "loss": 1.8319, "step": 2992 }, { "epoch": 0.16682459171729558, "grad_norm": 0.5584455728530884, "learning_rate": 9.435938958565487e-05, "loss": 1.7975, "step": 2993 }, { "epoch": 0.16688032997045874, "grad_norm": 0.4804225564002991, "learning_rate": 9.435528039177105e-05, "loss": 1.7058, "step": 2994 }, { "epoch": 0.16693606822362186, "grad_norm": 0.5311334133148193, "learning_rate": 9.435116979119635e-05, "loss": 1.7305, "step": 2995 }, { "epoch": 0.16699180647678502, "grad_norm": 0.5292813777923584, "learning_rate": 9.434705778406114e-05, "loss": 1.6901, "step": 2996 }, { "epoch": 0.16704754472994815, "grad_norm": 0.5105124711990356, "learning_rate": 9.434294437049582e-05, "loss": 1.7462, "step": 2997 }, { "epoch": 0.1671032829831113, "grad_norm": 0.5604652762413025, "learning_rate": 9.433882955063084e-05, "loss": 1.7997, "step": 2998 }, { "epoch": 0.16715902123627446, "grad_norm": 0.555237889289856, "learning_rate": 9.43347133245967e-05, "loss": 1.923, "step": 2999 }, { "epoch": 0.1672147594894376, "grad_norm": 0.5382326245307922, "learning_rate": 9.433059569252394e-05, "loss": 1.7263, "step": 3000 }, { "epoch": 0.16727049774260075, "grad_norm": 0.6488143801689148, "learning_rate": 9.432647665454315e-05, "loss": 1.5881, "step": 3001 }, { "epoch": 0.1673262359957639, "grad_norm": 0.55712890625, "learning_rate": 9.432235621078497e-05, "loss": 1.9409, "step": 3002 }, { "epoch": 0.16738197424892703, "grad_norm": 0.5540611147880554, "learning_rate": 9.431823436138005e-05, "loss": 1.8471, "step": 3003 }, { "epoch": 0.1674377125020902, "grad_norm": 0.5297248959541321, "learning_rate": 9.431411110645915e-05, "loss": 1.6844, "step": 3004 }, { "epoch": 0.16749345075525332, "grad_norm": 0.5368382334709167, "learning_rate": 9.4309986446153e-05, "loss": 1.7333, "step": 3005 }, { "epoch": 0.16754918900841648, "grad_norm": 0.5433456897735596, "learning_rate": 9.430586038059244e-05, "loss": 1.9837, "step": 3006 }, { "epoch": 0.16760492726157963, "grad_norm": 0.5077199339866638, "learning_rate": 9.430173290990829e-05, "loss": 1.7391, "step": 3007 }, { "epoch": 0.16766066551474276, "grad_norm": 0.49970632791519165, "learning_rate": 9.429760403423148e-05, "loss": 1.5325, "step": 3008 }, { "epoch": 0.16771640376790592, "grad_norm": 0.5068593621253967, "learning_rate": 9.429347375369295e-05, "loss": 1.5849, "step": 3009 }, { "epoch": 0.16777214202106905, "grad_norm": 0.5405229330062866, "learning_rate": 9.428934206842365e-05, "loss": 1.7995, "step": 3010 }, { "epoch": 0.1678278802742322, "grad_norm": 0.5368816256523132, "learning_rate": 9.428520897855469e-05, "loss": 1.7941, "step": 3011 }, { "epoch": 0.16788361852739536, "grad_norm": 0.5910351872444153, "learning_rate": 9.428107448421708e-05, "loss": 1.8987, "step": 3012 }, { "epoch": 0.1679393567805585, "grad_norm": 0.5387074947357178, "learning_rate": 9.427693858554196e-05, "loss": 1.2377, "step": 3013 }, { "epoch": 0.16799509503372165, "grad_norm": 0.5382748246192932, "learning_rate": 9.42728012826605e-05, "loss": 1.8915, "step": 3014 }, { "epoch": 0.16805083328688478, "grad_norm": 0.5706035494804382, "learning_rate": 9.426866257570391e-05, "loss": 1.9298, "step": 3015 }, { "epoch": 0.16810657154004793, "grad_norm": 0.517613410949707, "learning_rate": 9.426452246480347e-05, "loss": 1.6459, "step": 3016 }, { "epoch": 0.1681623097932111, "grad_norm": 0.5248231291770935, "learning_rate": 9.426038095009042e-05, "loss": 1.8506, "step": 3017 }, { "epoch": 0.16821804804637422, "grad_norm": 0.49280843138694763, "learning_rate": 9.425623803169616e-05, "loss": 1.5642, "step": 3018 }, { "epoch": 0.16827378629953738, "grad_norm": 0.5404548048973083, "learning_rate": 9.425209370975208e-05, "loss": 1.7475, "step": 3019 }, { "epoch": 0.1683295245527005, "grad_norm": 0.5196406245231628, "learning_rate": 9.424794798438958e-05, "loss": 1.8123, "step": 3020 }, { "epoch": 0.16838526280586366, "grad_norm": 0.5767018795013428, "learning_rate": 9.424380085574015e-05, "loss": 1.9773, "step": 3021 }, { "epoch": 0.16844100105902682, "grad_norm": 0.5589628219604492, "learning_rate": 9.423965232393532e-05, "loss": 1.8269, "step": 3022 }, { "epoch": 0.16849673931218995, "grad_norm": 0.5162323117256165, "learning_rate": 9.423550238910666e-05, "loss": 1.7838, "step": 3023 }, { "epoch": 0.1685524775653531, "grad_norm": 0.5301263332366943, "learning_rate": 9.423135105138577e-05, "loss": 1.7805, "step": 3024 }, { "epoch": 0.16860821581851626, "grad_norm": 0.5383440256118774, "learning_rate": 9.42271983109043e-05, "loss": 1.8054, "step": 3025 }, { "epoch": 0.1686639540716794, "grad_norm": 0.572410523891449, "learning_rate": 9.422304416779397e-05, "loss": 1.7666, "step": 3026 }, { "epoch": 0.16871969232484255, "grad_norm": 0.5496928691864014, "learning_rate": 9.421888862218651e-05, "loss": 1.8725, "step": 3027 }, { "epoch": 0.16877543057800568, "grad_norm": 0.5649563670158386, "learning_rate": 9.421473167421373e-05, "loss": 1.873, "step": 3028 }, { "epoch": 0.16883116883116883, "grad_norm": 0.5560464262962341, "learning_rate": 9.421057332400744e-05, "loss": 1.6385, "step": 3029 }, { "epoch": 0.168886907084332, "grad_norm": 0.5245364904403687, "learning_rate": 9.420641357169954e-05, "loss": 1.758, "step": 3030 }, { "epoch": 0.16894264533749512, "grad_norm": 0.5251185297966003, "learning_rate": 9.420225241742193e-05, "loss": 1.829, "step": 3031 }, { "epoch": 0.16899838359065827, "grad_norm": 0.5360503792762756, "learning_rate": 9.419808986130661e-05, "loss": 1.7447, "step": 3032 }, { "epoch": 0.1690541218438214, "grad_norm": 0.579368531703949, "learning_rate": 9.419392590348555e-05, "loss": 1.7367, "step": 3033 }, { "epoch": 0.16910986009698456, "grad_norm": 0.5943927764892578, "learning_rate": 9.418976054409084e-05, "loss": 1.8542, "step": 3034 }, { "epoch": 0.16916559835014772, "grad_norm": 0.5310322642326355, "learning_rate": 9.418559378325457e-05, "loss": 1.5941, "step": 3035 }, { "epoch": 0.16922133660331085, "grad_norm": 0.5201945304870605, "learning_rate": 9.418142562110888e-05, "loss": 1.6894, "step": 3036 }, { "epoch": 0.169277074856474, "grad_norm": 0.49601128697395325, "learning_rate": 9.417725605778598e-05, "loss": 1.5647, "step": 3037 }, { "epoch": 0.16933281310963713, "grad_norm": 0.5370486378669739, "learning_rate": 9.417308509341806e-05, "loss": 1.7843, "step": 3038 }, { "epoch": 0.1693885513628003, "grad_norm": 0.5515000820159912, "learning_rate": 9.416891272813747e-05, "loss": 1.8156, "step": 3039 }, { "epoch": 0.16944428961596344, "grad_norm": 0.5245648622512817, "learning_rate": 9.416473896207645e-05, "loss": 1.7029, "step": 3040 }, { "epoch": 0.16950002786912657, "grad_norm": 0.6024215817451477, "learning_rate": 9.416056379536744e-05, "loss": 1.8892, "step": 3041 }, { "epoch": 0.16955576612228973, "grad_norm": 0.5456023812294006, "learning_rate": 9.415638722814279e-05, "loss": 1.7344, "step": 3042 }, { "epoch": 0.16961150437545286, "grad_norm": 0.47283026576042175, "learning_rate": 9.415220926053501e-05, "loss": 1.4281, "step": 3043 }, { "epoch": 0.16966724262861602, "grad_norm": 0.5906921029090881, "learning_rate": 9.414802989267657e-05, "loss": 1.772, "step": 3044 }, { "epoch": 0.16972298088177917, "grad_norm": 0.5549463033676147, "learning_rate": 9.414384912470002e-05, "loss": 1.6814, "step": 3045 }, { "epoch": 0.1697787191349423, "grad_norm": 0.5007080435752869, "learning_rate": 9.413966695673795e-05, "loss": 1.7041, "step": 3046 }, { "epoch": 0.16983445738810546, "grad_norm": 0.5527877807617188, "learning_rate": 9.413548338892301e-05, "loss": 1.8597, "step": 3047 }, { "epoch": 0.16989019564126862, "grad_norm": 0.5755193829536438, "learning_rate": 9.413129842138786e-05, "loss": 2.115, "step": 3048 }, { "epoch": 0.16994593389443174, "grad_norm": 0.5897433161735535, "learning_rate": 9.412711205426521e-05, "loss": 1.5559, "step": 3049 }, { "epoch": 0.1700016721475949, "grad_norm": 0.5253439545631409, "learning_rate": 9.412292428768787e-05, "loss": 1.8423, "step": 3050 }, { "epoch": 0.17005741040075803, "grad_norm": 0.5220539569854736, "learning_rate": 9.411873512178862e-05, "loss": 1.6792, "step": 3051 }, { "epoch": 0.1701131486539212, "grad_norm": 0.5669887661933899, "learning_rate": 9.41145445567003e-05, "loss": 1.8432, "step": 3052 }, { "epoch": 0.17016888690708434, "grad_norm": 0.5661007761955261, "learning_rate": 9.411035259255585e-05, "loss": 1.9316, "step": 3053 }, { "epoch": 0.17022462516024747, "grad_norm": 0.5614895820617676, "learning_rate": 9.41061592294882e-05, "loss": 1.8668, "step": 3054 }, { "epoch": 0.17028036341341063, "grad_norm": 0.541671872138977, "learning_rate": 9.410196446763034e-05, "loss": 1.9025, "step": 3055 }, { "epoch": 0.17033610166657376, "grad_norm": 0.54454106092453, "learning_rate": 9.409776830711528e-05, "loss": 1.7351, "step": 3056 }, { "epoch": 0.17039183991973691, "grad_norm": 0.581135094165802, "learning_rate": 9.409357074807612e-05, "loss": 2.0981, "step": 3057 }, { "epoch": 0.17044757817290007, "grad_norm": 0.5024539232254028, "learning_rate": 9.4089371790646e-05, "loss": 1.74, "step": 3058 }, { "epoch": 0.1705033164260632, "grad_norm": 0.527542233467102, "learning_rate": 9.408517143495806e-05, "loss": 1.7409, "step": 3059 }, { "epoch": 0.17055905467922636, "grad_norm": 0.5976712107658386, "learning_rate": 9.40809696811455e-05, "loss": 1.6624, "step": 3060 }, { "epoch": 0.1706147929323895, "grad_norm": 0.5328633785247803, "learning_rate": 9.40767665293416e-05, "loss": 1.7723, "step": 3061 }, { "epoch": 0.17067053118555264, "grad_norm": 0.5550236701965332, "learning_rate": 9.407256197967965e-05, "loss": 1.771, "step": 3062 }, { "epoch": 0.1707262694387158, "grad_norm": 0.5482365489006042, "learning_rate": 9.4068356032293e-05, "loss": 1.5427, "step": 3063 }, { "epoch": 0.17078200769187893, "grad_norm": 0.5379420518875122, "learning_rate": 9.406414868731502e-05, "loss": 1.7884, "step": 3064 }, { "epoch": 0.17083774594504209, "grad_norm": 0.5322206020355225, "learning_rate": 9.405993994487917e-05, "loss": 1.7756, "step": 3065 }, { "epoch": 0.17089348419820521, "grad_norm": 0.5303000807762146, "learning_rate": 9.40557298051189e-05, "loss": 1.7589, "step": 3066 }, { "epoch": 0.17094922245136837, "grad_norm": 0.5660407543182373, "learning_rate": 9.405151826816776e-05, "loss": 1.7427, "step": 3067 }, { "epoch": 0.17100496070453153, "grad_norm": 0.5341696739196777, "learning_rate": 9.404730533415929e-05, "loss": 1.8757, "step": 3068 }, { "epoch": 0.17106069895769466, "grad_norm": 0.533214271068573, "learning_rate": 9.40430910032271e-05, "loss": 1.8219, "step": 3069 }, { "epoch": 0.1711164372108578, "grad_norm": 0.6056374311447144, "learning_rate": 9.403887527550486e-05, "loss": 1.9808, "step": 3070 }, { "epoch": 0.17117217546402097, "grad_norm": 0.5189699530601501, "learning_rate": 9.403465815112626e-05, "loss": 1.6841, "step": 3071 }, { "epoch": 0.1712279137171841, "grad_norm": 0.5255261659622192, "learning_rate": 9.403043963022505e-05, "loss": 1.5559, "step": 3072 }, { "epoch": 0.17128365197034726, "grad_norm": 0.8432055115699768, "learning_rate": 9.4026219712935e-05, "loss": 1.8316, "step": 3073 }, { "epoch": 0.17133939022351038, "grad_norm": 0.5276064276695251, "learning_rate": 9.402199839938996e-05, "loss": 1.678, "step": 3074 }, { "epoch": 0.17139512847667354, "grad_norm": 0.5075768232345581, "learning_rate": 9.401777568972379e-05, "loss": 1.5931, "step": 3075 }, { "epoch": 0.1714508667298367, "grad_norm": 0.5471227169036865, "learning_rate": 9.401355158407042e-05, "loss": 1.8761, "step": 3076 }, { "epoch": 0.17150660498299983, "grad_norm": 0.5062270760536194, "learning_rate": 9.400932608256381e-05, "loss": 1.6682, "step": 3077 }, { "epoch": 0.17156234323616298, "grad_norm": 0.5492522716522217, "learning_rate": 9.400509918533798e-05, "loss": 1.6889, "step": 3078 }, { "epoch": 0.1716180814893261, "grad_norm": 0.5703136324882507, "learning_rate": 9.400087089252695e-05, "loss": 1.6925, "step": 3079 }, { "epoch": 0.17167381974248927, "grad_norm": 0.5027966499328613, "learning_rate": 9.399664120426484e-05, "loss": 1.4425, "step": 3080 }, { "epoch": 0.17172955799565243, "grad_norm": 0.558413028717041, "learning_rate": 9.39924101206858e-05, "loss": 1.6485, "step": 3081 }, { "epoch": 0.17178529624881556, "grad_norm": 0.6047654151916504, "learning_rate": 9.3988177641924e-05, "loss": 1.835, "step": 3082 }, { "epoch": 0.1718410345019787, "grad_norm": 0.5760734677314758, "learning_rate": 9.398394376811368e-05, "loss": 1.7104, "step": 3083 }, { "epoch": 0.17189677275514184, "grad_norm": 0.5076540112495422, "learning_rate": 9.397970849938911e-05, "loss": 1.5808, "step": 3084 }, { "epoch": 0.171952511008305, "grad_norm": 0.5645167827606201, "learning_rate": 9.39754718358846e-05, "loss": 1.771, "step": 3085 }, { "epoch": 0.17200824926146815, "grad_norm": 0.5443428158760071, "learning_rate": 9.397123377773451e-05, "loss": 1.8713, "step": 3086 }, { "epoch": 0.17206398751463128, "grad_norm": 0.513888418674469, "learning_rate": 9.396699432507325e-05, "loss": 1.5279, "step": 3087 }, { "epoch": 0.17211972576779444, "grad_norm": 0.5408303141593933, "learning_rate": 9.396275347803529e-05, "loss": 1.8924, "step": 3088 }, { "epoch": 0.1721754640209576, "grad_norm": 0.5284982323646545, "learning_rate": 9.395851123675512e-05, "loss": 1.7562, "step": 3089 }, { "epoch": 0.17223120227412073, "grad_norm": 0.5364746451377869, "learning_rate": 9.395426760136726e-05, "loss": 1.599, "step": 3090 }, { "epoch": 0.17228694052728388, "grad_norm": 0.5527182817459106, "learning_rate": 9.39500225720063e-05, "loss": 1.7657, "step": 3091 }, { "epoch": 0.172342678780447, "grad_norm": 0.5294612646102905, "learning_rate": 9.394577614880687e-05, "loss": 1.684, "step": 3092 }, { "epoch": 0.17239841703361017, "grad_norm": 0.5614673495292664, "learning_rate": 9.394152833190364e-05, "loss": 1.8619, "step": 3093 }, { "epoch": 0.17245415528677333, "grad_norm": 0.5280752182006836, "learning_rate": 9.393727912143134e-05, "loss": 1.6454, "step": 3094 }, { "epoch": 0.17250989353993645, "grad_norm": 0.5236919522285461, "learning_rate": 9.39330285175247e-05, "loss": 1.6498, "step": 3095 }, { "epoch": 0.1725656317930996, "grad_norm": 0.5192380547523499, "learning_rate": 9.392877652031855e-05, "loss": 1.8345, "step": 3096 }, { "epoch": 0.17262137004626274, "grad_norm": 0.5223302841186523, "learning_rate": 9.392452312994773e-05, "loss": 1.5056, "step": 3097 }, { "epoch": 0.1726771082994259, "grad_norm": 0.5231219530105591, "learning_rate": 9.392026834654714e-05, "loss": 1.5868, "step": 3098 }, { "epoch": 0.17273284655258905, "grad_norm": 0.5619219541549683, "learning_rate": 9.39160121702517e-05, "loss": 1.8988, "step": 3099 }, { "epoch": 0.17278858480575218, "grad_norm": 0.5591604709625244, "learning_rate": 9.391175460119642e-05, "loss": 1.7228, "step": 3100 }, { "epoch": 0.17284432305891534, "grad_norm": 0.5290101766586304, "learning_rate": 9.39074956395163e-05, "loss": 1.6436, "step": 3101 }, { "epoch": 0.17290006131207847, "grad_norm": 0.5596829056739807, "learning_rate": 9.390323528534641e-05, "loss": 1.498, "step": 3102 }, { "epoch": 0.17295579956524162, "grad_norm": 0.5178213119506836, "learning_rate": 9.389897353882188e-05, "loss": 1.6834, "step": 3103 }, { "epoch": 0.17301153781840478, "grad_norm": 0.46845757961273193, "learning_rate": 9.389471040007784e-05, "loss": 1.4012, "step": 3104 }, { "epoch": 0.1730672760715679, "grad_norm": 0.5671401619911194, "learning_rate": 9.389044586924953e-05, "loss": 1.7005, "step": 3105 }, { "epoch": 0.17312301432473107, "grad_norm": 0.5250539779663086, "learning_rate": 9.388617994647218e-05, "loss": 1.6934, "step": 3106 }, { "epoch": 0.1731787525778942, "grad_norm": 0.5091891884803772, "learning_rate": 9.388191263188107e-05, "loss": 1.5041, "step": 3107 }, { "epoch": 0.17323449083105735, "grad_norm": 0.5298328995704651, "learning_rate": 9.387764392561153e-05, "loss": 1.6184, "step": 3108 }, { "epoch": 0.1732902290842205, "grad_norm": 0.5605019330978394, "learning_rate": 9.387337382779894e-05, "loss": 1.8302, "step": 3109 }, { "epoch": 0.17334596733738364, "grad_norm": 0.554153561592102, "learning_rate": 9.386910233857875e-05, "loss": 1.6565, "step": 3110 }, { "epoch": 0.1734017055905468, "grad_norm": 0.5952569246292114, "learning_rate": 9.386482945808641e-05, "loss": 1.5957, "step": 3111 }, { "epoch": 0.17345744384370995, "grad_norm": 0.6842632293701172, "learning_rate": 9.386055518645742e-05, "loss": 1.7147, "step": 3112 }, { "epoch": 0.17351318209687308, "grad_norm": 0.6011619567871094, "learning_rate": 9.385627952382736e-05, "loss": 2.0401, "step": 3113 }, { "epoch": 0.17356892035003624, "grad_norm": 0.5976441502571106, "learning_rate": 9.38520024703318e-05, "loss": 1.9242, "step": 3114 }, { "epoch": 0.17362465860319937, "grad_norm": 0.4991317689418793, "learning_rate": 9.38477240261064e-05, "loss": 1.689, "step": 3115 }, { "epoch": 0.17368039685636252, "grad_norm": 0.5823774337768555, "learning_rate": 9.384344419128684e-05, "loss": 1.7896, "step": 3116 }, { "epoch": 0.17373613510952568, "grad_norm": 0.584511399269104, "learning_rate": 9.383916296600886e-05, "loss": 1.7828, "step": 3117 }, { "epoch": 0.1737918733626888, "grad_norm": 0.5839495062828064, "learning_rate": 9.383488035040821e-05, "loss": 1.9487, "step": 3118 }, { "epoch": 0.17384761161585197, "grad_norm": 0.5381820201873779, "learning_rate": 9.383059634462077e-05, "loss": 1.6792, "step": 3119 }, { "epoch": 0.1739033498690151, "grad_norm": 0.5147883892059326, "learning_rate": 9.382631094878234e-05, "loss": 1.6627, "step": 3120 }, { "epoch": 0.17395908812217825, "grad_norm": 0.6467978358268738, "learning_rate": 9.382202416302885e-05, "loss": 1.7446, "step": 3121 }, { "epoch": 0.1740148263753414, "grad_norm": 0.5035672187805176, "learning_rate": 9.381773598749626e-05, "loss": 1.6078, "step": 3122 }, { "epoch": 0.17407056462850454, "grad_norm": 0.5837130546569824, "learning_rate": 9.381344642232056e-05, "loss": 1.792, "step": 3123 }, { "epoch": 0.1741263028816677, "grad_norm": 0.5331088900566101, "learning_rate": 9.380915546763778e-05, "loss": 1.788, "step": 3124 }, { "epoch": 0.17418204113483082, "grad_norm": 0.5427802801132202, "learning_rate": 9.380486312358402e-05, "loss": 1.8515, "step": 3125 }, { "epoch": 0.17423777938799398, "grad_norm": 0.4916117489337921, "learning_rate": 9.380056939029541e-05, "loss": 1.5184, "step": 3126 }, { "epoch": 0.17429351764115714, "grad_norm": 0.559158980846405, "learning_rate": 9.379627426790812e-05, "loss": 1.8659, "step": 3127 }, { "epoch": 0.17434925589432027, "grad_norm": 0.5941457152366638, "learning_rate": 9.379197775655833e-05, "loss": 1.7891, "step": 3128 }, { "epoch": 0.17440499414748342, "grad_norm": 0.4794413447380066, "learning_rate": 9.378767985638235e-05, "loss": 1.4975, "step": 3129 }, { "epoch": 0.17446073240064655, "grad_norm": 0.5934321284294128, "learning_rate": 9.378338056751647e-05, "loss": 1.9019, "step": 3130 }, { "epoch": 0.1745164706538097, "grad_norm": 0.5290476679801941, "learning_rate": 9.377907989009702e-05, "loss": 1.7563, "step": 3131 }, { "epoch": 0.17457220890697286, "grad_norm": 0.5909081101417542, "learning_rate": 9.37747778242604e-05, "loss": 2.009, "step": 3132 }, { "epoch": 0.174627947160136, "grad_norm": 0.5411567687988281, "learning_rate": 9.377047437014308e-05, "loss": 1.8264, "step": 3133 }, { "epoch": 0.17468368541329915, "grad_norm": 0.5046765208244324, "learning_rate": 9.376616952788149e-05, "loss": 1.6131, "step": 3134 }, { "epoch": 0.1747394236664623, "grad_norm": 0.528154194355011, "learning_rate": 9.376186329761219e-05, "loss": 1.7159, "step": 3135 }, { "epoch": 0.17479516191962544, "grad_norm": 0.5536481142044067, "learning_rate": 9.375755567947173e-05, "loss": 1.5203, "step": 3136 }, { "epoch": 0.1748509001727886, "grad_norm": 0.5683685541152954, "learning_rate": 9.375324667359673e-05, "loss": 1.7154, "step": 3137 }, { "epoch": 0.17490663842595172, "grad_norm": 0.4969169497489929, "learning_rate": 9.374893628012384e-05, "loss": 1.7277, "step": 3138 }, { "epoch": 0.17496237667911488, "grad_norm": 0.548058032989502, "learning_rate": 9.374462449918976e-05, "loss": 1.7931, "step": 3139 }, { "epoch": 0.17501811493227803, "grad_norm": 0.5391299724578857, "learning_rate": 9.374031133093124e-05, "loss": 1.8076, "step": 3140 }, { "epoch": 0.17507385318544116, "grad_norm": 0.5356679558753967, "learning_rate": 9.373599677548508e-05, "loss": 1.7212, "step": 3141 }, { "epoch": 0.17512959143860432, "grad_norm": 0.5841724276542664, "learning_rate": 9.373168083298809e-05, "loss": 1.9175, "step": 3142 }, { "epoch": 0.17518532969176745, "grad_norm": 0.5568740963935852, "learning_rate": 9.372736350357717e-05, "loss": 1.842, "step": 3143 }, { "epoch": 0.1752410679449306, "grad_norm": 0.5539031028747559, "learning_rate": 9.372304478738922e-05, "loss": 1.8881, "step": 3144 }, { "epoch": 0.17529680619809376, "grad_norm": 0.5519389510154724, "learning_rate": 9.371872468456122e-05, "loss": 1.7381, "step": 3145 }, { "epoch": 0.1753525444512569, "grad_norm": 0.5324805378913879, "learning_rate": 9.371440319523016e-05, "loss": 1.745, "step": 3146 }, { "epoch": 0.17540828270442005, "grad_norm": 0.5449910759925842, "learning_rate": 9.37100803195331e-05, "loss": 1.8071, "step": 3147 }, { "epoch": 0.17546402095758318, "grad_norm": 0.5846375823020935, "learning_rate": 9.370575605760716e-05, "loss": 1.8659, "step": 3148 }, { "epoch": 0.17551975921074633, "grad_norm": 0.4958127737045288, "learning_rate": 9.370143040958943e-05, "loss": 1.5791, "step": 3149 }, { "epoch": 0.1755754974639095, "grad_norm": 0.5119603276252747, "learning_rate": 9.369710337561714e-05, "loss": 1.7657, "step": 3150 }, { "epoch": 0.17563123571707262, "grad_norm": 0.5698620080947876, "learning_rate": 9.36927749558275e-05, "loss": 2.0541, "step": 3151 }, { "epoch": 0.17568697397023578, "grad_norm": 0.5704925656318665, "learning_rate": 9.368844515035779e-05, "loss": 1.762, "step": 3152 }, { "epoch": 0.1757427122233989, "grad_norm": 0.5676224231719971, "learning_rate": 9.368411395934533e-05, "loss": 1.5928, "step": 3153 }, { "epoch": 0.17579845047656206, "grad_norm": 0.5878868699073792, "learning_rate": 9.367978138292747e-05, "loss": 1.9292, "step": 3154 }, { "epoch": 0.17585418872972522, "grad_norm": 0.5323675274848938, "learning_rate": 9.36754474212416e-05, "loss": 1.8832, "step": 3155 }, { "epoch": 0.17590992698288835, "grad_norm": 0.49846091866493225, "learning_rate": 9.36711120744252e-05, "loss": 1.4679, "step": 3156 }, { "epoch": 0.1759656652360515, "grad_norm": 0.5483475923538208, "learning_rate": 9.366677534261572e-05, "loss": 1.7744, "step": 3157 }, { "epoch": 0.17602140348921466, "grad_norm": 0.5628114938735962, "learning_rate": 9.366243722595074e-05, "loss": 1.8169, "step": 3158 }, { "epoch": 0.1760771417423778, "grad_norm": 0.5500927567481995, "learning_rate": 9.365809772456782e-05, "loss": 1.8714, "step": 3159 }, { "epoch": 0.17613287999554095, "grad_norm": 0.5269673466682434, "learning_rate": 9.365375683860458e-05, "loss": 1.797, "step": 3160 }, { "epoch": 0.17618861824870408, "grad_norm": 0.5542075037956238, "learning_rate": 9.36494145681987e-05, "loss": 1.8027, "step": 3161 }, { "epoch": 0.17624435650186723, "grad_norm": 0.5421326756477356, "learning_rate": 9.364507091348788e-05, "loss": 1.7254, "step": 3162 }, { "epoch": 0.1763000947550304, "grad_norm": 0.4979914128780365, "learning_rate": 9.364072587460988e-05, "loss": 1.7505, "step": 3163 }, { "epoch": 0.17635583300819352, "grad_norm": 0.5363655686378479, "learning_rate": 9.363637945170249e-05, "loss": 1.7651, "step": 3164 }, { "epoch": 0.17641157126135668, "grad_norm": 0.5159875750541687, "learning_rate": 9.363203164490356e-05, "loss": 1.7096, "step": 3165 }, { "epoch": 0.1764673095145198, "grad_norm": 0.590908408164978, "learning_rate": 9.362768245435098e-05, "loss": 2.0557, "step": 3166 }, { "epoch": 0.17652304776768296, "grad_norm": 0.5476133823394775, "learning_rate": 9.362333188018269e-05, "loss": 1.6362, "step": 3167 }, { "epoch": 0.17657878602084612, "grad_norm": 0.5187797546386719, "learning_rate": 9.361897992253665e-05, "loss": 1.6019, "step": 3168 }, { "epoch": 0.17663452427400925, "grad_norm": 0.5152827501296997, "learning_rate": 9.361462658155089e-05, "loss": 1.7042, "step": 3169 }, { "epoch": 0.1766902625271724, "grad_norm": 0.5961150527000427, "learning_rate": 9.361027185736346e-05, "loss": 1.7224, "step": 3170 }, { "epoch": 0.17674600078033553, "grad_norm": 0.5234068632125854, "learning_rate": 9.360591575011245e-05, "loss": 1.6534, "step": 3171 }, { "epoch": 0.1768017390334987, "grad_norm": 0.5417289137840271, "learning_rate": 9.360155825993607e-05, "loss": 1.8964, "step": 3172 }, { "epoch": 0.17685747728666185, "grad_norm": 0.535892903804779, "learning_rate": 9.359719938697246e-05, "loss": 1.7262, "step": 3173 }, { "epoch": 0.17691321553982498, "grad_norm": 0.5440612435340881, "learning_rate": 9.359283913135988e-05, "loss": 1.7775, "step": 3174 }, { "epoch": 0.17696895379298813, "grad_norm": 0.6108183264732361, "learning_rate": 9.358847749323659e-05, "loss": 1.9756, "step": 3175 }, { "epoch": 0.17702469204615126, "grad_norm": 0.5500672459602356, "learning_rate": 9.358411447274094e-05, "loss": 1.7427, "step": 3176 }, { "epoch": 0.17708043029931442, "grad_norm": 0.5370178818702698, "learning_rate": 9.357975007001129e-05, "loss": 1.8076, "step": 3177 }, { "epoch": 0.17713616855247757, "grad_norm": 0.5063850283622742, "learning_rate": 9.357538428518607e-05, "loss": 1.6826, "step": 3178 }, { "epoch": 0.1771919068056407, "grad_norm": 0.5165611505508423, "learning_rate": 9.357101711840372e-05, "loss": 1.6545, "step": 3179 }, { "epoch": 0.17724764505880386, "grad_norm": 0.521656334400177, "learning_rate": 9.356664856980273e-05, "loss": 1.4337, "step": 3180 }, { "epoch": 0.17730338331196702, "grad_norm": 0.527341902256012, "learning_rate": 9.356227863952168e-05, "loss": 1.7241, "step": 3181 }, { "epoch": 0.17735912156513015, "grad_norm": 0.494210422039032, "learning_rate": 9.355790732769911e-05, "loss": 1.5474, "step": 3182 }, { "epoch": 0.1774148598182933, "grad_norm": 0.5171836614608765, "learning_rate": 9.35535346344737e-05, "loss": 1.5795, "step": 3183 }, { "epoch": 0.17747059807145643, "grad_norm": 0.5571975111961365, "learning_rate": 9.354916055998409e-05, "loss": 1.8093, "step": 3184 }, { "epoch": 0.1775263363246196, "grad_norm": 0.5996416807174683, "learning_rate": 9.354478510436902e-05, "loss": 2.0041, "step": 3185 }, { "epoch": 0.17758207457778274, "grad_norm": 0.4972604811191559, "learning_rate": 9.354040826776727e-05, "loss": 1.7329, "step": 3186 }, { "epoch": 0.17763781283094587, "grad_norm": 0.5599552392959595, "learning_rate": 9.35360300503176e-05, "loss": 1.7857, "step": 3187 }, { "epoch": 0.17769355108410903, "grad_norm": 0.5476880669593811, "learning_rate": 9.35316504521589e-05, "loss": 1.7757, "step": 3188 }, { "epoch": 0.17774928933727216, "grad_norm": 0.5362497568130493, "learning_rate": 9.352726947343006e-05, "loss": 1.7656, "step": 3189 }, { "epoch": 0.17780502759043532, "grad_norm": 0.5269262194633484, "learning_rate": 9.352288711427001e-05, "loss": 1.7716, "step": 3190 }, { "epoch": 0.17786076584359847, "grad_norm": 0.5733572244644165, "learning_rate": 9.351850337481773e-05, "loss": 1.7623, "step": 3191 }, { "epoch": 0.1779165040967616, "grad_norm": 0.5491241812705994, "learning_rate": 9.351411825521228e-05, "loss": 1.7835, "step": 3192 }, { "epoch": 0.17797224234992476, "grad_norm": 0.5553460121154785, "learning_rate": 9.350973175559267e-05, "loss": 1.9064, "step": 3193 }, { "epoch": 0.1780279806030879, "grad_norm": 0.5257185101509094, "learning_rate": 9.350534387609807e-05, "loss": 1.7245, "step": 3194 }, { "epoch": 0.17808371885625104, "grad_norm": 0.5201014876365662, "learning_rate": 9.35009546168676e-05, "loss": 1.6991, "step": 3195 }, { "epoch": 0.1781394571094142, "grad_norm": 0.5365905165672302, "learning_rate": 9.34965639780405e-05, "loss": 1.747, "step": 3196 }, { "epoch": 0.17819519536257733, "grad_norm": 0.5471792221069336, "learning_rate": 9.349217195975598e-05, "loss": 1.8114, "step": 3197 }, { "epoch": 0.1782509336157405, "grad_norm": 0.5407313704490662, "learning_rate": 9.348777856215334e-05, "loss": 1.7719, "step": 3198 }, { "epoch": 0.17830667186890362, "grad_norm": 0.5418484807014465, "learning_rate": 9.348338378537192e-05, "loss": 1.7989, "step": 3199 }, { "epoch": 0.17836241012206677, "grad_norm": 0.5235376954078674, "learning_rate": 9.347898762955109e-05, "loss": 1.5998, "step": 3200 }, { "epoch": 0.17841814837522993, "grad_norm": 0.5582895874977112, "learning_rate": 9.347459009483028e-05, "loss": 1.7352, "step": 3201 }, { "epoch": 0.17847388662839306, "grad_norm": 0.5512102246284485, "learning_rate": 9.347019118134893e-05, "loss": 1.8595, "step": 3202 }, { "epoch": 0.17852962488155621, "grad_norm": 0.5874474048614502, "learning_rate": 9.346579088924658e-05, "loss": 1.8312, "step": 3203 }, { "epoch": 0.17858536313471937, "grad_norm": 0.5523637533187866, "learning_rate": 9.346138921866276e-05, "loss": 1.9124, "step": 3204 }, { "epoch": 0.1786411013878825, "grad_norm": 0.5245184898376465, "learning_rate": 9.345698616973707e-05, "loss": 1.8279, "step": 3205 }, { "epoch": 0.17869683964104566, "grad_norm": 0.5538264513015747, "learning_rate": 9.345258174260915e-05, "loss": 1.8218, "step": 3206 }, { "epoch": 0.1787525778942088, "grad_norm": 0.5474498271942139, "learning_rate": 9.344817593741868e-05, "loss": 1.6772, "step": 3207 }, { "epoch": 0.17880831614737194, "grad_norm": 0.5437337756156921, "learning_rate": 9.344376875430539e-05, "loss": 1.8402, "step": 3208 }, { "epoch": 0.1788640544005351, "grad_norm": 0.6069798469543457, "learning_rate": 9.343936019340906e-05, "loss": 2.0245, "step": 3209 }, { "epoch": 0.17891979265369823, "grad_norm": 0.5451731085777283, "learning_rate": 9.343495025486948e-05, "loss": 1.7243, "step": 3210 }, { "epoch": 0.17897553090686139, "grad_norm": 0.5307853817939758, "learning_rate": 9.343053893882654e-05, "loss": 1.8062, "step": 3211 }, { "epoch": 0.17903126916002451, "grad_norm": 0.5642760992050171, "learning_rate": 9.34261262454201e-05, "loss": 1.9111, "step": 3212 }, { "epoch": 0.17908700741318767, "grad_norm": 0.5641029477119446, "learning_rate": 9.342171217479014e-05, "loss": 1.892, "step": 3213 }, { "epoch": 0.17914274566635083, "grad_norm": 0.5118708610534668, "learning_rate": 9.341729672707664e-05, "loss": 1.7303, "step": 3214 }, { "epoch": 0.17919848391951396, "grad_norm": 0.5048193335533142, "learning_rate": 9.341287990241962e-05, "loss": 1.5011, "step": 3215 }, { "epoch": 0.1792542221726771, "grad_norm": 0.5508407950401306, "learning_rate": 9.340846170095917e-05, "loss": 1.8355, "step": 3216 }, { "epoch": 0.17930996042584024, "grad_norm": 0.5779476165771484, "learning_rate": 9.34040421228354e-05, "loss": 1.8892, "step": 3217 }, { "epoch": 0.1793656986790034, "grad_norm": 0.5211353898048401, "learning_rate": 9.339962116818848e-05, "loss": 1.6359, "step": 3218 }, { "epoch": 0.17942143693216656, "grad_norm": 0.5479955077171326, "learning_rate": 9.339519883715862e-05, "loss": 1.7594, "step": 3219 }, { "epoch": 0.17947717518532968, "grad_norm": 0.49651384353637695, "learning_rate": 9.339077512988606e-05, "loss": 1.5873, "step": 3220 }, { "epoch": 0.17953291343849284, "grad_norm": 0.569810152053833, "learning_rate": 9.338635004651108e-05, "loss": 1.6675, "step": 3221 }, { "epoch": 0.17958865169165597, "grad_norm": 0.5437332987785339, "learning_rate": 9.338192358717406e-05, "loss": 1.8268, "step": 3222 }, { "epoch": 0.17964438994481913, "grad_norm": 0.5670780539512634, "learning_rate": 9.337749575201535e-05, "loss": 1.6647, "step": 3223 }, { "epoch": 0.17970012819798228, "grad_norm": 0.5969633460044861, "learning_rate": 9.337306654117538e-05, "loss": 1.7202, "step": 3224 }, { "epoch": 0.1797558664511454, "grad_norm": 0.48552221059799194, "learning_rate": 9.336863595479462e-05, "loss": 1.4645, "step": 3225 }, { "epoch": 0.17981160470430857, "grad_norm": 0.5412662625312805, "learning_rate": 9.33642039930136e-05, "loss": 1.8443, "step": 3226 }, { "epoch": 0.17986734295747173, "grad_norm": 0.5973519682884216, "learning_rate": 9.335977065597285e-05, "loss": 1.98, "step": 3227 }, { "epoch": 0.17992308121063486, "grad_norm": 0.5288311243057251, "learning_rate": 9.335533594381297e-05, "loss": 1.5549, "step": 3228 }, { "epoch": 0.179978819463798, "grad_norm": 0.5504105687141418, "learning_rate": 9.335089985667463e-05, "loss": 1.5479, "step": 3229 }, { "epoch": 0.18003455771696114, "grad_norm": 0.4889037609100342, "learning_rate": 9.334646239469848e-05, "loss": 1.7899, "step": 3230 }, { "epoch": 0.1800902959701243, "grad_norm": 0.5372660756111145, "learning_rate": 9.334202355802528e-05, "loss": 1.7351, "step": 3231 }, { "epoch": 0.18014603422328745, "grad_norm": 0.5164480209350586, "learning_rate": 9.333758334679581e-05, "loss": 1.6461, "step": 3232 }, { "epoch": 0.18020177247645058, "grad_norm": 0.539726972579956, "learning_rate": 9.333314176115084e-05, "loss": 1.6368, "step": 3233 }, { "epoch": 0.18025751072961374, "grad_norm": 0.6785762310028076, "learning_rate": 9.33286988012313e-05, "loss": 2.0446, "step": 3234 }, { "epoch": 0.18031324898277687, "grad_norm": 0.580847442150116, "learning_rate": 9.332425446717803e-05, "loss": 1.8455, "step": 3235 }, { "epoch": 0.18036898723594003, "grad_norm": 0.5236613154411316, "learning_rate": 9.331980875913202e-05, "loss": 1.4925, "step": 3236 }, { "epoch": 0.18042472548910318, "grad_norm": 0.5626049637794495, "learning_rate": 9.331536167723423e-05, "loss": 1.7695, "step": 3237 }, { "epoch": 0.1804804637422663, "grad_norm": 0.5435861349105835, "learning_rate": 9.331091322162573e-05, "loss": 1.8594, "step": 3238 }, { "epoch": 0.18053620199542947, "grad_norm": 0.5868507027626038, "learning_rate": 9.330646339244759e-05, "loss": 1.8194, "step": 3239 }, { "epoch": 0.1805919402485926, "grad_norm": 0.5488845705986023, "learning_rate": 9.330201218984092e-05, "loss": 1.6584, "step": 3240 }, { "epoch": 0.18064767850175575, "grad_norm": 0.5238907933235168, "learning_rate": 9.329755961394688e-05, "loss": 1.757, "step": 3241 }, { "epoch": 0.1807034167549189, "grad_norm": 0.5120671987533569, "learning_rate": 9.32931056649067e-05, "loss": 1.6786, "step": 3242 }, { "epoch": 0.18075915500808204, "grad_norm": 0.49454161524772644, "learning_rate": 9.328865034286161e-05, "loss": 1.457, "step": 3243 }, { "epoch": 0.1808148932612452, "grad_norm": 0.5296444892883301, "learning_rate": 9.328419364795295e-05, "loss": 1.691, "step": 3244 }, { "epoch": 0.18087063151440833, "grad_norm": 0.5104671120643616, "learning_rate": 9.327973558032201e-05, "loss": 1.6702, "step": 3245 }, { "epoch": 0.18092636976757148, "grad_norm": 0.5683085322380066, "learning_rate": 9.32752761401102e-05, "loss": 1.6912, "step": 3246 }, { "epoch": 0.18098210802073464, "grad_norm": 0.5360772609710693, "learning_rate": 9.327081532745896e-05, "loss": 1.7894, "step": 3247 }, { "epoch": 0.18103784627389777, "grad_norm": 0.6272693872451782, "learning_rate": 9.326635314250971e-05, "loss": 2.0331, "step": 3248 }, { "epoch": 0.18109358452706092, "grad_norm": 0.5494347810745239, "learning_rate": 9.326188958540403e-05, "loss": 1.8261, "step": 3249 }, { "epoch": 0.18114932278022408, "grad_norm": 0.5473103523254395, "learning_rate": 9.325742465628342e-05, "loss": 1.5244, "step": 3250 }, { "epoch": 0.1812050610333872, "grad_norm": 0.5626412034034729, "learning_rate": 9.325295835528953e-05, "loss": 1.8512, "step": 3251 }, { "epoch": 0.18126079928655037, "grad_norm": 0.5165623426437378, "learning_rate": 9.324849068256397e-05, "loss": 1.8405, "step": 3252 }, { "epoch": 0.1813165375397135, "grad_norm": 0.5183326601982117, "learning_rate": 9.324402163824846e-05, "loss": 1.7193, "step": 3253 }, { "epoch": 0.18137227579287665, "grad_norm": 0.5188653469085693, "learning_rate": 9.323955122248468e-05, "loss": 1.6715, "step": 3254 }, { "epoch": 0.1814280140460398, "grad_norm": 0.5316330194473267, "learning_rate": 9.323507943541447e-05, "loss": 1.5796, "step": 3255 }, { "epoch": 0.18148375229920294, "grad_norm": 0.5456557869911194, "learning_rate": 9.323060627717961e-05, "loss": 1.7856, "step": 3256 }, { "epoch": 0.1815394905523661, "grad_norm": 0.5671826004981995, "learning_rate": 9.322613174792197e-05, "loss": 1.7715, "step": 3257 }, { "epoch": 0.18159522880552922, "grad_norm": 0.5530715584754944, "learning_rate": 9.322165584778347e-05, "loss": 1.9437, "step": 3258 }, { "epoch": 0.18165096705869238, "grad_norm": 0.5097282528877258, "learning_rate": 9.321717857690601e-05, "loss": 1.5789, "step": 3259 }, { "epoch": 0.18170670531185554, "grad_norm": 0.5106785297393799, "learning_rate": 9.321269993543166e-05, "loss": 1.7718, "step": 3260 }, { "epoch": 0.18176244356501867, "grad_norm": 0.5174189209938049, "learning_rate": 9.320821992350239e-05, "loss": 1.6088, "step": 3261 }, { "epoch": 0.18181818181818182, "grad_norm": 0.5284159779548645, "learning_rate": 9.320373854126032e-05, "loss": 1.6355, "step": 3262 }, { "epoch": 0.18187392007134495, "grad_norm": 0.5431947708129883, "learning_rate": 9.319925578884754e-05, "loss": 1.8282, "step": 3263 }, { "epoch": 0.1819296583245081, "grad_norm": 0.5244488716125488, "learning_rate": 9.319477166640626e-05, "loss": 1.8765, "step": 3264 }, { "epoch": 0.18198539657767127, "grad_norm": 0.5338707566261292, "learning_rate": 9.319028617407865e-05, "loss": 1.7684, "step": 3265 }, { "epoch": 0.1820411348308344, "grad_norm": 0.5929536819458008, "learning_rate": 9.318579931200697e-05, "loss": 1.9083, "step": 3266 }, { "epoch": 0.18209687308399755, "grad_norm": 0.5214221477508545, "learning_rate": 9.318131108033355e-05, "loss": 1.6844, "step": 3267 }, { "epoch": 0.18215261133716068, "grad_norm": 0.5370472073554993, "learning_rate": 9.31768214792007e-05, "loss": 1.9451, "step": 3268 }, { "epoch": 0.18220834959032384, "grad_norm": 0.5181378722190857, "learning_rate": 9.31723305087508e-05, "loss": 1.7534, "step": 3269 }, { "epoch": 0.182264087843487, "grad_norm": 0.5766522884368896, "learning_rate": 9.316783816912629e-05, "loss": 1.876, "step": 3270 }, { "epoch": 0.18231982609665012, "grad_norm": 0.5224177241325378, "learning_rate": 9.316334446046966e-05, "loss": 1.7254, "step": 3271 }, { "epoch": 0.18237556434981328, "grad_norm": 0.5871415138244629, "learning_rate": 9.315884938292339e-05, "loss": 1.6292, "step": 3272 }, { "epoch": 0.18243130260297644, "grad_norm": 0.5917293429374695, "learning_rate": 9.315435293663005e-05, "loss": 2.0649, "step": 3273 }, { "epoch": 0.18248704085613957, "grad_norm": 0.5843697190284729, "learning_rate": 9.314985512173223e-05, "loss": 1.8282, "step": 3274 }, { "epoch": 0.18254277910930272, "grad_norm": 0.5423409938812256, "learning_rate": 9.31453559383726e-05, "loss": 1.7501, "step": 3275 }, { "epoch": 0.18259851736246585, "grad_norm": 0.5610026717185974, "learning_rate": 9.314085538669383e-05, "loss": 1.7287, "step": 3276 }, { "epoch": 0.182654255615629, "grad_norm": 0.5071337223052979, "learning_rate": 9.313635346683865e-05, "loss": 1.6779, "step": 3277 }, { "epoch": 0.18270999386879216, "grad_norm": 0.5492652058601379, "learning_rate": 9.313185017894985e-05, "loss": 1.7884, "step": 3278 }, { "epoch": 0.1827657321219553, "grad_norm": 0.4901118874549866, "learning_rate": 9.312734552317023e-05, "loss": 1.5747, "step": 3279 }, { "epoch": 0.18282147037511845, "grad_norm": 0.515848696231842, "learning_rate": 9.312283949964267e-05, "loss": 1.4992, "step": 3280 }, { "epoch": 0.18287720862828158, "grad_norm": 0.497324675321579, "learning_rate": 9.311833210851007e-05, "loss": 1.5226, "step": 3281 }, { "epoch": 0.18293294688144474, "grad_norm": 0.5232150554656982, "learning_rate": 9.311382334991536e-05, "loss": 1.6106, "step": 3282 }, { "epoch": 0.1829886851346079, "grad_norm": 0.6029054522514343, "learning_rate": 9.310931322400156e-05, "loss": 1.9531, "step": 3283 }, { "epoch": 0.18304442338777102, "grad_norm": 0.70119708776474, "learning_rate": 9.310480173091168e-05, "loss": 1.9566, "step": 3284 }, { "epoch": 0.18310016164093418, "grad_norm": 0.5252953767776489, "learning_rate": 9.31002888707888e-05, "loss": 1.8004, "step": 3285 }, { "epoch": 0.1831558998940973, "grad_norm": 0.5744017958641052, "learning_rate": 9.309577464377606e-05, "loss": 1.8203, "step": 3286 }, { "epoch": 0.18321163814726046, "grad_norm": 0.5286086797714233, "learning_rate": 9.309125905001659e-05, "loss": 1.8127, "step": 3287 }, { "epoch": 0.18326737640042362, "grad_norm": 0.5180408954620361, "learning_rate": 9.308674208965364e-05, "loss": 1.5432, "step": 3288 }, { "epoch": 0.18332311465358675, "grad_norm": 0.568420946598053, "learning_rate": 9.308222376283045e-05, "loss": 1.853, "step": 3289 }, { "epoch": 0.1833788529067499, "grad_norm": 0.9352191090583801, "learning_rate": 9.30777040696903e-05, "loss": 1.531, "step": 3290 }, { "epoch": 0.18343459115991304, "grad_norm": 0.5612093210220337, "learning_rate": 9.307318301037656e-05, "loss": 2.0149, "step": 3291 }, { "epoch": 0.1834903294130762, "grad_norm": 0.5616469979286194, "learning_rate": 9.306866058503257e-05, "loss": 1.6388, "step": 3292 }, { "epoch": 0.18354606766623935, "grad_norm": 0.5579656958580017, "learning_rate": 9.306413679380177e-05, "loss": 1.8719, "step": 3293 }, { "epoch": 0.18360180591940248, "grad_norm": 0.5343957543373108, "learning_rate": 9.305961163682764e-05, "loss": 1.7592, "step": 3294 }, { "epoch": 0.18365754417256563, "grad_norm": 0.5974972248077393, "learning_rate": 9.305508511425367e-05, "loss": 1.834, "step": 3295 }, { "epoch": 0.1837132824257288, "grad_norm": 0.5827033519744873, "learning_rate": 9.305055722622344e-05, "loss": 1.8606, "step": 3296 }, { "epoch": 0.18376902067889192, "grad_norm": 0.5568636059761047, "learning_rate": 9.304602797288054e-05, "loss": 1.8952, "step": 3297 }, { "epoch": 0.18382475893205508, "grad_norm": 0.6066376566886902, "learning_rate": 9.30414973543686e-05, "loss": 1.9215, "step": 3298 }, { "epoch": 0.1838804971852182, "grad_norm": 0.5111042261123657, "learning_rate": 9.303696537083132e-05, "loss": 1.5506, "step": 3299 }, { "epoch": 0.18393623543838136, "grad_norm": 0.501711905002594, "learning_rate": 9.303243202241242e-05, "loss": 1.5003, "step": 3300 }, { "epoch": 0.18399197369154452, "grad_norm": 0.543425977230072, "learning_rate": 9.302789730925567e-05, "loss": 1.5837, "step": 3301 }, { "epoch": 0.18404771194470765, "grad_norm": 0.5619440674781799, "learning_rate": 9.30233612315049e-05, "loss": 1.8285, "step": 3302 }, { "epoch": 0.1841034501978708, "grad_norm": 0.5294018387794495, "learning_rate": 9.301882378930394e-05, "loss": 1.6032, "step": 3303 }, { "epoch": 0.18415918845103393, "grad_norm": 0.6101817488670349, "learning_rate": 9.301428498279671e-05, "loss": 1.9998, "step": 3304 }, { "epoch": 0.1842149267041971, "grad_norm": 0.5133767127990723, "learning_rate": 9.300974481212715e-05, "loss": 1.6816, "step": 3305 }, { "epoch": 0.18427066495736025, "grad_norm": 0.5289322137832642, "learning_rate": 9.300520327743924e-05, "loss": 1.4649, "step": 3306 }, { "epoch": 0.18432640321052338, "grad_norm": 0.5560780763626099, "learning_rate": 9.300066037887704e-05, "loss": 1.6704, "step": 3307 }, { "epoch": 0.18438214146368653, "grad_norm": 0.5855201482772827, "learning_rate": 9.29961161165846e-05, "loss": 1.9368, "step": 3308 }, { "epoch": 0.18443787971684966, "grad_norm": 0.5227165818214417, "learning_rate": 9.299157049070603e-05, "loss": 1.663, "step": 3309 }, { "epoch": 0.18449361797001282, "grad_norm": 0.555633008480072, "learning_rate": 9.298702350138551e-05, "loss": 1.6634, "step": 3310 }, { "epoch": 0.18454935622317598, "grad_norm": 0.5284892916679382, "learning_rate": 9.298247514876724e-05, "loss": 1.7772, "step": 3311 }, { "epoch": 0.1846050944763391, "grad_norm": 0.5455605983734131, "learning_rate": 9.297792543299545e-05, "loss": 1.7826, "step": 3312 }, { "epoch": 0.18466083272950226, "grad_norm": 0.6630359292030334, "learning_rate": 9.297337435421446e-05, "loss": 2.0859, "step": 3313 }, { "epoch": 0.1847165709826654, "grad_norm": 0.4958614408969879, "learning_rate": 9.296882191256857e-05, "loss": 1.6861, "step": 3314 }, { "epoch": 0.18477230923582855, "grad_norm": 0.506952702999115, "learning_rate": 9.29642681082022e-05, "loss": 1.6616, "step": 3315 }, { "epoch": 0.1848280474889917, "grad_norm": 0.5598859190940857, "learning_rate": 9.295971294125973e-05, "loss": 1.8831, "step": 3316 }, { "epoch": 0.18488378574215483, "grad_norm": 0.5533158183097839, "learning_rate": 9.295515641188563e-05, "loss": 1.6373, "step": 3317 }, { "epoch": 0.184939523995318, "grad_norm": 0.5264914035797119, "learning_rate": 9.295059852022443e-05, "loss": 1.6668, "step": 3318 }, { "epoch": 0.18499526224848115, "grad_norm": 0.542248010635376, "learning_rate": 9.294603926642064e-05, "loss": 1.5566, "step": 3319 }, { "epoch": 0.18505100050164427, "grad_norm": 0.5599246621131897, "learning_rate": 9.294147865061891e-05, "loss": 1.8183, "step": 3320 }, { "epoch": 0.18510673875480743, "grad_norm": 0.48394709825515747, "learning_rate": 9.293691667296382e-05, "loss": 1.4792, "step": 3321 }, { "epoch": 0.18516247700797056, "grad_norm": 0.5670637488365173, "learning_rate": 9.293235333360009e-05, "loss": 1.8202, "step": 3322 }, { "epoch": 0.18521821526113372, "grad_norm": 0.5079344511032104, "learning_rate": 9.29277886326724e-05, "loss": 1.698, "step": 3323 }, { "epoch": 0.18527395351429687, "grad_norm": 0.6303577423095703, "learning_rate": 9.292322257032555e-05, "loss": 1.8882, "step": 3324 }, { "epoch": 0.18532969176746, "grad_norm": 0.5548877716064453, "learning_rate": 9.291865514670435e-05, "loss": 1.8684, "step": 3325 }, { "epoch": 0.18538543002062316, "grad_norm": 0.5407868027687073, "learning_rate": 9.291408636195364e-05, "loss": 1.7726, "step": 3326 }, { "epoch": 0.1854411682737863, "grad_norm": 0.5434556007385254, "learning_rate": 9.29095162162183e-05, "loss": 1.8152, "step": 3327 }, { "epoch": 0.18549690652694945, "grad_norm": 0.5405827164649963, "learning_rate": 9.290494470964332e-05, "loss": 1.7364, "step": 3328 }, { "epoch": 0.1855526447801126, "grad_norm": 0.4682316184043884, "learning_rate": 9.290037184237362e-05, "loss": 1.6331, "step": 3329 }, { "epoch": 0.18560838303327573, "grad_norm": 0.5418784618377686, "learning_rate": 9.289579761455426e-05, "loss": 1.9186, "step": 3330 }, { "epoch": 0.1856641212864389, "grad_norm": 0.6001595854759216, "learning_rate": 9.289122202633029e-05, "loss": 1.8436, "step": 3331 }, { "epoch": 0.18571985953960202, "grad_norm": 0.5514225363731384, "learning_rate": 9.288664507784686e-05, "loss": 1.8193, "step": 3332 }, { "epoch": 0.18577559779276517, "grad_norm": 0.5329412817955017, "learning_rate": 9.288206676924906e-05, "loss": 1.5945, "step": 3333 }, { "epoch": 0.18583133604592833, "grad_norm": 0.5613374710083008, "learning_rate": 9.287748710068214e-05, "loss": 1.8746, "step": 3334 }, { "epoch": 0.18588707429909146, "grad_norm": 0.5720524191856384, "learning_rate": 9.287290607229131e-05, "loss": 1.6635, "step": 3335 }, { "epoch": 0.18594281255225462, "grad_norm": 0.5446194410324097, "learning_rate": 9.286832368422187e-05, "loss": 1.6587, "step": 3336 }, { "epoch": 0.18599855080541774, "grad_norm": 0.5358483791351318, "learning_rate": 9.286373993661916e-05, "loss": 1.8244, "step": 3337 }, { "epoch": 0.1860542890585809, "grad_norm": 0.5477625727653503, "learning_rate": 9.28591548296285e-05, "loss": 1.8085, "step": 3338 }, { "epoch": 0.18611002731174406, "grad_norm": 0.528417706489563, "learning_rate": 9.285456836339537e-05, "loss": 1.7652, "step": 3339 }, { "epoch": 0.1861657655649072, "grad_norm": 0.5157662630081177, "learning_rate": 9.284998053806516e-05, "loss": 1.7365, "step": 3340 }, { "epoch": 0.18622150381807034, "grad_norm": 0.5836164951324463, "learning_rate": 9.284539135378341e-05, "loss": 1.8217, "step": 3341 }, { "epoch": 0.1862772420712335, "grad_norm": 0.5283136963844299, "learning_rate": 9.284080081069565e-05, "loss": 1.7073, "step": 3342 }, { "epoch": 0.18633298032439663, "grad_norm": 0.5611073970794678, "learning_rate": 9.283620890894749e-05, "loss": 1.6885, "step": 3343 }, { "epoch": 0.1863887185775598, "grad_norm": 0.5854252576828003, "learning_rate": 9.283161564868452e-05, "loss": 1.8512, "step": 3344 }, { "epoch": 0.18644445683072292, "grad_norm": 0.5314401984214783, "learning_rate": 9.282702103005243e-05, "loss": 1.8003, "step": 3345 }, { "epoch": 0.18650019508388607, "grad_norm": 0.5689622759819031, "learning_rate": 9.282242505319693e-05, "loss": 1.7775, "step": 3346 }, { "epoch": 0.18655593333704923, "grad_norm": 0.5099941492080688, "learning_rate": 9.281782771826378e-05, "loss": 1.4253, "step": 3347 }, { "epoch": 0.18661167159021236, "grad_norm": 0.557032585144043, "learning_rate": 9.281322902539878e-05, "loss": 1.7682, "step": 3348 }, { "epoch": 0.18666740984337551, "grad_norm": 0.5229087471961975, "learning_rate": 9.280862897474776e-05, "loss": 1.5904, "step": 3349 }, { "epoch": 0.18672314809653864, "grad_norm": 0.5913739800453186, "learning_rate": 9.280402756645663e-05, "loss": 1.9147, "step": 3350 }, { "epoch": 0.1867788863497018, "grad_norm": 0.5528784990310669, "learning_rate": 9.279942480067131e-05, "loss": 1.7212, "step": 3351 }, { "epoch": 0.18683462460286496, "grad_norm": 0.5475696921348572, "learning_rate": 9.279482067753777e-05, "loss": 1.8177, "step": 3352 }, { "epoch": 0.18689036285602809, "grad_norm": 0.5523363947868347, "learning_rate": 9.279021519720203e-05, "loss": 1.7726, "step": 3353 }, { "epoch": 0.18694610110919124, "grad_norm": 0.4846109449863434, "learning_rate": 9.278560835981016e-05, "loss": 1.7335, "step": 3354 }, { "epoch": 0.18700183936235437, "grad_norm": 0.5322748422622681, "learning_rate": 9.278100016550825e-05, "loss": 1.8071, "step": 3355 }, { "epoch": 0.18705757761551753, "grad_norm": 0.5510337352752686, "learning_rate": 9.277639061444244e-05, "loss": 1.7673, "step": 3356 }, { "epoch": 0.18711331586868069, "grad_norm": 0.5218777060508728, "learning_rate": 9.277177970675893e-05, "loss": 1.686, "step": 3357 }, { "epoch": 0.18716905412184381, "grad_norm": 0.5483778715133667, "learning_rate": 9.276716744260392e-05, "loss": 1.8069, "step": 3358 }, { "epoch": 0.18722479237500697, "grad_norm": 0.5690082907676697, "learning_rate": 9.276255382212373e-05, "loss": 1.7838, "step": 3359 }, { "epoch": 0.1872805306281701, "grad_norm": 0.5564740896224976, "learning_rate": 9.275793884546465e-05, "loss": 1.6363, "step": 3360 }, { "epoch": 0.18733626888133326, "grad_norm": 0.5689534544944763, "learning_rate": 9.275332251277305e-05, "loss": 1.7624, "step": 3361 }, { "epoch": 0.1873920071344964, "grad_norm": 0.5340893864631653, "learning_rate": 9.274870482419533e-05, "loss": 1.785, "step": 3362 }, { "epoch": 0.18744774538765954, "grad_norm": 0.556954562664032, "learning_rate": 9.274408577987792e-05, "loss": 1.7629, "step": 3363 }, { "epoch": 0.1875034836408227, "grad_norm": 0.5275453329086304, "learning_rate": 9.273946537996734e-05, "loss": 1.6675, "step": 3364 }, { "epoch": 0.18755922189398586, "grad_norm": 0.5510149598121643, "learning_rate": 9.273484362461011e-05, "loss": 1.8703, "step": 3365 }, { "epoch": 0.18761496014714898, "grad_norm": 0.5040173530578613, "learning_rate": 9.273022051395278e-05, "loss": 1.646, "step": 3366 }, { "epoch": 0.18767069840031214, "grad_norm": 0.5532334446907043, "learning_rate": 9.272559604814201e-05, "loss": 1.7221, "step": 3367 }, { "epoch": 0.18772643665347527, "grad_norm": 0.5305314064025879, "learning_rate": 9.272097022732443e-05, "loss": 1.5933, "step": 3368 }, { "epoch": 0.18778217490663843, "grad_norm": 0.5466606020927429, "learning_rate": 9.271634305164675e-05, "loss": 1.7312, "step": 3369 }, { "epoch": 0.18783791315980158, "grad_norm": 0.5373468995094299, "learning_rate": 9.271171452125575e-05, "loss": 1.7442, "step": 3370 }, { "epoch": 0.1878936514129647, "grad_norm": 0.5270282626152039, "learning_rate": 9.270708463629815e-05, "loss": 1.7939, "step": 3371 }, { "epoch": 0.18794938966612787, "grad_norm": 0.5657024383544922, "learning_rate": 9.270245339692086e-05, "loss": 1.8941, "step": 3372 }, { "epoch": 0.188005127919291, "grad_norm": 0.5092267990112305, "learning_rate": 9.269782080327071e-05, "loss": 1.6895, "step": 3373 }, { "epoch": 0.18806086617245416, "grad_norm": 0.5645020008087158, "learning_rate": 9.269318685549463e-05, "loss": 1.6734, "step": 3374 }, { "epoch": 0.1881166044256173, "grad_norm": 0.5031103491783142, "learning_rate": 9.268855155373957e-05, "loss": 1.848, "step": 3375 }, { "epoch": 0.18817234267878044, "grad_norm": 0.5133728981018066, "learning_rate": 9.268391489815257e-05, "loss": 1.4297, "step": 3376 }, { "epoch": 0.1882280809319436, "grad_norm": 0.5471519231796265, "learning_rate": 9.267927688888062e-05, "loss": 1.8073, "step": 3377 }, { "epoch": 0.18828381918510673, "grad_norm": 0.545860230922699, "learning_rate": 9.267463752607089e-05, "loss": 1.751, "step": 3378 }, { "epoch": 0.18833955743826988, "grad_norm": 0.4829151928424835, "learning_rate": 9.266999680987043e-05, "loss": 1.498, "step": 3379 }, { "epoch": 0.18839529569143304, "grad_norm": 0.5440730452537537, "learning_rate": 9.266535474042647e-05, "loss": 1.4733, "step": 3380 }, { "epoch": 0.18845103394459617, "grad_norm": 0.7026723623275757, "learning_rate": 9.266071131788621e-05, "loss": 1.904, "step": 3381 }, { "epoch": 0.18850677219775933, "grad_norm": 0.49864065647125244, "learning_rate": 9.26560665423969e-05, "loss": 1.8644, "step": 3382 }, { "epoch": 0.18856251045092245, "grad_norm": 0.5199279189109802, "learning_rate": 9.265142041410589e-05, "loss": 1.6917, "step": 3383 }, { "epoch": 0.1886182487040856, "grad_norm": 0.5546734929084778, "learning_rate": 9.26467729331605e-05, "loss": 1.7944, "step": 3384 }, { "epoch": 0.18867398695724877, "grad_norm": 0.5777541399002075, "learning_rate": 9.26421240997081e-05, "loss": 1.9372, "step": 3385 }, { "epoch": 0.1887297252104119, "grad_norm": 0.6016680598258972, "learning_rate": 9.263747391389615e-05, "loss": 1.9591, "step": 3386 }, { "epoch": 0.18878546346357505, "grad_norm": 0.5046743154525757, "learning_rate": 9.263282237587213e-05, "loss": 1.5718, "step": 3387 }, { "epoch": 0.1888412017167382, "grad_norm": 0.5458966493606567, "learning_rate": 9.262816948578354e-05, "loss": 1.7829, "step": 3388 }, { "epoch": 0.18889693996990134, "grad_norm": 0.5983991026878357, "learning_rate": 9.262351524377797e-05, "loss": 1.8848, "step": 3389 }, { "epoch": 0.1889526782230645, "grad_norm": 0.5047475099563599, "learning_rate": 9.261885965000298e-05, "loss": 1.3356, "step": 3390 }, { "epoch": 0.18900841647622763, "grad_norm": 0.5353848338127136, "learning_rate": 9.261420270460628e-05, "loss": 1.7632, "step": 3391 }, { "epoch": 0.18906415472939078, "grad_norm": 0.5097886919975281, "learning_rate": 9.26095444077355e-05, "loss": 1.6608, "step": 3392 }, { "epoch": 0.18911989298255394, "grad_norm": 0.5497481226921082, "learning_rate": 9.260488475953842e-05, "loss": 1.8704, "step": 3393 }, { "epoch": 0.18917563123571707, "grad_norm": 0.5084047317504883, "learning_rate": 9.26002237601628e-05, "loss": 1.515, "step": 3394 }, { "epoch": 0.18923136948888022, "grad_norm": 0.5252576470375061, "learning_rate": 9.259556140975644e-05, "loss": 1.448, "step": 3395 }, { "epoch": 0.18928710774204335, "grad_norm": 0.5760124325752258, "learning_rate": 9.259089770846723e-05, "loss": 1.7052, "step": 3396 }, { "epoch": 0.1893428459952065, "grad_norm": 0.5604876279830933, "learning_rate": 9.258623265644309e-05, "loss": 1.8782, "step": 3397 }, { "epoch": 0.18939858424836967, "grad_norm": 0.5331717133522034, "learning_rate": 9.258156625383192e-05, "loss": 1.6754, "step": 3398 }, { "epoch": 0.1894543225015328, "grad_norm": 0.5478466153144836, "learning_rate": 9.257689850078174e-05, "loss": 1.7709, "step": 3399 }, { "epoch": 0.18951006075469595, "grad_norm": 0.5751819014549255, "learning_rate": 9.257222939744059e-05, "loss": 1.6806, "step": 3400 }, { "epoch": 0.18956579900785908, "grad_norm": 0.557999849319458, "learning_rate": 9.256755894395652e-05, "loss": 1.6614, "step": 3401 }, { "epoch": 0.18962153726102224, "grad_norm": 0.6242285370826721, "learning_rate": 9.256288714047767e-05, "loss": 1.9115, "step": 3402 }, { "epoch": 0.1896772755141854, "grad_norm": 0.5403860807418823, "learning_rate": 9.255821398715221e-05, "loss": 1.6686, "step": 3403 }, { "epoch": 0.18973301376734852, "grad_norm": 0.5129532814025879, "learning_rate": 9.255353948412833e-05, "loss": 1.5406, "step": 3404 }, { "epoch": 0.18978875202051168, "grad_norm": 0.5771991014480591, "learning_rate": 9.254886363155429e-05, "loss": 1.8979, "step": 3405 }, { "epoch": 0.1898444902736748, "grad_norm": 0.5569978356361389, "learning_rate": 9.254418642957835e-05, "loss": 1.7284, "step": 3406 }, { "epoch": 0.18990022852683797, "grad_norm": 0.5016009211540222, "learning_rate": 9.253950787834889e-05, "loss": 1.7517, "step": 3407 }, { "epoch": 0.18995596678000112, "grad_norm": 0.47752997279167175, "learning_rate": 9.253482797801425e-05, "loss": 1.7069, "step": 3408 }, { "epoch": 0.19001170503316425, "grad_norm": 0.4722379446029663, "learning_rate": 9.253014672872285e-05, "loss": 1.4309, "step": 3409 }, { "epoch": 0.1900674432863274, "grad_norm": 0.516113817691803, "learning_rate": 9.252546413062319e-05, "loss": 1.6337, "step": 3410 }, { "epoch": 0.19012318153949057, "grad_norm": 0.4841940402984619, "learning_rate": 9.252078018386374e-05, "loss": 1.4486, "step": 3411 }, { "epoch": 0.1901789197926537, "grad_norm": 0.566828191280365, "learning_rate": 9.251609488859304e-05, "loss": 1.524, "step": 3412 }, { "epoch": 0.19023465804581685, "grad_norm": 0.5277671813964844, "learning_rate": 9.251140824495972e-05, "loss": 1.6331, "step": 3413 }, { "epoch": 0.19029039629897998, "grad_norm": 0.5212645530700684, "learning_rate": 9.250672025311237e-05, "loss": 1.6409, "step": 3414 }, { "epoch": 0.19034613455214314, "grad_norm": 0.5892760753631592, "learning_rate": 9.250203091319968e-05, "loss": 1.7712, "step": 3415 }, { "epoch": 0.1904018728053063, "grad_norm": 0.5454036593437195, "learning_rate": 9.24973402253704e-05, "loss": 1.888, "step": 3416 }, { "epoch": 0.19045761105846942, "grad_norm": 0.5001441836357117, "learning_rate": 9.249264818977324e-05, "loss": 1.6808, "step": 3417 }, { "epoch": 0.19051334931163258, "grad_norm": 0.5732707977294922, "learning_rate": 9.248795480655704e-05, "loss": 1.8398, "step": 3418 }, { "epoch": 0.1905690875647957, "grad_norm": 0.5356916785240173, "learning_rate": 9.248326007587063e-05, "loss": 1.7295, "step": 3419 }, { "epoch": 0.19062482581795887, "grad_norm": 0.5317162275314331, "learning_rate": 9.247856399786292e-05, "loss": 1.7717, "step": 3420 }, { "epoch": 0.19068056407112202, "grad_norm": 0.5117460489273071, "learning_rate": 9.247386657268283e-05, "loss": 1.5417, "step": 3421 }, { "epoch": 0.19073630232428515, "grad_norm": 0.5263468623161316, "learning_rate": 9.24691678004793e-05, "loss": 1.8882, "step": 3422 }, { "epoch": 0.1907920405774483, "grad_norm": 0.5721904635429382, "learning_rate": 9.24644676814014e-05, "loss": 1.8083, "step": 3423 }, { "epoch": 0.19084777883061144, "grad_norm": 0.5673632025718689, "learning_rate": 9.245976621559817e-05, "loss": 1.8532, "step": 3424 }, { "epoch": 0.1909035170837746, "grad_norm": 0.5096221566200256, "learning_rate": 9.24550634032187e-05, "loss": 1.5365, "step": 3425 }, { "epoch": 0.19095925533693775, "grad_norm": 0.545087456703186, "learning_rate": 9.245035924441217e-05, "loss": 1.854, "step": 3426 }, { "epoch": 0.19101499359010088, "grad_norm": 0.5424298644065857, "learning_rate": 9.244565373932774e-05, "loss": 1.7373, "step": 3427 }, { "epoch": 0.19107073184326404, "grad_norm": 0.5558550357818604, "learning_rate": 9.244094688811465e-05, "loss": 1.746, "step": 3428 }, { "epoch": 0.19112647009642716, "grad_norm": 0.49283209443092346, "learning_rate": 9.243623869092218e-05, "loss": 1.3836, "step": 3429 }, { "epoch": 0.19118220834959032, "grad_norm": 0.5955911874771118, "learning_rate": 9.24315291478996e-05, "loss": 1.8499, "step": 3430 }, { "epoch": 0.19123794660275348, "grad_norm": 0.5249252319335938, "learning_rate": 9.242681825919635e-05, "loss": 1.6767, "step": 3431 }, { "epoch": 0.1912936848559166, "grad_norm": 0.5496412515640259, "learning_rate": 9.242210602496178e-05, "loss": 1.7963, "step": 3432 }, { "epoch": 0.19134942310907976, "grad_norm": 0.5590277910232544, "learning_rate": 9.241739244534534e-05, "loss": 1.7885, "step": 3433 }, { "epoch": 0.19140516136224292, "grad_norm": 0.5826262831687927, "learning_rate": 9.241267752049653e-05, "loss": 1.7971, "step": 3434 }, { "epoch": 0.19146089961540605, "grad_norm": 0.5477822422981262, "learning_rate": 9.240796125056486e-05, "loss": 1.7376, "step": 3435 }, { "epoch": 0.1915166378685692, "grad_norm": 0.5088443756103516, "learning_rate": 9.240324363569992e-05, "loss": 1.6705, "step": 3436 }, { "epoch": 0.19157237612173234, "grad_norm": 0.5802351832389832, "learning_rate": 9.239852467605132e-05, "loss": 1.8505, "step": 3437 }, { "epoch": 0.1916281143748955, "grad_norm": 0.5589656829833984, "learning_rate": 9.239380437176872e-05, "loss": 1.7993, "step": 3438 }, { "epoch": 0.19168385262805865, "grad_norm": 0.5384811162948608, "learning_rate": 9.238908272300181e-05, "loss": 1.803, "step": 3439 }, { "epoch": 0.19173959088122178, "grad_norm": 0.5251903533935547, "learning_rate": 9.238435972990036e-05, "loss": 1.6364, "step": 3440 }, { "epoch": 0.19179532913438493, "grad_norm": 0.5536593794822693, "learning_rate": 9.237963539261412e-05, "loss": 1.8069, "step": 3441 }, { "epoch": 0.19185106738754806, "grad_norm": 0.49031203985214233, "learning_rate": 9.237490971129294e-05, "loss": 1.6969, "step": 3442 }, { "epoch": 0.19190680564071122, "grad_norm": 0.5111910700798035, "learning_rate": 9.23701826860867e-05, "loss": 1.7135, "step": 3443 }, { "epoch": 0.19196254389387438, "grad_norm": 0.5502627491950989, "learning_rate": 9.236545431714529e-05, "loss": 1.8724, "step": 3444 }, { "epoch": 0.1920182821470375, "grad_norm": 0.5772512555122375, "learning_rate": 9.236072460461867e-05, "loss": 1.7944, "step": 3445 }, { "epoch": 0.19207402040020066, "grad_norm": 0.6393208503723145, "learning_rate": 9.235599354865686e-05, "loss": 1.5557, "step": 3446 }, { "epoch": 0.1921297586533638, "grad_norm": 0.5822187066078186, "learning_rate": 9.235126114940989e-05, "loss": 1.8263, "step": 3447 }, { "epoch": 0.19218549690652695, "grad_norm": 0.5391358733177185, "learning_rate": 9.234652740702781e-05, "loss": 1.7186, "step": 3448 }, { "epoch": 0.1922412351596901, "grad_norm": 0.4919295907020569, "learning_rate": 9.23417923216608e-05, "loss": 1.5176, "step": 3449 }, { "epoch": 0.19229697341285323, "grad_norm": 0.547146737575531, "learning_rate": 9.233705589345902e-05, "loss": 1.8129, "step": 3450 }, { "epoch": 0.1923527116660164, "grad_norm": 0.4958893358707428, "learning_rate": 9.233231812257265e-05, "loss": 1.5314, "step": 3451 }, { "epoch": 0.19240844991917952, "grad_norm": 0.4873281419277191, "learning_rate": 9.232757900915197e-05, "loss": 1.6043, "step": 3452 }, { "epoch": 0.19246418817234268, "grad_norm": 0.5672634840011597, "learning_rate": 9.232283855334727e-05, "loss": 1.8168, "step": 3453 }, { "epoch": 0.19251992642550583, "grad_norm": 0.514673113822937, "learning_rate": 9.231809675530888e-05, "loss": 1.7076, "step": 3454 }, { "epoch": 0.19257566467866896, "grad_norm": 0.5566558241844177, "learning_rate": 9.23133536151872e-05, "loss": 1.8021, "step": 3455 }, { "epoch": 0.19263140293183212, "grad_norm": 0.5627939701080322, "learning_rate": 9.230860913313266e-05, "loss": 1.659, "step": 3456 }, { "epoch": 0.19268714118499528, "grad_norm": 0.5632688403129578, "learning_rate": 9.23038633092957e-05, "loss": 1.8172, "step": 3457 }, { "epoch": 0.1927428794381584, "grad_norm": 0.5149570107460022, "learning_rate": 9.229911614382685e-05, "loss": 1.6086, "step": 3458 }, { "epoch": 0.19279861769132156, "grad_norm": 0.5687461495399475, "learning_rate": 9.229436763687665e-05, "loss": 1.7102, "step": 3459 }, { "epoch": 0.1928543559444847, "grad_norm": 0.527733325958252, "learning_rate": 9.228961778859572e-05, "loss": 1.6291, "step": 3460 }, { "epoch": 0.19291009419764785, "grad_norm": 0.5713732242584229, "learning_rate": 9.228486659913467e-05, "loss": 1.7628, "step": 3461 }, { "epoch": 0.192965832450811, "grad_norm": 0.5368852019309998, "learning_rate": 9.228011406864417e-05, "loss": 1.6604, "step": 3462 }, { "epoch": 0.19302157070397413, "grad_norm": 0.5099670886993408, "learning_rate": 9.227536019727498e-05, "loss": 1.6571, "step": 3463 }, { "epoch": 0.1930773089571373, "grad_norm": 0.5792325735092163, "learning_rate": 9.227060498517785e-05, "loss": 1.6586, "step": 3464 }, { "epoch": 0.19313304721030042, "grad_norm": 0.5870433449745178, "learning_rate": 9.226584843250357e-05, "loss": 1.6693, "step": 3465 }, { "epoch": 0.19318878546346357, "grad_norm": 0.5723249316215515, "learning_rate": 9.226109053940302e-05, "loss": 1.8516, "step": 3466 }, { "epoch": 0.19324452371662673, "grad_norm": 0.5492411255836487, "learning_rate": 9.225633130602707e-05, "loss": 1.8369, "step": 3467 }, { "epoch": 0.19330026196978986, "grad_norm": 0.5040132403373718, "learning_rate": 9.225157073252666e-05, "loss": 1.7936, "step": 3468 }, { "epoch": 0.19335600022295302, "grad_norm": 0.5484923124313354, "learning_rate": 9.224680881905279e-05, "loss": 1.8398, "step": 3469 }, { "epoch": 0.19341173847611615, "grad_norm": 0.6042559146881104, "learning_rate": 9.224204556575644e-05, "loss": 1.8699, "step": 3470 }, { "epoch": 0.1934674767292793, "grad_norm": 0.5580307841300964, "learning_rate": 9.22372809727887e-05, "loss": 1.6961, "step": 3471 }, { "epoch": 0.19352321498244246, "grad_norm": 0.5399236679077148, "learning_rate": 9.223251504030066e-05, "loss": 1.6302, "step": 3472 }, { "epoch": 0.1935789532356056, "grad_norm": 0.5522669553756714, "learning_rate": 9.222774776844349e-05, "loss": 1.765, "step": 3473 }, { "epoch": 0.19363469148876875, "grad_norm": 0.5530064105987549, "learning_rate": 9.222297915736834e-05, "loss": 1.783, "step": 3474 }, { "epoch": 0.19369042974193187, "grad_norm": 0.5082196593284607, "learning_rate": 9.22182092072265e-05, "loss": 1.6188, "step": 3475 }, { "epoch": 0.19374616799509503, "grad_norm": 0.5311219692230225, "learning_rate": 9.221343791816918e-05, "loss": 1.7017, "step": 3476 }, { "epoch": 0.1938019062482582, "grad_norm": 0.542589545249939, "learning_rate": 9.220866529034776e-05, "loss": 1.7064, "step": 3477 }, { "epoch": 0.19385764450142132, "grad_norm": 0.5327942967414856, "learning_rate": 9.220389132391356e-05, "loss": 1.7807, "step": 3478 }, { "epoch": 0.19391338275458447, "grad_norm": 0.523639976978302, "learning_rate": 9.219911601901799e-05, "loss": 1.5785, "step": 3479 }, { "epoch": 0.19396912100774763, "grad_norm": 0.5756027102470398, "learning_rate": 9.21943393758125e-05, "loss": 2.0297, "step": 3480 }, { "epoch": 0.19402485926091076, "grad_norm": 0.5392191410064697, "learning_rate": 9.218956139444858e-05, "loss": 1.6824, "step": 3481 }, { "epoch": 0.19408059751407392, "grad_norm": 0.536055326461792, "learning_rate": 9.218478207507775e-05, "loss": 1.7264, "step": 3482 }, { "epoch": 0.19413633576723704, "grad_norm": 0.5701099634170532, "learning_rate": 9.218000141785158e-05, "loss": 1.7967, "step": 3483 }, { "epoch": 0.1941920740204002, "grad_norm": 0.586493194103241, "learning_rate": 9.21752194229217e-05, "loss": 2.0026, "step": 3484 }, { "epoch": 0.19424781227356336, "grad_norm": 0.5607553124427795, "learning_rate": 9.217043609043975e-05, "loss": 1.8374, "step": 3485 }, { "epoch": 0.1943035505267265, "grad_norm": 0.5268848538398743, "learning_rate": 9.216565142055745e-05, "loss": 1.6248, "step": 3486 }, { "epoch": 0.19435928877988964, "grad_norm": 0.563528299331665, "learning_rate": 9.216086541342652e-05, "loss": 1.8659, "step": 3487 }, { "epoch": 0.19441502703305277, "grad_norm": 0.5309708714485168, "learning_rate": 9.215607806919877e-05, "loss": 1.7026, "step": 3488 }, { "epoch": 0.19447076528621593, "grad_norm": 0.5582777857780457, "learning_rate": 9.2151289388026e-05, "loss": 1.8766, "step": 3489 }, { "epoch": 0.1945265035393791, "grad_norm": 0.5012943744659424, "learning_rate": 9.214649937006008e-05, "loss": 1.372, "step": 3490 }, { "epoch": 0.19458224179254222, "grad_norm": 0.5534226298332214, "learning_rate": 9.214170801545294e-05, "loss": 1.8491, "step": 3491 }, { "epoch": 0.19463798004570537, "grad_norm": 0.5312340259552002, "learning_rate": 9.213691532435654e-05, "loss": 1.4738, "step": 3492 }, { "epoch": 0.1946937182988685, "grad_norm": 0.5233004093170166, "learning_rate": 9.213212129692284e-05, "loss": 1.5871, "step": 3493 }, { "epoch": 0.19474945655203166, "grad_norm": 0.5227805972099304, "learning_rate": 9.212732593330389e-05, "loss": 1.6355, "step": 3494 }, { "epoch": 0.19480519480519481, "grad_norm": 0.5237340927124023, "learning_rate": 9.21225292336518e-05, "loss": 1.8903, "step": 3495 }, { "epoch": 0.19486093305835794, "grad_norm": 0.5420545935630798, "learning_rate": 9.211773119811867e-05, "loss": 1.9006, "step": 3496 }, { "epoch": 0.1949166713115211, "grad_norm": 0.534702718257904, "learning_rate": 9.211293182685667e-05, "loss": 1.5601, "step": 3497 }, { "epoch": 0.19497240956468423, "grad_norm": 0.5968030095100403, "learning_rate": 9.210813112001802e-05, "loss": 1.7871, "step": 3498 }, { "epoch": 0.19502814781784739, "grad_norm": 0.5270916223526001, "learning_rate": 9.210332907775494e-05, "loss": 1.69, "step": 3499 }, { "epoch": 0.19508388607101054, "grad_norm": 0.5496137142181396, "learning_rate": 9.209852570021975e-05, "loss": 1.916, "step": 3500 }, { "epoch": 0.19513962432417367, "grad_norm": 0.5198974013328552, "learning_rate": 9.209372098756476e-05, "loss": 1.6651, "step": 3501 }, { "epoch": 0.19519536257733683, "grad_norm": 0.5615696907043457, "learning_rate": 9.208891493994239e-05, "loss": 1.7589, "step": 3502 }, { "epoch": 0.19525110083049999, "grad_norm": 0.5367715954780579, "learning_rate": 9.208410755750501e-05, "loss": 1.5889, "step": 3503 }, { "epoch": 0.19530683908366311, "grad_norm": 0.6133012771606445, "learning_rate": 9.207929884040511e-05, "loss": 1.8472, "step": 3504 }, { "epoch": 0.19536257733682627, "grad_norm": 0.6582043170928955, "learning_rate": 9.20744887887952e-05, "loss": 1.6471, "step": 3505 }, { "epoch": 0.1954183155899894, "grad_norm": 0.5180196762084961, "learning_rate": 9.206967740282783e-05, "loss": 1.5727, "step": 3506 }, { "epoch": 0.19547405384315256, "grad_norm": 0.5526701807975769, "learning_rate": 9.206486468265555e-05, "loss": 1.635, "step": 3507 }, { "epoch": 0.1955297920963157, "grad_norm": 0.6198756694793701, "learning_rate": 9.206005062843102e-05, "loss": 1.7088, "step": 3508 }, { "epoch": 0.19558553034947884, "grad_norm": 0.5373274683952332, "learning_rate": 9.205523524030693e-05, "loss": 1.7032, "step": 3509 }, { "epoch": 0.195641268602642, "grad_norm": 0.5724993944168091, "learning_rate": 9.205041851843596e-05, "loss": 1.8822, "step": 3510 }, { "epoch": 0.19569700685580513, "grad_norm": 0.5542033314704895, "learning_rate": 9.20456004629709e-05, "loss": 1.333, "step": 3511 }, { "epoch": 0.19575274510896828, "grad_norm": 0.5784552693367004, "learning_rate": 9.204078107406454e-05, "loss": 1.8277, "step": 3512 }, { "epoch": 0.19580848336213144, "grad_norm": 0.5339728593826294, "learning_rate": 9.203596035186969e-05, "loss": 1.5545, "step": 3513 }, { "epoch": 0.19586422161529457, "grad_norm": 0.5574887990951538, "learning_rate": 9.203113829653927e-05, "loss": 1.6811, "step": 3514 }, { "epoch": 0.19591995986845773, "grad_norm": 0.48576298356056213, "learning_rate": 9.202631490822622e-05, "loss": 1.548, "step": 3515 }, { "epoch": 0.19597569812162086, "grad_norm": 0.516997218132019, "learning_rate": 9.202149018708347e-05, "loss": 1.6624, "step": 3516 }, { "epoch": 0.196031436374784, "grad_norm": 0.5537340641021729, "learning_rate": 9.201666413326408e-05, "loss": 1.8557, "step": 3517 }, { "epoch": 0.19608717462794717, "grad_norm": 0.5295738577842712, "learning_rate": 9.201183674692107e-05, "loss": 1.5435, "step": 3518 }, { "epoch": 0.1961429128811103, "grad_norm": 0.47536125779151917, "learning_rate": 9.200700802820754e-05, "loss": 1.4683, "step": 3519 }, { "epoch": 0.19619865113427346, "grad_norm": 0.546451985836029, "learning_rate": 9.200217797727662e-05, "loss": 1.8706, "step": 3520 }, { "epoch": 0.19625438938743658, "grad_norm": 0.5166674256324768, "learning_rate": 9.199734659428152e-05, "loss": 1.5608, "step": 3521 }, { "epoch": 0.19631012764059974, "grad_norm": 0.5700700879096985, "learning_rate": 9.199251387937545e-05, "loss": 1.7221, "step": 3522 }, { "epoch": 0.1963658658937629, "grad_norm": 0.6089435815811157, "learning_rate": 9.198767983271166e-05, "loss": 1.7989, "step": 3523 }, { "epoch": 0.19642160414692603, "grad_norm": 0.6160342693328857, "learning_rate": 9.198284445444348e-05, "loss": 2.0033, "step": 3524 }, { "epoch": 0.19647734240008918, "grad_norm": 0.6272563338279724, "learning_rate": 9.197800774472426e-05, "loss": 1.9705, "step": 3525 }, { "epoch": 0.19653308065325234, "grad_norm": 0.4671235680580139, "learning_rate": 9.197316970370737e-05, "loss": 1.0644, "step": 3526 }, { "epoch": 0.19658881890641547, "grad_norm": 0.5911363959312439, "learning_rate": 9.196833033154625e-05, "loss": 1.662, "step": 3527 }, { "epoch": 0.19664455715957863, "grad_norm": 0.552719235420227, "learning_rate": 9.19634896283944e-05, "loss": 1.7743, "step": 3528 }, { "epoch": 0.19670029541274175, "grad_norm": 0.5252164006233215, "learning_rate": 9.195864759440531e-05, "loss": 1.7682, "step": 3529 }, { "epoch": 0.1967560336659049, "grad_norm": 0.53560471534729, "learning_rate": 9.195380422973257e-05, "loss": 1.6731, "step": 3530 }, { "epoch": 0.19681177191906807, "grad_norm": 0.5091952085494995, "learning_rate": 9.194895953452976e-05, "loss": 1.4618, "step": 3531 }, { "epoch": 0.1968675101722312, "grad_norm": 0.5449403524398804, "learning_rate": 9.194411350895053e-05, "loss": 1.7007, "step": 3532 }, { "epoch": 0.19692324842539435, "grad_norm": 0.5258320569992065, "learning_rate": 9.193926615314857e-05, "loss": 1.8571, "step": 3533 }, { "epoch": 0.19697898667855748, "grad_norm": 0.5018019080162048, "learning_rate": 9.193441746727762e-05, "loss": 1.4968, "step": 3534 }, { "epoch": 0.19703472493172064, "grad_norm": 0.570955753326416, "learning_rate": 9.192956745149144e-05, "loss": 1.8938, "step": 3535 }, { "epoch": 0.1970904631848838, "grad_norm": 0.595371663570404, "learning_rate": 9.192471610594384e-05, "loss": 2.0865, "step": 3536 }, { "epoch": 0.19714620143804693, "grad_norm": 0.5452008247375488, "learning_rate": 9.191986343078868e-05, "loss": 1.7354, "step": 3537 }, { "epoch": 0.19720193969121008, "grad_norm": 0.5002971887588501, "learning_rate": 9.191500942617988e-05, "loss": 1.5218, "step": 3538 }, { "epoch": 0.1972576779443732, "grad_norm": 0.5388283133506775, "learning_rate": 9.191015409227134e-05, "loss": 1.6676, "step": 3539 }, { "epoch": 0.19731341619753637, "grad_norm": 0.5798291563987732, "learning_rate": 9.190529742921707e-05, "loss": 1.8602, "step": 3540 }, { "epoch": 0.19736915445069952, "grad_norm": 0.5622314214706421, "learning_rate": 9.190043943717111e-05, "loss": 1.7324, "step": 3541 }, { "epoch": 0.19742489270386265, "grad_norm": 0.5845619440078735, "learning_rate": 9.189558011628749e-05, "loss": 1.8098, "step": 3542 }, { "epoch": 0.1974806309570258, "grad_norm": 0.5707986354827881, "learning_rate": 9.189071946672033e-05, "loss": 1.9953, "step": 3543 }, { "epoch": 0.19753636921018894, "grad_norm": 0.5030776858329773, "learning_rate": 9.18858574886238e-05, "loss": 1.6697, "step": 3544 }, { "epoch": 0.1975921074633521, "grad_norm": 0.5452118515968323, "learning_rate": 9.188099418215208e-05, "loss": 1.4443, "step": 3545 }, { "epoch": 0.19764784571651525, "grad_norm": 0.5277875065803528, "learning_rate": 9.187612954745942e-05, "loss": 1.738, "step": 3546 }, { "epoch": 0.19770358396967838, "grad_norm": 0.563870906829834, "learning_rate": 9.187126358470006e-05, "loss": 1.7746, "step": 3547 }, { "epoch": 0.19775932222284154, "grad_norm": 0.5097183585166931, "learning_rate": 9.186639629402836e-05, "loss": 1.5869, "step": 3548 }, { "epoch": 0.1978150604760047, "grad_norm": 0.5304349660873413, "learning_rate": 9.186152767559866e-05, "loss": 1.4967, "step": 3549 }, { "epoch": 0.19787079872916782, "grad_norm": 0.5379878878593445, "learning_rate": 9.185665772956539e-05, "loss": 1.7457, "step": 3550 }, { "epoch": 0.19792653698233098, "grad_norm": 0.5299242734909058, "learning_rate": 9.185178645608297e-05, "loss": 1.6194, "step": 3551 }, { "epoch": 0.1979822752354941, "grad_norm": 0.5131285190582275, "learning_rate": 9.184691385530588e-05, "loss": 1.8616, "step": 3552 }, { "epoch": 0.19803801348865727, "grad_norm": 0.5294276475906372, "learning_rate": 9.184203992738869e-05, "loss": 1.5835, "step": 3553 }, { "epoch": 0.19809375174182042, "grad_norm": 0.544457197189331, "learning_rate": 9.183716467248593e-05, "loss": 1.6874, "step": 3554 }, { "epoch": 0.19814948999498355, "grad_norm": 0.5258937478065491, "learning_rate": 9.183228809075223e-05, "loss": 1.7946, "step": 3555 }, { "epoch": 0.1982052282481467, "grad_norm": 0.5388005971908569, "learning_rate": 9.182741018234228e-05, "loss": 1.6509, "step": 3556 }, { "epoch": 0.19826096650130984, "grad_norm": 0.5726017951965332, "learning_rate": 9.182253094741073e-05, "loss": 1.6885, "step": 3557 }, { "epoch": 0.198316704754473, "grad_norm": 0.5634879469871521, "learning_rate": 9.181765038611234e-05, "loss": 1.7431, "step": 3558 }, { "epoch": 0.19837244300763615, "grad_norm": 0.5139129161834717, "learning_rate": 9.18127684986019e-05, "loss": 1.763, "step": 3559 }, { "epoch": 0.19842818126079928, "grad_norm": 0.5589642524719238, "learning_rate": 9.180788528503423e-05, "loss": 1.9388, "step": 3560 }, { "epoch": 0.19848391951396244, "grad_norm": 0.538282036781311, "learning_rate": 9.18030007455642e-05, "loss": 1.8491, "step": 3561 }, { "epoch": 0.19853965776712557, "grad_norm": 0.5197616219520569, "learning_rate": 9.179811488034671e-05, "loss": 1.657, "step": 3562 }, { "epoch": 0.19859539602028872, "grad_norm": 0.569980800151825, "learning_rate": 9.17932276895367e-05, "loss": 1.8632, "step": 3563 }, { "epoch": 0.19865113427345188, "grad_norm": 0.6533870100975037, "learning_rate": 9.17883391732892e-05, "loss": 2.2768, "step": 3564 }, { "epoch": 0.198706872526615, "grad_norm": 0.5272773504257202, "learning_rate": 9.178344933175922e-05, "loss": 1.7145, "step": 3565 }, { "epoch": 0.19876261077977816, "grad_norm": 0.5350964069366455, "learning_rate": 9.177855816510184e-05, "loss": 1.6678, "step": 3566 }, { "epoch": 0.1988183490329413, "grad_norm": 0.5308762788772583, "learning_rate": 9.177366567347216e-05, "loss": 1.6745, "step": 3567 }, { "epoch": 0.19887408728610445, "grad_norm": 0.552905261516571, "learning_rate": 9.176877185702539e-05, "loss": 1.7337, "step": 3568 }, { "epoch": 0.1989298255392676, "grad_norm": 0.5350809693336487, "learning_rate": 9.17638767159167e-05, "loss": 1.754, "step": 3569 }, { "epoch": 0.19898556379243074, "grad_norm": 0.5393645167350769, "learning_rate": 9.175898025030134e-05, "loss": 1.6508, "step": 3570 }, { "epoch": 0.1990413020455939, "grad_norm": 0.5781660079956055, "learning_rate": 9.175408246033458e-05, "loss": 1.7258, "step": 3571 }, { "epoch": 0.19909704029875705, "grad_norm": 0.5230069160461426, "learning_rate": 9.17491833461718e-05, "loss": 1.5918, "step": 3572 }, { "epoch": 0.19915277855192018, "grad_norm": 0.54449063539505, "learning_rate": 9.174428290796833e-05, "loss": 1.4328, "step": 3573 }, { "epoch": 0.19920851680508334, "grad_norm": 0.5652233958244324, "learning_rate": 9.173938114587957e-05, "loss": 1.6627, "step": 3574 }, { "epoch": 0.19926425505824646, "grad_norm": 0.5487927198410034, "learning_rate": 9.173447806006102e-05, "loss": 1.6238, "step": 3575 }, { "epoch": 0.19931999331140962, "grad_norm": 0.5450085401535034, "learning_rate": 9.172957365066815e-05, "loss": 1.8033, "step": 3576 }, { "epoch": 0.19937573156457278, "grad_norm": 0.5951147079467773, "learning_rate": 9.17246679178565e-05, "loss": 2.0117, "step": 3577 }, { "epoch": 0.1994314698177359, "grad_norm": 0.5555893778800964, "learning_rate": 9.171976086178164e-05, "loss": 1.6994, "step": 3578 }, { "epoch": 0.19948720807089906, "grad_norm": 0.4888277053833008, "learning_rate": 9.171485248259924e-05, "loss": 1.555, "step": 3579 }, { "epoch": 0.1995429463240622, "grad_norm": 0.5293035507202148, "learning_rate": 9.170994278046492e-05, "loss": 1.7463, "step": 3580 }, { "epoch": 0.19959868457722535, "grad_norm": 0.544032096862793, "learning_rate": 9.17050317555344e-05, "loss": 1.8112, "step": 3581 }, { "epoch": 0.1996544228303885, "grad_norm": 0.5483592748641968, "learning_rate": 9.170011940796341e-05, "loss": 1.7906, "step": 3582 }, { "epoch": 0.19971016108355163, "grad_norm": 0.6069881319999695, "learning_rate": 9.16952057379078e-05, "loss": 2.0624, "step": 3583 }, { "epoch": 0.1997658993367148, "grad_norm": 0.5667694211006165, "learning_rate": 9.169029074552333e-05, "loss": 1.8233, "step": 3584 }, { "epoch": 0.19982163758987792, "grad_norm": 0.5053529739379883, "learning_rate": 9.168537443096592e-05, "loss": 1.6512, "step": 3585 }, { "epoch": 0.19987737584304108, "grad_norm": 0.5334288477897644, "learning_rate": 9.168045679439149e-05, "loss": 1.5675, "step": 3586 }, { "epoch": 0.19993311409620423, "grad_norm": 0.61188805103302, "learning_rate": 9.167553783595597e-05, "loss": 1.8834, "step": 3587 }, { "epoch": 0.19998885234936736, "grad_norm": 0.5691487193107605, "learning_rate": 9.167061755581539e-05, "loss": 1.7663, "step": 3588 }, { "epoch": 0.20004459060253052, "grad_norm": 0.5586220026016235, "learning_rate": 9.166569595412575e-05, "loss": 1.8832, "step": 3589 }, { "epoch": 0.20010032885569365, "grad_norm": 0.4987550675868988, "learning_rate": 9.166077303104319e-05, "loss": 1.661, "step": 3590 }, { "epoch": 0.2001560671088568, "grad_norm": 0.5463746190071106, "learning_rate": 9.165584878672378e-05, "loss": 1.6764, "step": 3591 }, { "epoch": 0.20021180536201996, "grad_norm": 0.5752919316291809, "learning_rate": 9.165092322132374e-05, "loss": 1.6847, "step": 3592 }, { "epoch": 0.2002675436151831, "grad_norm": 0.5271925330162048, "learning_rate": 9.164599633499925e-05, "loss": 1.7428, "step": 3593 }, { "epoch": 0.20032328186834625, "grad_norm": 0.4875536262989044, "learning_rate": 9.164106812790657e-05, "loss": 1.3011, "step": 3594 }, { "epoch": 0.2003790201215094, "grad_norm": 0.5993346571922302, "learning_rate": 9.1636138600202e-05, "loss": 1.8065, "step": 3595 }, { "epoch": 0.20043475837467253, "grad_norm": 0.5418604612350464, "learning_rate": 9.163120775204187e-05, "loss": 1.6812, "step": 3596 }, { "epoch": 0.2004904966278357, "grad_norm": 0.5411487817764282, "learning_rate": 9.162627558358255e-05, "loss": 1.8109, "step": 3597 }, { "epoch": 0.20054623488099882, "grad_norm": 0.5583702325820923, "learning_rate": 9.162134209498046e-05, "loss": 1.8183, "step": 3598 }, { "epoch": 0.20060197313416198, "grad_norm": 0.6028481721878052, "learning_rate": 9.161640728639207e-05, "loss": 1.8642, "step": 3599 }, { "epoch": 0.20065771138732513, "grad_norm": 0.5424187183380127, "learning_rate": 9.161147115797388e-05, "loss": 1.8178, "step": 3600 }, { "epoch": 0.20071344964048826, "grad_norm": 0.6147588491439819, "learning_rate": 9.160653370988243e-05, "loss": 1.7343, "step": 3601 }, { "epoch": 0.20076918789365142, "grad_norm": 0.5581020712852478, "learning_rate": 9.160159494227434e-05, "loss": 1.713, "step": 3602 }, { "epoch": 0.20082492614681455, "grad_norm": 0.5363709330558777, "learning_rate": 9.15966548553062e-05, "loss": 1.8839, "step": 3603 }, { "epoch": 0.2008806643999777, "grad_norm": 0.5731095671653748, "learning_rate": 9.159171344913469e-05, "loss": 1.8919, "step": 3604 }, { "epoch": 0.20093640265314086, "grad_norm": 0.5256056785583496, "learning_rate": 9.158677072391653e-05, "loss": 1.7236, "step": 3605 }, { "epoch": 0.200992140906304, "grad_norm": 0.5467107892036438, "learning_rate": 9.158182667980846e-05, "loss": 1.6551, "step": 3606 }, { "epoch": 0.20104787915946715, "grad_norm": 0.5082773566246033, "learning_rate": 9.157688131696729e-05, "loss": 1.6537, "step": 3607 }, { "epoch": 0.20110361741263028, "grad_norm": 0.5320789813995361, "learning_rate": 9.157193463554986e-05, "loss": 1.6112, "step": 3608 }, { "epoch": 0.20115935566579343, "grad_norm": 0.5658825635910034, "learning_rate": 9.156698663571305e-05, "loss": 1.6377, "step": 3609 }, { "epoch": 0.2012150939189566, "grad_norm": 0.593096137046814, "learning_rate": 9.156203731761376e-05, "loss": 1.7296, "step": 3610 }, { "epoch": 0.20127083217211972, "grad_norm": 0.5300352573394775, "learning_rate": 9.155708668140899e-05, "loss": 1.4073, "step": 3611 }, { "epoch": 0.20132657042528287, "grad_norm": 0.5179193019866943, "learning_rate": 9.155213472725571e-05, "loss": 1.5432, "step": 3612 }, { "epoch": 0.201382308678446, "grad_norm": 0.5618082880973816, "learning_rate": 9.154718145531098e-05, "loss": 1.79, "step": 3613 }, { "epoch": 0.20143804693160916, "grad_norm": 0.47643256187438965, "learning_rate": 9.15422268657319e-05, "loss": 1.4084, "step": 3614 }, { "epoch": 0.20149378518477232, "grad_norm": 0.6415194272994995, "learning_rate": 9.15372709586756e-05, "loss": 1.9196, "step": 3615 }, { "epoch": 0.20154952343793545, "grad_norm": 0.5599740147590637, "learning_rate": 9.153231373429922e-05, "loss": 1.8508, "step": 3616 }, { "epoch": 0.2016052616910986, "grad_norm": 0.5777899622917175, "learning_rate": 9.152735519276002e-05, "loss": 1.8367, "step": 3617 }, { "epoch": 0.20166099994426176, "grad_norm": 0.5653935670852661, "learning_rate": 9.152239533421523e-05, "loss": 1.5819, "step": 3618 }, { "epoch": 0.2017167381974249, "grad_norm": 0.5558584928512573, "learning_rate": 9.151743415882215e-05, "loss": 1.8245, "step": 3619 }, { "epoch": 0.20177247645058805, "grad_norm": 0.5481976866722107, "learning_rate": 9.151247166673811e-05, "loss": 1.6422, "step": 3620 }, { "epoch": 0.20182821470375117, "grad_norm": 0.49504461884498596, "learning_rate": 9.150750785812052e-05, "loss": 1.5992, "step": 3621 }, { "epoch": 0.20188395295691433, "grad_norm": 0.6056009531021118, "learning_rate": 9.150254273312677e-05, "loss": 1.7729, "step": 3622 }, { "epoch": 0.2019396912100775, "grad_norm": 0.5418253540992737, "learning_rate": 9.149757629191436e-05, "loss": 1.8279, "step": 3623 }, { "epoch": 0.20199542946324062, "grad_norm": 0.5427140593528748, "learning_rate": 9.149260853464077e-05, "loss": 1.6135, "step": 3624 }, { "epoch": 0.20205116771640377, "grad_norm": 0.5552391409873962, "learning_rate": 9.148763946146354e-05, "loss": 1.6617, "step": 3625 }, { "epoch": 0.2021069059695669, "grad_norm": 0.5886726975440979, "learning_rate": 9.148266907254031e-05, "loss": 1.9072, "step": 3626 }, { "epoch": 0.20216264422273006, "grad_norm": 0.587967038154602, "learning_rate": 9.147769736802864e-05, "loss": 1.7807, "step": 3627 }, { "epoch": 0.20221838247589322, "grad_norm": 0.5265384912490845, "learning_rate": 9.147272434808627e-05, "loss": 1.5633, "step": 3628 }, { "epoch": 0.20227412072905634, "grad_norm": 0.5282620191574097, "learning_rate": 9.146775001287088e-05, "loss": 1.579, "step": 3629 }, { "epoch": 0.2023298589822195, "grad_norm": 0.5758345723152161, "learning_rate": 9.146277436254022e-05, "loss": 1.8881, "step": 3630 }, { "epoch": 0.20238559723538263, "grad_norm": 0.5375788807868958, "learning_rate": 9.145779739725213e-05, "loss": 1.7915, "step": 3631 }, { "epoch": 0.2024413354885458, "grad_norm": 0.5047256350517273, "learning_rate": 9.14528191171644e-05, "loss": 1.8006, "step": 3632 }, { "epoch": 0.20249707374170894, "grad_norm": 0.5424186587333679, "learning_rate": 9.144783952243493e-05, "loss": 1.5753, "step": 3633 }, { "epoch": 0.20255281199487207, "grad_norm": 0.5288758277893066, "learning_rate": 9.144285861322166e-05, "loss": 1.7276, "step": 3634 }, { "epoch": 0.20260855024803523, "grad_norm": 0.638491690158844, "learning_rate": 9.143787638968254e-05, "loss": 1.8898, "step": 3635 }, { "epoch": 0.20266428850119836, "grad_norm": 0.5804757475852966, "learning_rate": 9.143289285197558e-05, "loss": 1.9973, "step": 3636 }, { "epoch": 0.20272002675436152, "grad_norm": 0.6197081804275513, "learning_rate": 9.142790800025883e-05, "loss": 1.7459, "step": 3637 }, { "epoch": 0.20277576500752467, "grad_norm": 0.6034955382347107, "learning_rate": 9.142292183469039e-05, "loss": 1.9412, "step": 3638 }, { "epoch": 0.2028315032606878, "grad_norm": 0.5404736995697021, "learning_rate": 9.141793435542836e-05, "loss": 1.6073, "step": 3639 }, { "epoch": 0.20288724151385096, "grad_norm": 0.48670318722724915, "learning_rate": 9.141294556263096e-05, "loss": 1.5109, "step": 3640 }, { "epoch": 0.20294297976701411, "grad_norm": 0.5840024948120117, "learning_rate": 9.140795545645636e-05, "loss": 1.7593, "step": 3641 }, { "epoch": 0.20299871802017724, "grad_norm": 0.5371603965759277, "learning_rate": 9.140296403706284e-05, "loss": 1.6055, "step": 3642 }, { "epoch": 0.2030544562733404, "grad_norm": 1.0509992837905884, "learning_rate": 9.13979713046087e-05, "loss": 2.0113, "step": 3643 }, { "epoch": 0.20311019452650353, "grad_norm": 0.49479854106903076, "learning_rate": 9.139297725925229e-05, "loss": 1.516, "step": 3644 }, { "epoch": 0.20316593277966669, "grad_norm": 0.5389636754989624, "learning_rate": 9.138798190115196e-05, "loss": 1.9002, "step": 3645 }, { "epoch": 0.20322167103282984, "grad_norm": 0.5524114370346069, "learning_rate": 9.138298523046617e-05, "loss": 1.6288, "step": 3646 }, { "epoch": 0.20327740928599297, "grad_norm": 0.49681249260902405, "learning_rate": 9.137798724735336e-05, "loss": 1.4397, "step": 3647 }, { "epoch": 0.20333314753915613, "grad_norm": 0.6418421268463135, "learning_rate": 9.137298795197204e-05, "loss": 2.1691, "step": 3648 }, { "epoch": 0.20338888579231926, "grad_norm": 0.5589434504508972, "learning_rate": 9.136798734448077e-05, "loss": 1.781, "step": 3649 }, { "epoch": 0.20344462404548241, "grad_norm": 0.5447176694869995, "learning_rate": 9.136298542503814e-05, "loss": 1.6205, "step": 3650 }, { "epoch": 0.20350036229864557, "grad_norm": 0.5343891978263855, "learning_rate": 9.135798219380276e-05, "loss": 1.7727, "step": 3651 }, { "epoch": 0.2035561005518087, "grad_norm": 0.5254631638526917, "learning_rate": 9.135297765093333e-05, "loss": 1.7057, "step": 3652 }, { "epoch": 0.20361183880497186, "grad_norm": 0.5393111109733582, "learning_rate": 9.134797179658854e-05, "loss": 1.7132, "step": 3653 }, { "epoch": 0.20366757705813499, "grad_norm": 0.5616254806518555, "learning_rate": 9.134296463092717e-05, "loss": 1.9128, "step": 3654 }, { "epoch": 0.20372331531129814, "grad_norm": 0.5558052659034729, "learning_rate": 9.133795615410801e-05, "loss": 1.6986, "step": 3655 }, { "epoch": 0.2037790535644613, "grad_norm": 0.5616979002952576, "learning_rate": 9.13329463662899e-05, "loss": 1.9381, "step": 3656 }, { "epoch": 0.20383479181762443, "grad_norm": 0.5200750827789307, "learning_rate": 9.132793526763171e-05, "loss": 1.6176, "step": 3657 }, { "epoch": 0.20389053007078758, "grad_norm": 0.5086760520935059, "learning_rate": 9.132292285829237e-05, "loss": 1.5035, "step": 3658 }, { "epoch": 0.20394626832395074, "grad_norm": 0.5122929215431213, "learning_rate": 9.131790913843086e-05, "loss": 1.6288, "step": 3659 }, { "epoch": 0.20400200657711387, "grad_norm": 0.5770255923271179, "learning_rate": 9.131289410820616e-05, "loss": 1.71, "step": 3660 }, { "epoch": 0.20405774483027703, "grad_norm": 0.5811052322387695, "learning_rate": 9.130787776777734e-05, "loss": 1.9395, "step": 3661 }, { "epoch": 0.20411348308344016, "grad_norm": 0.5475841164588928, "learning_rate": 9.130286011730347e-05, "loss": 1.8358, "step": 3662 }, { "epoch": 0.2041692213366033, "grad_norm": 0.5167744755744934, "learning_rate": 9.129784115694369e-05, "loss": 1.602, "step": 3663 }, { "epoch": 0.20422495958976647, "grad_norm": 0.5313771963119507, "learning_rate": 9.129282088685718e-05, "loss": 1.7868, "step": 3664 }, { "epoch": 0.2042806978429296, "grad_norm": 0.5149242877960205, "learning_rate": 9.128779930720313e-05, "loss": 1.6943, "step": 3665 }, { "epoch": 0.20433643609609276, "grad_norm": 0.5548785924911499, "learning_rate": 9.128277641814082e-05, "loss": 1.9083, "step": 3666 }, { "epoch": 0.20439217434925588, "grad_norm": 0.5865716338157654, "learning_rate": 9.127775221982954e-05, "loss": 1.9183, "step": 3667 }, { "epoch": 0.20444791260241904, "grad_norm": 0.5036227703094482, "learning_rate": 9.127272671242861e-05, "loss": 1.6097, "step": 3668 }, { "epoch": 0.2045036508555822, "grad_norm": 0.5178596377372742, "learning_rate": 9.126769989609745e-05, "loss": 1.7121, "step": 3669 }, { "epoch": 0.20455938910874533, "grad_norm": 0.585189938545227, "learning_rate": 9.126267177099543e-05, "loss": 1.8327, "step": 3670 }, { "epoch": 0.20461512736190848, "grad_norm": 0.5853554606437683, "learning_rate": 9.125764233728206e-05, "loss": 1.9047, "step": 3671 }, { "epoch": 0.2046708656150716, "grad_norm": 0.5730652213096619, "learning_rate": 9.125261159511682e-05, "loss": 1.8311, "step": 3672 }, { "epoch": 0.20472660386823477, "grad_norm": 0.5045105814933777, "learning_rate": 9.124757954465925e-05, "loss": 1.5241, "step": 3673 }, { "epoch": 0.20478234212139793, "grad_norm": 0.5725773572921753, "learning_rate": 9.124254618606897e-05, "loss": 1.6949, "step": 3674 }, { "epoch": 0.20483808037456105, "grad_norm": 0.5756915211677551, "learning_rate": 9.123751151950557e-05, "loss": 1.8553, "step": 3675 }, { "epoch": 0.2048938186277242, "grad_norm": 0.5354653000831604, "learning_rate": 9.123247554512873e-05, "loss": 1.7906, "step": 3676 }, { "epoch": 0.20494955688088734, "grad_norm": 0.5941489934921265, "learning_rate": 9.122743826309819e-05, "loss": 1.7721, "step": 3677 }, { "epoch": 0.2050052951340505, "grad_norm": 0.5832119584083557, "learning_rate": 9.122239967357366e-05, "loss": 1.9673, "step": 3678 }, { "epoch": 0.20506103338721365, "grad_norm": 0.6178232431411743, "learning_rate": 9.121735977671495e-05, "loss": 2.0516, "step": 3679 }, { "epoch": 0.20511677164037678, "grad_norm": 0.5315244197845459, "learning_rate": 9.121231857268191e-05, "loss": 1.5958, "step": 3680 }, { "epoch": 0.20517250989353994, "grad_norm": 0.5662999153137207, "learning_rate": 9.120727606163442e-05, "loss": 1.6989, "step": 3681 }, { "epoch": 0.2052282481467031, "grad_norm": 0.49450522661209106, "learning_rate": 9.120223224373238e-05, "loss": 1.4492, "step": 3682 }, { "epoch": 0.20528398639986623, "grad_norm": 0.572935163974762, "learning_rate": 9.119718711913575e-05, "loss": 1.6674, "step": 3683 }, { "epoch": 0.20533972465302938, "grad_norm": 0.5418963432312012, "learning_rate": 9.119214068800456e-05, "loss": 1.6326, "step": 3684 }, { "epoch": 0.2053954629061925, "grad_norm": 0.5970882773399353, "learning_rate": 9.118709295049883e-05, "loss": 1.9069, "step": 3685 }, { "epoch": 0.20545120115935567, "grad_norm": 0.5530537962913513, "learning_rate": 9.118204390677863e-05, "loss": 1.6096, "step": 3686 }, { "epoch": 0.20550693941251882, "grad_norm": 0.5641506314277649, "learning_rate": 9.117699355700412e-05, "loss": 1.7118, "step": 3687 }, { "epoch": 0.20556267766568195, "grad_norm": 0.6086058616638184, "learning_rate": 9.117194190133545e-05, "loss": 1.713, "step": 3688 }, { "epoch": 0.2056184159188451, "grad_norm": 0.577290952205658, "learning_rate": 9.116688893993284e-05, "loss": 1.8858, "step": 3689 }, { "epoch": 0.20567415417200824, "grad_norm": 0.5066075325012207, "learning_rate": 9.116183467295651e-05, "loss": 1.5922, "step": 3690 }, { "epoch": 0.2057298924251714, "grad_norm": 0.5287824273109436, "learning_rate": 9.115677910056681e-05, "loss": 1.4441, "step": 3691 }, { "epoch": 0.20578563067833455, "grad_norm": 0.62456214427948, "learning_rate": 9.115172222292401e-05, "loss": 1.9545, "step": 3692 }, { "epoch": 0.20584136893149768, "grad_norm": 0.5801160335540771, "learning_rate": 9.114666404018853e-05, "loss": 2.0095, "step": 3693 }, { "epoch": 0.20589710718466084, "grad_norm": 0.5162177085876465, "learning_rate": 9.114160455252074e-05, "loss": 1.7295, "step": 3694 }, { "epoch": 0.20595284543782397, "grad_norm": 0.5912075042724609, "learning_rate": 9.113654376008115e-05, "loss": 1.787, "step": 3695 }, { "epoch": 0.20600858369098712, "grad_norm": 0.5578693747520447, "learning_rate": 9.113148166303023e-05, "loss": 1.6167, "step": 3696 }, { "epoch": 0.20606432194415028, "grad_norm": 0.5576518177986145, "learning_rate": 9.112641826152853e-05, "loss": 1.7855, "step": 3697 }, { "epoch": 0.2061200601973134, "grad_norm": 0.5475178360939026, "learning_rate": 9.11213535557366e-05, "loss": 1.7013, "step": 3698 }, { "epoch": 0.20617579845047657, "grad_norm": 0.5434138178825378, "learning_rate": 9.111628754581512e-05, "loss": 1.7804, "step": 3699 }, { "epoch": 0.2062315367036397, "grad_norm": 0.5596892237663269, "learning_rate": 9.111122023192471e-05, "loss": 1.8347, "step": 3700 }, { "epoch": 0.20628727495680285, "grad_norm": 0.5505380034446716, "learning_rate": 9.110615161422609e-05, "loss": 1.878, "step": 3701 }, { "epoch": 0.206343013209966, "grad_norm": 0.6178278923034668, "learning_rate": 9.110108169288001e-05, "loss": 1.7626, "step": 3702 }, { "epoch": 0.20639875146312914, "grad_norm": 0.5460211038589478, "learning_rate": 9.109601046804726e-05, "loss": 1.8064, "step": 3703 }, { "epoch": 0.2064544897162923, "grad_norm": 0.5765804052352905, "learning_rate": 9.109093793988865e-05, "loss": 1.8228, "step": 3704 }, { "epoch": 0.20651022796945545, "grad_norm": 0.5335574746131897, "learning_rate": 9.108586410856508e-05, "loss": 1.8011, "step": 3705 }, { "epoch": 0.20656596622261858, "grad_norm": 0.5536273717880249, "learning_rate": 9.108078897423743e-05, "loss": 1.8751, "step": 3706 }, { "epoch": 0.20662170447578174, "grad_norm": 0.5405413508415222, "learning_rate": 9.107571253706668e-05, "loss": 1.8607, "step": 3707 }, { "epoch": 0.20667744272894487, "grad_norm": 0.5240110158920288, "learning_rate": 9.107063479721383e-05, "loss": 1.4375, "step": 3708 }, { "epoch": 0.20673318098210802, "grad_norm": 0.4756803512573242, "learning_rate": 9.106555575483988e-05, "loss": 1.3254, "step": 3709 }, { "epoch": 0.20678891923527118, "grad_norm": 0.5738046765327454, "learning_rate": 9.106047541010593e-05, "loss": 1.776, "step": 3710 }, { "epoch": 0.2068446574884343, "grad_norm": 0.5442799925804138, "learning_rate": 9.105539376317312e-05, "loss": 1.7099, "step": 3711 }, { "epoch": 0.20690039574159746, "grad_norm": 0.5695345401763916, "learning_rate": 9.105031081420259e-05, "loss": 1.6337, "step": 3712 }, { "epoch": 0.2069561339947606, "grad_norm": 0.4725694954395294, "learning_rate": 9.104522656335553e-05, "loss": 1.4172, "step": 3713 }, { "epoch": 0.20701187224792375, "grad_norm": 0.5137088894844055, "learning_rate": 9.10401410107932e-05, "loss": 1.6826, "step": 3714 }, { "epoch": 0.2070676105010869, "grad_norm": 0.5813738703727722, "learning_rate": 9.103505415667686e-05, "loss": 1.9881, "step": 3715 }, { "epoch": 0.20712334875425004, "grad_norm": 0.5776458382606506, "learning_rate": 9.102996600116786e-05, "loss": 1.8194, "step": 3716 }, { "epoch": 0.2071790870074132, "grad_norm": 0.6059629917144775, "learning_rate": 9.102487654442758e-05, "loss": 1.9822, "step": 3717 }, { "epoch": 0.20723482526057632, "grad_norm": 0.5408186912536621, "learning_rate": 9.101978578661738e-05, "loss": 1.8422, "step": 3718 }, { "epoch": 0.20729056351373948, "grad_norm": 0.5199152231216431, "learning_rate": 9.101469372789874e-05, "loss": 1.6269, "step": 3719 }, { "epoch": 0.20734630176690264, "grad_norm": 0.4990878105163574, "learning_rate": 9.100960036843317e-05, "loss": 1.6431, "step": 3720 }, { "epoch": 0.20740204002006576, "grad_norm": 0.539283812046051, "learning_rate": 9.100450570838216e-05, "loss": 1.6332, "step": 3721 }, { "epoch": 0.20745777827322892, "grad_norm": 0.4963357150554657, "learning_rate": 9.09994097479073e-05, "loss": 1.4083, "step": 3722 }, { "epoch": 0.20751351652639205, "grad_norm": 0.5257975459098816, "learning_rate": 9.099431248717022e-05, "loss": 1.673, "step": 3723 }, { "epoch": 0.2075692547795552, "grad_norm": 0.5869825482368469, "learning_rate": 9.098921392633255e-05, "loss": 1.8618, "step": 3724 }, { "epoch": 0.20762499303271836, "grad_norm": 0.5818216800689697, "learning_rate": 9.0984114065556e-05, "loss": 1.761, "step": 3725 }, { "epoch": 0.2076807312858815, "grad_norm": 0.5281986594200134, "learning_rate": 9.097901290500231e-05, "loss": 1.5652, "step": 3726 }, { "epoch": 0.20773646953904465, "grad_norm": 0.5425719618797302, "learning_rate": 9.097391044483325e-05, "loss": 1.6899, "step": 3727 }, { "epoch": 0.2077922077922078, "grad_norm": 0.5924318432807922, "learning_rate": 9.096880668521066e-05, "loss": 2.0674, "step": 3728 }, { "epoch": 0.20784794604537093, "grad_norm": 0.5444379448890686, "learning_rate": 9.096370162629637e-05, "loss": 1.8427, "step": 3729 }, { "epoch": 0.2079036842985341, "grad_norm": 0.5292965769767761, "learning_rate": 9.09585952682523e-05, "loss": 1.6487, "step": 3730 }, { "epoch": 0.20795942255169722, "grad_norm": 0.5337923765182495, "learning_rate": 9.09534876112404e-05, "loss": 1.7153, "step": 3731 }, { "epoch": 0.20801516080486038, "grad_norm": 0.5366414785385132, "learning_rate": 9.094837865542265e-05, "loss": 1.7336, "step": 3732 }, { "epoch": 0.20807089905802353, "grad_norm": 0.5158184766769409, "learning_rate": 9.094326840096106e-05, "loss": 1.4747, "step": 3733 }, { "epoch": 0.20812663731118666, "grad_norm": 0.5793300867080688, "learning_rate": 9.093815684801772e-05, "loss": 1.67, "step": 3734 }, { "epoch": 0.20818237556434982, "grad_norm": 0.57293701171875, "learning_rate": 9.093304399675474e-05, "loss": 1.8801, "step": 3735 }, { "epoch": 0.20823811381751295, "grad_norm": 0.514213502407074, "learning_rate": 9.092792984733425e-05, "loss": 1.5878, "step": 3736 }, { "epoch": 0.2082938520706761, "grad_norm": 0.5890586376190186, "learning_rate": 9.092281439991846e-05, "loss": 1.9247, "step": 3737 }, { "epoch": 0.20834959032383926, "grad_norm": 0.5602766275405884, "learning_rate": 9.091769765466959e-05, "loss": 1.7421, "step": 3738 }, { "epoch": 0.2084053285770024, "grad_norm": 0.586161196231842, "learning_rate": 9.091257961174991e-05, "loss": 2.0567, "step": 3739 }, { "epoch": 0.20846106683016555, "grad_norm": 0.5134695768356323, "learning_rate": 9.090746027132175e-05, "loss": 1.6464, "step": 3740 }, { "epoch": 0.20851680508332868, "grad_norm": 0.5447134375572205, "learning_rate": 9.090233963354746e-05, "loss": 1.8313, "step": 3741 }, { "epoch": 0.20857254333649183, "grad_norm": 0.5118534564971924, "learning_rate": 9.089721769858943e-05, "loss": 1.629, "step": 3742 }, { "epoch": 0.208628281589655, "grad_norm": 0.5482544898986816, "learning_rate": 9.08920944666101e-05, "loss": 1.6353, "step": 3743 }, { "epoch": 0.20868401984281812, "grad_norm": 0.542334258556366, "learning_rate": 9.088696993777194e-05, "loss": 1.6882, "step": 3744 }, { "epoch": 0.20873975809598128, "grad_norm": 0.527746319770813, "learning_rate": 9.08818441122375e-05, "loss": 1.5986, "step": 3745 }, { "epoch": 0.2087954963491444, "grad_norm": 0.5480045080184937, "learning_rate": 9.08767169901693e-05, "loss": 1.6445, "step": 3746 }, { "epoch": 0.20885123460230756, "grad_norm": 0.5573908686637878, "learning_rate": 9.087158857172999e-05, "loss": 1.851, "step": 3747 }, { "epoch": 0.20890697285547072, "grad_norm": 0.5698862671852112, "learning_rate": 9.086645885708218e-05, "loss": 1.6359, "step": 3748 }, { "epoch": 0.20896271110863385, "grad_norm": 0.557510256767273, "learning_rate": 9.086132784638857e-05, "loss": 1.7563, "step": 3749 }, { "epoch": 0.209018449361797, "grad_norm": 0.5576832890510559, "learning_rate": 9.085619553981187e-05, "loss": 1.8104, "step": 3750 }, { "epoch": 0.20907418761496016, "grad_norm": 0.5342584848403931, "learning_rate": 9.085106193751485e-05, "loss": 1.4561, "step": 3751 }, { "epoch": 0.2091299258681233, "grad_norm": 0.5547382235527039, "learning_rate": 9.084592703966033e-05, "loss": 1.6986, "step": 3752 }, { "epoch": 0.20918566412128645, "grad_norm": 0.5614180564880371, "learning_rate": 9.084079084641115e-05, "loss": 1.7837, "step": 3753 }, { "epoch": 0.20924140237444958, "grad_norm": 0.5065221786499023, "learning_rate": 9.083565335793021e-05, "loss": 1.7262, "step": 3754 }, { "epoch": 0.20929714062761273, "grad_norm": 0.5504621863365173, "learning_rate": 9.083051457438043e-05, "loss": 1.7848, "step": 3755 }, { "epoch": 0.2093528788807759, "grad_norm": 0.5882393717765808, "learning_rate": 9.082537449592479e-05, "loss": 2.0356, "step": 3756 }, { "epoch": 0.20940861713393902, "grad_norm": 0.6157543063163757, "learning_rate": 9.08202331227263e-05, "loss": 1.9959, "step": 3757 }, { "epoch": 0.20946435538710217, "grad_norm": 0.5493510961532593, "learning_rate": 9.0815090454948e-05, "loss": 1.7899, "step": 3758 }, { "epoch": 0.2095200936402653, "grad_norm": 0.5107924938201904, "learning_rate": 9.0809946492753e-05, "loss": 1.4062, "step": 3759 }, { "epoch": 0.20957583189342846, "grad_norm": 0.5571010112762451, "learning_rate": 9.080480123630444e-05, "loss": 1.6807, "step": 3760 }, { "epoch": 0.20963157014659162, "grad_norm": 0.5510861277580261, "learning_rate": 9.07996546857655e-05, "loss": 1.9714, "step": 3761 }, { "epoch": 0.20968730839975475, "grad_norm": 0.531609296798706, "learning_rate": 9.07945068412994e-05, "loss": 1.7811, "step": 3762 }, { "epoch": 0.2097430466529179, "grad_norm": 0.5203907489776611, "learning_rate": 9.078935770306938e-05, "loss": 1.7003, "step": 3763 }, { "epoch": 0.20979878490608103, "grad_norm": 0.5677714347839355, "learning_rate": 9.078420727123874e-05, "loss": 2.0188, "step": 3764 }, { "epoch": 0.2098545231592442, "grad_norm": 0.5568066239356995, "learning_rate": 9.077905554597086e-05, "loss": 1.7745, "step": 3765 }, { "epoch": 0.20991026141240735, "grad_norm": 0.564201831817627, "learning_rate": 9.077390252742907e-05, "loss": 1.7723, "step": 3766 }, { "epoch": 0.20996599966557047, "grad_norm": 0.569828450679779, "learning_rate": 9.076874821577683e-05, "loss": 1.7731, "step": 3767 }, { "epoch": 0.21002173791873363, "grad_norm": 0.5601812601089478, "learning_rate": 9.07635926111776e-05, "loss": 1.6495, "step": 3768 }, { "epoch": 0.21007747617189676, "grad_norm": 0.6098621487617493, "learning_rate": 9.075843571379488e-05, "loss": 1.9732, "step": 3769 }, { "epoch": 0.21013321442505992, "grad_norm": 0.5688888430595398, "learning_rate": 9.075327752379221e-05, "loss": 1.8381, "step": 3770 }, { "epoch": 0.21018895267822307, "grad_norm": 0.5635893940925598, "learning_rate": 9.074811804133318e-05, "loss": 1.7662, "step": 3771 }, { "epoch": 0.2102446909313862, "grad_norm": 0.5132915377616882, "learning_rate": 9.074295726658144e-05, "loss": 1.6434, "step": 3772 }, { "epoch": 0.21030042918454936, "grad_norm": 0.5504310727119446, "learning_rate": 9.073779519970065e-05, "loss": 1.7831, "step": 3773 }, { "epoch": 0.21035616743771252, "grad_norm": 0.5861356258392334, "learning_rate": 9.07326318408545e-05, "loss": 1.9085, "step": 3774 }, { "epoch": 0.21041190569087564, "grad_norm": 0.5746229887008667, "learning_rate": 9.072746719020676e-05, "loss": 1.851, "step": 3775 }, { "epoch": 0.2104676439440388, "grad_norm": 0.5618278980255127, "learning_rate": 9.072230124792121e-05, "loss": 1.9007, "step": 3776 }, { "epoch": 0.21052338219720193, "grad_norm": 0.5574671030044556, "learning_rate": 9.07171340141617e-05, "loss": 1.7664, "step": 3777 }, { "epoch": 0.2105791204503651, "grad_norm": 0.5418394207954407, "learning_rate": 9.071196548909208e-05, "loss": 1.5912, "step": 3778 }, { "epoch": 0.21063485870352824, "grad_norm": 0.5579066872596741, "learning_rate": 9.070679567287631e-05, "loss": 1.8595, "step": 3779 }, { "epoch": 0.21069059695669137, "grad_norm": 0.5038254261016846, "learning_rate": 9.07016245656783e-05, "loss": 1.5864, "step": 3780 }, { "epoch": 0.21074633520985453, "grad_norm": 0.5985908508300781, "learning_rate": 9.069645216766208e-05, "loss": 1.8166, "step": 3781 }, { "epoch": 0.21080207346301766, "grad_norm": 0.5343535542488098, "learning_rate": 9.069127847899166e-05, "loss": 1.7482, "step": 3782 }, { "epoch": 0.21085781171618082, "grad_norm": 0.513039231300354, "learning_rate": 9.068610349983113e-05, "loss": 1.7065, "step": 3783 }, { "epoch": 0.21091354996934397, "grad_norm": 0.5761904716491699, "learning_rate": 9.068092723034462e-05, "loss": 1.7781, "step": 3784 }, { "epoch": 0.2109692882225071, "grad_norm": 0.5832051634788513, "learning_rate": 9.067574967069628e-05, "loss": 1.7871, "step": 3785 }, { "epoch": 0.21102502647567026, "grad_norm": 0.9756948947906494, "learning_rate": 9.067057082105032e-05, "loss": 1.9512, "step": 3786 }, { "epoch": 0.2110807647288334, "grad_norm": 0.5692908763885498, "learning_rate": 9.066539068157098e-05, "loss": 1.4585, "step": 3787 }, { "epoch": 0.21113650298199654, "grad_norm": 0.5954088568687439, "learning_rate": 9.066020925242256e-05, "loss": 1.9236, "step": 3788 }, { "epoch": 0.2111922412351597, "grad_norm": 0.5660640597343445, "learning_rate": 9.065502653376935e-05, "loss": 1.67, "step": 3789 }, { "epoch": 0.21124797948832283, "grad_norm": 0.5779823064804077, "learning_rate": 9.064984252577573e-05, "loss": 1.8769, "step": 3790 }, { "epoch": 0.21130371774148599, "grad_norm": 0.5380722880363464, "learning_rate": 9.064465722860611e-05, "loss": 1.6658, "step": 3791 }, { "epoch": 0.21135945599464911, "grad_norm": 0.5925493836402893, "learning_rate": 9.063947064242495e-05, "loss": 1.7003, "step": 3792 }, { "epoch": 0.21141519424781227, "grad_norm": 0.5475820899009705, "learning_rate": 9.063428276739671e-05, "loss": 1.7658, "step": 3793 }, { "epoch": 0.21147093250097543, "grad_norm": 0.5608733296394348, "learning_rate": 9.062909360368595e-05, "loss": 1.7443, "step": 3794 }, { "epoch": 0.21152667075413856, "grad_norm": 0.5024067163467407, "learning_rate": 9.062390315145723e-05, "loss": 1.4051, "step": 3795 }, { "epoch": 0.2115824090073017, "grad_norm": 0.5922258496284485, "learning_rate": 9.061871141087515e-05, "loss": 1.6788, "step": 3796 }, { "epoch": 0.21163814726046487, "grad_norm": 0.5388537645339966, "learning_rate": 9.061351838210434e-05, "loss": 1.7306, "step": 3797 }, { "epoch": 0.211693885513628, "grad_norm": 0.5710194706916809, "learning_rate": 9.060832406530955e-05, "loss": 1.7759, "step": 3798 }, { "epoch": 0.21174962376679116, "grad_norm": 0.5648775696754456, "learning_rate": 9.060312846065548e-05, "loss": 1.8155, "step": 3799 }, { "epoch": 0.21180536201995429, "grad_norm": 0.5653148293495178, "learning_rate": 9.059793156830691e-05, "loss": 1.9103, "step": 3800 }, { "epoch": 0.21186110027311744, "grad_norm": 0.5372900366783142, "learning_rate": 9.059273338842864e-05, "loss": 1.6333, "step": 3801 }, { "epoch": 0.2119168385262806, "grad_norm": 0.6031267046928406, "learning_rate": 9.058753392118555e-05, "loss": 1.9328, "step": 3802 }, { "epoch": 0.21197257677944373, "grad_norm": 0.5510583519935608, "learning_rate": 9.058233316674252e-05, "loss": 1.6296, "step": 3803 }, { "epoch": 0.21202831503260688, "grad_norm": 0.5591006875038147, "learning_rate": 9.057713112526449e-05, "loss": 1.6512, "step": 3804 }, { "epoch": 0.21208405328577, "grad_norm": 0.5050638318061829, "learning_rate": 9.057192779691645e-05, "loss": 1.793, "step": 3805 }, { "epoch": 0.21213979153893317, "grad_norm": 0.5485342144966125, "learning_rate": 9.05667231818634e-05, "loss": 1.8989, "step": 3806 }, { "epoch": 0.21219552979209633, "grad_norm": 0.536729633808136, "learning_rate": 9.056151728027042e-05, "loss": 1.7235, "step": 3807 }, { "epoch": 0.21225126804525946, "grad_norm": 0.5280648469924927, "learning_rate": 9.055631009230262e-05, "loss": 1.779, "step": 3808 }, { "epoch": 0.2123070062984226, "grad_norm": 0.5353644490242004, "learning_rate": 9.05511016181251e-05, "loss": 1.9023, "step": 3809 }, { "epoch": 0.21236274455158574, "grad_norm": 0.5420893430709839, "learning_rate": 9.054589185790305e-05, "loss": 1.6484, "step": 3810 }, { "epoch": 0.2124184828047489, "grad_norm": 0.49997881054878235, "learning_rate": 9.054068081180173e-05, "loss": 1.6866, "step": 3811 }, { "epoch": 0.21247422105791205, "grad_norm": 0.540344774723053, "learning_rate": 9.05354684799864e-05, "loss": 1.6013, "step": 3812 }, { "epoch": 0.21252995931107518, "grad_norm": 0.5512065291404724, "learning_rate": 9.053025486262231e-05, "loss": 1.7741, "step": 3813 }, { "epoch": 0.21258569756423834, "grad_norm": 0.5562829375267029, "learning_rate": 9.052503995987488e-05, "loss": 1.7829, "step": 3814 }, { "epoch": 0.21264143581740147, "grad_norm": 0.528271496295929, "learning_rate": 9.051982377190944e-05, "loss": 1.6395, "step": 3815 }, { "epoch": 0.21269717407056463, "grad_norm": 0.5158810019493103, "learning_rate": 9.051460629889144e-05, "loss": 1.4752, "step": 3816 }, { "epoch": 0.21275291232372778, "grad_norm": 0.5320451855659485, "learning_rate": 9.050938754098635e-05, "loss": 1.7896, "step": 3817 }, { "epoch": 0.2128086505768909, "grad_norm": 0.503190279006958, "learning_rate": 9.050416749835968e-05, "loss": 1.5488, "step": 3818 }, { "epoch": 0.21286438883005407, "grad_norm": 0.561086893081665, "learning_rate": 9.049894617117696e-05, "loss": 1.9669, "step": 3819 }, { "epoch": 0.21292012708321723, "grad_norm": 0.5414785146713257, "learning_rate": 9.04937235596038e-05, "loss": 1.761, "step": 3820 }, { "epoch": 0.21297586533638035, "grad_norm": 0.5729870796203613, "learning_rate": 9.04884996638058e-05, "loss": 1.7399, "step": 3821 }, { "epoch": 0.2130316035895435, "grad_norm": 0.5905429720878601, "learning_rate": 9.048327448394868e-05, "loss": 1.863, "step": 3822 }, { "epoch": 0.21308734184270664, "grad_norm": 0.5414051413536072, "learning_rate": 9.047804802019813e-05, "loss": 1.4662, "step": 3823 }, { "epoch": 0.2131430800958698, "grad_norm": 0.5677713751792908, "learning_rate": 9.047282027271988e-05, "loss": 1.7569, "step": 3824 }, { "epoch": 0.21319881834903295, "grad_norm": 0.5208271145820618, "learning_rate": 9.046759124167976e-05, "loss": 1.5647, "step": 3825 }, { "epoch": 0.21325455660219608, "grad_norm": 0.5930595397949219, "learning_rate": 9.046236092724357e-05, "loss": 1.8287, "step": 3826 }, { "epoch": 0.21331029485535924, "grad_norm": 0.5405799150466919, "learning_rate": 9.045712932957722e-05, "loss": 1.7175, "step": 3827 }, { "epoch": 0.21336603310852237, "grad_norm": 0.5690011382102966, "learning_rate": 9.045189644884661e-05, "loss": 1.9759, "step": 3828 }, { "epoch": 0.21342177136168552, "grad_norm": 0.5739631652832031, "learning_rate": 9.04466622852177e-05, "loss": 1.7102, "step": 3829 }, { "epoch": 0.21347750961484868, "grad_norm": 0.5377629399299622, "learning_rate": 9.044142683885645e-05, "loss": 1.647, "step": 3830 }, { "epoch": 0.2135332478680118, "grad_norm": 0.6439347267150879, "learning_rate": 9.043619010992897e-05, "loss": 2.2611, "step": 3831 }, { "epoch": 0.21358898612117497, "grad_norm": 0.527803897857666, "learning_rate": 9.043095209860128e-05, "loss": 1.7279, "step": 3832 }, { "epoch": 0.2136447243743381, "grad_norm": 0.549921452999115, "learning_rate": 9.042571280503951e-05, "loss": 1.7293, "step": 3833 }, { "epoch": 0.21370046262750125, "grad_norm": 0.5477808713912964, "learning_rate": 9.042047222940985e-05, "loss": 1.8327, "step": 3834 }, { "epoch": 0.2137562008806644, "grad_norm": 0.6052964329719543, "learning_rate": 9.041523037187847e-05, "loss": 1.6961, "step": 3835 }, { "epoch": 0.21381193913382754, "grad_norm": 0.5640259385108948, "learning_rate": 9.04099872326116e-05, "loss": 1.8019, "step": 3836 }, { "epoch": 0.2138676773869907, "grad_norm": 0.5238528251647949, "learning_rate": 9.040474281177557e-05, "loss": 1.7182, "step": 3837 }, { "epoch": 0.21392341564015382, "grad_norm": 0.561298668384552, "learning_rate": 9.039949710953665e-05, "loss": 1.903, "step": 3838 }, { "epoch": 0.21397915389331698, "grad_norm": 0.5629448890686035, "learning_rate": 9.039425012606125e-05, "loss": 1.6652, "step": 3839 }, { "epoch": 0.21403489214648014, "grad_norm": 0.5578324794769287, "learning_rate": 9.038900186151575e-05, "loss": 1.8062, "step": 3840 }, { "epoch": 0.21409063039964327, "grad_norm": 0.5517327785491943, "learning_rate": 9.038375231606659e-05, "loss": 1.7376, "step": 3841 }, { "epoch": 0.21414636865280642, "grad_norm": 0.5653707385063171, "learning_rate": 9.037850148988026e-05, "loss": 1.7724, "step": 3842 }, { "epoch": 0.21420210690596958, "grad_norm": 0.6022188663482666, "learning_rate": 9.037324938312327e-05, "loss": 1.9338, "step": 3843 }, { "epoch": 0.2142578451591327, "grad_norm": 0.5128300189971924, "learning_rate": 9.036799599596222e-05, "loss": 1.6685, "step": 3844 }, { "epoch": 0.21431358341229587, "grad_norm": 0.5680099129676819, "learning_rate": 9.036274132856368e-05, "loss": 1.6111, "step": 3845 }, { "epoch": 0.214369321665459, "grad_norm": 0.5332833528518677, "learning_rate": 9.035748538109433e-05, "loss": 1.7406, "step": 3846 }, { "epoch": 0.21442505991862215, "grad_norm": 0.5845235586166382, "learning_rate": 9.035222815372084e-05, "loss": 2.0365, "step": 3847 }, { "epoch": 0.2144807981717853, "grad_norm": 0.536208987236023, "learning_rate": 9.034696964660996e-05, "loss": 1.7869, "step": 3848 }, { "epoch": 0.21453653642494844, "grad_norm": 0.6078736782073975, "learning_rate": 9.034170985992843e-05, "loss": 1.9884, "step": 3849 }, { "epoch": 0.2145922746781116, "grad_norm": 0.5227762460708618, "learning_rate": 9.033644879384307e-05, "loss": 1.7483, "step": 3850 }, { "epoch": 0.21464801293127472, "grad_norm": 0.555255115032196, "learning_rate": 9.033118644852073e-05, "loss": 1.7319, "step": 3851 }, { "epoch": 0.21470375118443788, "grad_norm": 0.5747233033180237, "learning_rate": 9.032592282412831e-05, "loss": 1.806, "step": 3852 }, { "epoch": 0.21475948943760104, "grad_norm": 0.5099679231643677, "learning_rate": 9.032065792083271e-05, "loss": 1.7784, "step": 3853 }, { "epoch": 0.21481522769076417, "grad_norm": 0.583080530166626, "learning_rate": 9.031539173880095e-05, "loss": 1.8283, "step": 3854 }, { "epoch": 0.21487096594392732, "grad_norm": 0.5755245089530945, "learning_rate": 9.031012427820003e-05, "loss": 1.8088, "step": 3855 }, { "epoch": 0.21492670419709045, "grad_norm": 0.6300316452980042, "learning_rate": 9.030485553919696e-05, "loss": 2.021, "step": 3856 }, { "epoch": 0.2149824424502536, "grad_norm": 0.48787984251976013, "learning_rate": 9.029958552195889e-05, "loss": 1.7416, "step": 3857 }, { "epoch": 0.21503818070341676, "grad_norm": 0.5602289438247681, "learning_rate": 9.029431422665292e-05, "loss": 1.7158, "step": 3858 }, { "epoch": 0.2150939189565799, "grad_norm": 0.6266565918922424, "learning_rate": 9.028904165344622e-05, "loss": 1.904, "step": 3859 }, { "epoch": 0.21514965720974305, "grad_norm": 0.5256897211074829, "learning_rate": 9.028376780250605e-05, "loss": 1.5227, "step": 3860 }, { "epoch": 0.21520539546290618, "grad_norm": 0.5775957107543945, "learning_rate": 9.027849267399962e-05, "loss": 1.8613, "step": 3861 }, { "epoch": 0.21526113371606934, "grad_norm": 0.5759565830230713, "learning_rate": 9.027321626809425e-05, "loss": 1.7657, "step": 3862 }, { "epoch": 0.2153168719692325, "grad_norm": 0.5388328433036804, "learning_rate": 9.026793858495727e-05, "loss": 1.7117, "step": 3863 }, { "epoch": 0.21537261022239562, "grad_norm": 0.5749773383140564, "learning_rate": 9.026265962475605e-05, "loss": 1.781, "step": 3864 }, { "epoch": 0.21542834847555878, "grad_norm": 0.5567165017127991, "learning_rate": 9.025737938765801e-05, "loss": 1.8402, "step": 3865 }, { "epoch": 0.21548408672872194, "grad_norm": 0.5531468987464905, "learning_rate": 9.025209787383062e-05, "loss": 1.637, "step": 3866 }, { "epoch": 0.21553982498188506, "grad_norm": 0.5598788261413574, "learning_rate": 9.024681508344138e-05, "loss": 1.7487, "step": 3867 }, { "epoch": 0.21559556323504822, "grad_norm": 0.5670254826545715, "learning_rate": 9.024153101665782e-05, "loss": 1.8556, "step": 3868 }, { "epoch": 0.21565130148821135, "grad_norm": 0.5822195410728455, "learning_rate": 9.02362456736475e-05, "loss": 1.8144, "step": 3869 }, { "epoch": 0.2157070397413745, "grad_norm": 0.5438206791877747, "learning_rate": 9.023095905457807e-05, "loss": 1.7709, "step": 3870 }, { "epoch": 0.21576277799453766, "grad_norm": 0.5582990646362305, "learning_rate": 9.022567115961718e-05, "loss": 1.7373, "step": 3871 }, { "epoch": 0.2158185162477008, "grad_norm": 0.5481442809104919, "learning_rate": 9.022038198893254e-05, "loss": 1.642, "step": 3872 }, { "epoch": 0.21587425450086395, "grad_norm": 0.5365943312644958, "learning_rate": 9.021509154269187e-05, "loss": 1.7393, "step": 3873 }, { "epoch": 0.21592999275402708, "grad_norm": 0.5595213174819946, "learning_rate": 9.0209799821063e-05, "loss": 1.7803, "step": 3874 }, { "epoch": 0.21598573100719023, "grad_norm": 0.5171288251876831, "learning_rate": 9.020450682421368e-05, "loss": 1.6007, "step": 3875 }, { "epoch": 0.2160414692603534, "grad_norm": 0.5536861419677734, "learning_rate": 9.019921255231183e-05, "loss": 1.7964, "step": 3876 }, { "epoch": 0.21609720751351652, "grad_norm": 0.5218709707260132, "learning_rate": 9.019391700552533e-05, "loss": 1.7572, "step": 3877 }, { "epoch": 0.21615294576667968, "grad_norm": 0.5276560187339783, "learning_rate": 9.018862018402214e-05, "loss": 1.7768, "step": 3878 }, { "epoch": 0.2162086840198428, "grad_norm": 0.509636640548706, "learning_rate": 9.018332208797023e-05, "loss": 1.8262, "step": 3879 }, { "epoch": 0.21626442227300596, "grad_norm": 0.5426955819129944, "learning_rate": 9.017802271753763e-05, "loss": 1.7966, "step": 3880 }, { "epoch": 0.21632016052616912, "grad_norm": 0.5915662050247192, "learning_rate": 9.017272207289241e-05, "loss": 1.7047, "step": 3881 }, { "epoch": 0.21637589877933225, "grad_norm": 0.5025647282600403, "learning_rate": 9.016742015420264e-05, "loss": 1.6662, "step": 3882 }, { "epoch": 0.2164316370324954, "grad_norm": 0.5097705125808716, "learning_rate": 9.016211696163651e-05, "loss": 1.6667, "step": 3883 }, { "epoch": 0.21648737528565853, "grad_norm": 0.5540134310722351, "learning_rate": 9.015681249536219e-05, "loss": 1.7085, "step": 3884 }, { "epoch": 0.2165431135388217, "grad_norm": 0.5509772300720215, "learning_rate": 9.015150675554791e-05, "loss": 1.7739, "step": 3885 }, { "epoch": 0.21659885179198485, "grad_norm": 0.519534170627594, "learning_rate": 9.014619974236195e-05, "loss": 1.5412, "step": 3886 }, { "epoch": 0.21665459004514798, "grad_norm": 0.5313923954963684, "learning_rate": 9.014089145597259e-05, "loss": 1.6956, "step": 3887 }, { "epoch": 0.21671032829831113, "grad_norm": 0.5057397484779358, "learning_rate": 9.013558189654819e-05, "loss": 1.6772, "step": 3888 }, { "epoch": 0.2167660665514743, "grad_norm": 0.5538941621780396, "learning_rate": 9.013027106425713e-05, "loss": 1.7071, "step": 3889 }, { "epoch": 0.21682180480463742, "grad_norm": 0.5932080149650574, "learning_rate": 9.012495895926786e-05, "loss": 1.9886, "step": 3890 }, { "epoch": 0.21687754305780058, "grad_norm": 0.5497404932975769, "learning_rate": 9.011964558174884e-05, "loss": 1.6111, "step": 3891 }, { "epoch": 0.2169332813109637, "grad_norm": 0.5296292304992676, "learning_rate": 9.011433093186856e-05, "loss": 1.7192, "step": 3892 }, { "epoch": 0.21698901956412686, "grad_norm": 0.5682234168052673, "learning_rate": 9.01090150097956e-05, "loss": 1.727, "step": 3893 }, { "epoch": 0.21704475781729002, "grad_norm": 0.49014294147491455, "learning_rate": 9.010369781569854e-05, "loss": 1.5865, "step": 3894 }, { "epoch": 0.21710049607045315, "grad_norm": 0.5291064381599426, "learning_rate": 9.009837934974598e-05, "loss": 1.6708, "step": 3895 }, { "epoch": 0.2171562343236163, "grad_norm": 0.5380057096481323, "learning_rate": 9.009305961210664e-05, "loss": 1.816, "step": 3896 }, { "epoch": 0.21721197257677943, "grad_norm": 0.5304032564163208, "learning_rate": 9.008773860294921e-05, "loss": 1.6085, "step": 3897 }, { "epoch": 0.2172677108299426, "grad_norm": 0.5649582147598267, "learning_rate": 9.008241632244243e-05, "loss": 2.0664, "step": 3898 }, { "epoch": 0.21732344908310575, "grad_norm": 0.5284783840179443, "learning_rate": 9.00770927707551e-05, "loss": 1.6078, "step": 3899 }, { "epoch": 0.21737918733626888, "grad_norm": 0.5097172856330872, "learning_rate": 9.007176794805606e-05, "loss": 1.6985, "step": 3900 }, { "epoch": 0.21743492558943203, "grad_norm": 0.5433828830718994, "learning_rate": 9.006644185451416e-05, "loss": 1.824, "step": 3901 }, { "epoch": 0.21749066384259516, "grad_norm": 0.5155694484710693, "learning_rate": 9.006111449029835e-05, "loss": 1.674, "step": 3902 }, { "epoch": 0.21754640209575832, "grad_norm": 0.4952467978000641, "learning_rate": 9.005578585557754e-05, "loss": 1.5491, "step": 3903 }, { "epoch": 0.21760214034892147, "grad_norm": 0.5352423191070557, "learning_rate": 9.005045595052077e-05, "loss": 1.7583, "step": 3904 }, { "epoch": 0.2176578786020846, "grad_norm": 0.5036554336547852, "learning_rate": 9.004512477529702e-05, "loss": 1.6147, "step": 3905 }, { "epoch": 0.21771361685524776, "grad_norm": 0.5414397120475769, "learning_rate": 9.003979233007541e-05, "loss": 1.7576, "step": 3906 }, { "epoch": 0.2177693551084109, "grad_norm": 0.51963871717453, "learning_rate": 9.003445861502502e-05, "loss": 1.7114, "step": 3907 }, { "epoch": 0.21782509336157405, "grad_norm": 0.5667458176612854, "learning_rate": 9.002912363031504e-05, "loss": 1.904, "step": 3908 }, { "epoch": 0.2178808316147372, "grad_norm": 0.5066022872924805, "learning_rate": 9.002378737611463e-05, "loss": 1.5851, "step": 3909 }, { "epoch": 0.21793656986790033, "grad_norm": 0.5155694484710693, "learning_rate": 9.001844985259303e-05, "loss": 1.6766, "step": 3910 }, { "epoch": 0.2179923081210635, "grad_norm": 0.5910778641700745, "learning_rate": 9.001311105991954e-05, "loss": 1.6309, "step": 3911 }, { "epoch": 0.21804804637422665, "grad_norm": 0.5524371862411499, "learning_rate": 9.000777099826345e-05, "loss": 1.5347, "step": 3912 }, { "epoch": 0.21810378462738977, "grad_norm": 0.5852683186531067, "learning_rate": 9.000242966779412e-05, "loss": 1.7077, "step": 3913 }, { "epoch": 0.21815952288055293, "grad_norm": 0.511112630367279, "learning_rate": 8.999708706868097e-05, "loss": 1.4288, "step": 3914 }, { "epoch": 0.21821526113371606, "grad_norm": 0.553582489490509, "learning_rate": 8.999174320109343e-05, "loss": 1.6114, "step": 3915 }, { "epoch": 0.21827099938687922, "grad_norm": 0.5207599401473999, "learning_rate": 8.998639806520092e-05, "loss": 1.6002, "step": 3916 }, { "epoch": 0.21832673764004237, "grad_norm": 0.520836591720581, "learning_rate": 8.998105166117304e-05, "loss": 1.7308, "step": 3917 }, { "epoch": 0.2183824758932055, "grad_norm": 0.5346881151199341, "learning_rate": 8.99757039891793e-05, "loss": 1.7732, "step": 3918 }, { "epoch": 0.21843821414636866, "grad_norm": 0.5407224893569946, "learning_rate": 8.997035504938928e-05, "loss": 1.6927, "step": 3919 }, { "epoch": 0.2184939523995318, "grad_norm": 0.6079891324043274, "learning_rate": 8.996500484197266e-05, "loss": 1.7503, "step": 3920 }, { "epoch": 0.21854969065269494, "grad_norm": 0.5896045565605164, "learning_rate": 8.995965336709908e-05, "loss": 1.8189, "step": 3921 }, { "epoch": 0.2186054289058581, "grad_norm": 0.5681061148643494, "learning_rate": 8.99543006249383e-05, "loss": 1.9138, "step": 3922 }, { "epoch": 0.21866116715902123, "grad_norm": 0.5397033095359802, "learning_rate": 8.994894661566004e-05, "loss": 1.6947, "step": 3923 }, { "epoch": 0.2187169054121844, "grad_norm": 0.5442162752151489, "learning_rate": 8.994359133943411e-05, "loss": 1.7947, "step": 3924 }, { "epoch": 0.21877264366534752, "grad_norm": 0.5366693735122681, "learning_rate": 8.993823479643036e-05, "loss": 1.8557, "step": 3925 }, { "epoch": 0.21882838191851067, "grad_norm": 0.5018730163574219, "learning_rate": 8.993287698681867e-05, "loss": 1.6033, "step": 3926 }, { "epoch": 0.21888412017167383, "grad_norm": 0.5234804749488831, "learning_rate": 8.992751791076893e-05, "loss": 1.6927, "step": 3927 }, { "epoch": 0.21893985842483696, "grad_norm": 0.5351289510726929, "learning_rate": 8.992215756845111e-05, "loss": 1.6108, "step": 3928 }, { "epoch": 0.21899559667800012, "grad_norm": 0.5499307513237, "learning_rate": 8.991679596003521e-05, "loss": 1.821, "step": 3929 }, { "epoch": 0.21905133493116324, "grad_norm": 0.5461710691452026, "learning_rate": 8.991143308569129e-05, "loss": 1.6755, "step": 3930 }, { "epoch": 0.2191070731843264, "grad_norm": 0.557220458984375, "learning_rate": 8.990606894558942e-05, "loss": 1.7568, "step": 3931 }, { "epoch": 0.21916281143748956, "grad_norm": 0.5313843488693237, "learning_rate": 8.99007035398997e-05, "loss": 1.5701, "step": 3932 }, { "epoch": 0.2192185496906527, "grad_norm": 0.5466028451919556, "learning_rate": 8.98953368687923e-05, "loss": 1.7533, "step": 3933 }, { "epoch": 0.21927428794381584, "grad_norm": 0.5278179049491882, "learning_rate": 8.988996893243742e-05, "loss": 1.6604, "step": 3934 }, { "epoch": 0.219330026196979, "grad_norm": 0.5555846095085144, "learning_rate": 8.988459973100529e-05, "loss": 1.9101, "step": 3935 }, { "epoch": 0.21938576445014213, "grad_norm": 0.5475595593452454, "learning_rate": 8.987922926466621e-05, "loss": 1.6784, "step": 3936 }, { "epoch": 0.21944150270330529, "grad_norm": 0.5606985092163086, "learning_rate": 8.98738575335905e-05, "loss": 1.8496, "step": 3937 }, { "epoch": 0.21949724095646841, "grad_norm": 0.5272994041442871, "learning_rate": 8.986848453794849e-05, "loss": 1.6477, "step": 3938 }, { "epoch": 0.21955297920963157, "grad_norm": 0.5808579325675964, "learning_rate": 8.986311027791061e-05, "loss": 1.9312, "step": 3939 }, { "epoch": 0.21960871746279473, "grad_norm": 0.5892482399940491, "learning_rate": 8.985773475364729e-05, "loss": 1.8278, "step": 3940 }, { "epoch": 0.21966445571595786, "grad_norm": 0.5204423069953918, "learning_rate": 8.9852357965329e-05, "loss": 1.5689, "step": 3941 }, { "epoch": 0.219720193969121, "grad_norm": 0.5408873558044434, "learning_rate": 8.984697991312629e-05, "loss": 1.6719, "step": 3942 }, { "epoch": 0.21977593222228414, "grad_norm": 0.4690547585487366, "learning_rate": 8.98416005972097e-05, "loss": 1.4167, "step": 3943 }, { "epoch": 0.2198316704754473, "grad_norm": 0.5128321647644043, "learning_rate": 8.98362200177498e-05, "loss": 1.5936, "step": 3944 }, { "epoch": 0.21988740872861046, "grad_norm": 0.5651824474334717, "learning_rate": 8.98308381749173e-05, "loss": 1.7715, "step": 3945 }, { "epoch": 0.21994314698177359, "grad_norm": 0.49932271242141724, "learning_rate": 8.982545506888282e-05, "loss": 1.5167, "step": 3946 }, { "epoch": 0.21999888523493674, "grad_norm": 0.5488872528076172, "learning_rate": 8.982007069981711e-05, "loss": 1.6694, "step": 3947 }, { "epoch": 0.22005462348809987, "grad_norm": 0.5529676079750061, "learning_rate": 8.981468506789093e-05, "loss": 1.7098, "step": 3948 }, { "epoch": 0.22011036174126303, "grad_norm": 0.555151104927063, "learning_rate": 8.980929817327509e-05, "loss": 1.8188, "step": 3949 }, { "epoch": 0.22016609999442618, "grad_norm": 0.5413922667503357, "learning_rate": 8.980391001614039e-05, "loss": 1.6947, "step": 3950 }, { "epoch": 0.2202218382475893, "grad_norm": 0.5880113244056702, "learning_rate": 8.979852059665774e-05, "loss": 1.8565, "step": 3951 }, { "epoch": 0.22027757650075247, "grad_norm": 0.5404399037361145, "learning_rate": 8.979312991499807e-05, "loss": 1.6119, "step": 3952 }, { "epoch": 0.2203333147539156, "grad_norm": 0.5193542838096619, "learning_rate": 8.97877379713323e-05, "loss": 1.5012, "step": 3953 }, { "epoch": 0.22038905300707876, "grad_norm": 0.5563862323760986, "learning_rate": 8.97823447658315e-05, "loss": 1.7968, "step": 3954 }, { "epoch": 0.2204447912602419, "grad_norm": 0.5796663165092468, "learning_rate": 8.977695029866665e-05, "loss": 1.6924, "step": 3955 }, { "epoch": 0.22050052951340504, "grad_norm": 0.5060169100761414, "learning_rate": 8.977155457000886e-05, "loss": 1.6837, "step": 3956 }, { "epoch": 0.2205562677665682, "grad_norm": 0.5254307389259338, "learning_rate": 8.976615758002925e-05, "loss": 1.5339, "step": 3957 }, { "epoch": 0.22061200601973135, "grad_norm": 0.4909488260746002, "learning_rate": 8.976075932889896e-05, "loss": 1.406, "step": 3958 }, { "epoch": 0.22066774427289448, "grad_norm": 0.521052896976471, "learning_rate": 8.97553598167892e-05, "loss": 1.6203, "step": 3959 }, { "epoch": 0.22072348252605764, "grad_norm": 0.5382006764411926, "learning_rate": 8.974995904387123e-05, "loss": 1.6984, "step": 3960 }, { "epoch": 0.22077922077922077, "grad_norm": 0.5354267954826355, "learning_rate": 8.97445570103163e-05, "loss": 1.7722, "step": 3961 }, { "epoch": 0.22083495903238393, "grad_norm": 0.5725782513618469, "learning_rate": 8.973915371629577e-05, "loss": 1.8308, "step": 3962 }, { "epoch": 0.22089069728554708, "grad_norm": 0.5183130502700806, "learning_rate": 8.973374916198096e-05, "loss": 1.6487, "step": 3963 }, { "epoch": 0.2209464355387102, "grad_norm": 0.5026050209999084, "learning_rate": 8.972834334754331e-05, "loss": 1.4931, "step": 3964 }, { "epoch": 0.22100217379187337, "grad_norm": 0.5589287281036377, "learning_rate": 8.972293627315424e-05, "loss": 1.9263, "step": 3965 }, { "epoch": 0.2210579120450365, "grad_norm": 0.5776212811470032, "learning_rate": 8.971752793898522e-05, "loss": 1.8374, "step": 3966 }, { "epoch": 0.22111365029819965, "grad_norm": 0.5569107532501221, "learning_rate": 8.971211834520779e-05, "loss": 1.7221, "step": 3967 }, { "epoch": 0.2211693885513628, "grad_norm": 0.527186930179596, "learning_rate": 8.970670749199351e-05, "loss": 1.713, "step": 3968 }, { "epoch": 0.22122512680452594, "grad_norm": 0.5234454274177551, "learning_rate": 8.970129537951395e-05, "loss": 1.6519, "step": 3969 }, { "epoch": 0.2212808650576891, "grad_norm": 0.5419970154762268, "learning_rate": 8.969588200794079e-05, "loss": 1.5816, "step": 3970 }, { "epoch": 0.22133660331085223, "grad_norm": 0.5328260660171509, "learning_rate": 8.969046737744571e-05, "loss": 1.8442, "step": 3971 }, { "epoch": 0.22139234156401538, "grad_norm": 0.5527640581130981, "learning_rate": 8.968505148820039e-05, "loss": 1.5886, "step": 3972 }, { "epoch": 0.22144807981717854, "grad_norm": 0.5386121869087219, "learning_rate": 8.967963434037663e-05, "loss": 1.8938, "step": 3973 }, { "epoch": 0.22150381807034167, "grad_norm": 0.60856693983078, "learning_rate": 8.967421593414622e-05, "loss": 1.7739, "step": 3974 }, { "epoch": 0.22155955632350482, "grad_norm": 0.5383316278457642, "learning_rate": 8.966879626968099e-05, "loss": 1.5916, "step": 3975 }, { "epoch": 0.22161529457666795, "grad_norm": 0.5469935536384583, "learning_rate": 8.966337534715284e-05, "loss": 1.6879, "step": 3976 }, { "epoch": 0.2216710328298311, "grad_norm": 0.5624483227729797, "learning_rate": 8.965795316673366e-05, "loss": 1.5465, "step": 3977 }, { "epoch": 0.22172677108299427, "grad_norm": 0.571090817451477, "learning_rate": 8.965252972859545e-05, "loss": 1.8477, "step": 3978 }, { "epoch": 0.2217825093361574, "grad_norm": 0.5622638463973999, "learning_rate": 8.964710503291018e-05, "loss": 1.7961, "step": 3979 }, { "epoch": 0.22183824758932055, "grad_norm": 0.54639732837677, "learning_rate": 8.964167907984988e-05, "loss": 1.7795, "step": 3980 }, { "epoch": 0.2218939858424837, "grad_norm": 0.5762872099876404, "learning_rate": 8.963625186958666e-05, "loss": 1.7824, "step": 3981 }, { "epoch": 0.22194972409564684, "grad_norm": 0.5208929777145386, "learning_rate": 8.963082340229263e-05, "loss": 1.7521, "step": 3982 }, { "epoch": 0.22200546234881, "grad_norm": 0.49496889114379883, "learning_rate": 8.962539367813993e-05, "loss": 1.5493, "step": 3983 }, { "epoch": 0.22206120060197312, "grad_norm": 0.4936692714691162, "learning_rate": 8.961996269730078e-05, "loss": 1.5015, "step": 3984 }, { "epoch": 0.22211693885513628, "grad_norm": 0.5555882453918457, "learning_rate": 8.961453045994742e-05, "loss": 1.7563, "step": 3985 }, { "epoch": 0.22217267710829944, "grad_norm": 0.5514853596687317, "learning_rate": 8.960909696625213e-05, "loss": 1.6671, "step": 3986 }, { "epoch": 0.22222841536146257, "grad_norm": 0.5259945392608643, "learning_rate": 8.960366221638721e-05, "loss": 1.7181, "step": 3987 }, { "epoch": 0.22228415361462572, "grad_norm": 0.5564213395118713, "learning_rate": 8.959822621052502e-05, "loss": 1.8017, "step": 3988 }, { "epoch": 0.22233989186778885, "grad_norm": 0.5879985094070435, "learning_rate": 8.959278894883797e-05, "loss": 1.8768, "step": 3989 }, { "epoch": 0.222395630120952, "grad_norm": 0.5429808497428894, "learning_rate": 8.958735043149852e-05, "loss": 1.6246, "step": 3990 }, { "epoch": 0.22245136837411517, "grad_norm": 0.5388792753219604, "learning_rate": 8.958191065867912e-05, "loss": 1.8083, "step": 3991 }, { "epoch": 0.2225071066272783, "grad_norm": 0.5783261060714722, "learning_rate": 8.957646963055227e-05, "loss": 1.9074, "step": 3992 }, { "epoch": 0.22256284488044145, "grad_norm": 0.5076984167098999, "learning_rate": 8.957102734729057e-05, "loss": 1.6518, "step": 3993 }, { "epoch": 0.22261858313360458, "grad_norm": 0.6677889823913574, "learning_rate": 8.956558380906659e-05, "loss": 2.3105, "step": 3994 }, { "epoch": 0.22267432138676774, "grad_norm": 0.5451659560203552, "learning_rate": 8.956013901605299e-05, "loss": 1.7229, "step": 3995 }, { "epoch": 0.2227300596399309, "grad_norm": 0.5508718490600586, "learning_rate": 8.955469296842241e-05, "loss": 1.641, "step": 3996 }, { "epoch": 0.22278579789309402, "grad_norm": 0.5317922234535217, "learning_rate": 8.95492456663476e-05, "loss": 1.6717, "step": 3997 }, { "epoch": 0.22284153614625718, "grad_norm": 0.5446794033050537, "learning_rate": 8.954379711000129e-05, "loss": 1.7382, "step": 3998 }, { "epoch": 0.2228972743994203, "grad_norm": 0.5360628962516785, "learning_rate": 8.95383472995563e-05, "loss": 1.7489, "step": 3999 }, { "epoch": 0.22295301265258347, "grad_norm": 0.5646945238113403, "learning_rate": 8.953289623518545e-05, "loss": 1.7241, "step": 4000 }, { "epoch": 0.22300875090574662, "grad_norm": 0.5079129338264465, "learning_rate": 8.952744391706165e-05, "loss": 1.6683, "step": 4001 }, { "epoch": 0.22306448915890975, "grad_norm": 0.5274491906166077, "learning_rate": 8.952199034535778e-05, "loss": 1.6086, "step": 4002 }, { "epoch": 0.2231202274120729, "grad_norm": 0.5475561618804932, "learning_rate": 8.95165355202468e-05, "loss": 1.9497, "step": 4003 }, { "epoch": 0.22317596566523606, "grad_norm": 0.5520079135894775, "learning_rate": 8.951107944190171e-05, "loss": 1.9735, "step": 4004 }, { "epoch": 0.2232317039183992, "grad_norm": 0.5097377300262451, "learning_rate": 8.950562211049556e-05, "loss": 1.5424, "step": 4005 }, { "epoch": 0.22328744217156235, "grad_norm": 0.5405047535896301, "learning_rate": 8.950016352620139e-05, "loss": 1.6966, "step": 4006 }, { "epoch": 0.22334318042472548, "grad_norm": 0.5254392027854919, "learning_rate": 8.949470368919235e-05, "loss": 1.6651, "step": 4007 }, { "epoch": 0.22339891867788864, "grad_norm": 0.5582841634750366, "learning_rate": 8.948924259964157e-05, "loss": 1.7668, "step": 4008 }, { "epoch": 0.2234546569310518, "grad_norm": 0.5375759601593018, "learning_rate": 8.948378025772227e-05, "loss": 1.7271, "step": 4009 }, { "epoch": 0.22351039518421492, "grad_norm": 0.5370509028434753, "learning_rate": 8.947831666360765e-05, "loss": 1.7851, "step": 4010 }, { "epoch": 0.22356613343737808, "grad_norm": 0.5874437093734741, "learning_rate": 8.947285181747098e-05, "loss": 1.8569, "step": 4011 }, { "epoch": 0.2236218716905412, "grad_norm": 0.566886305809021, "learning_rate": 8.946738571948562e-05, "loss": 1.6114, "step": 4012 }, { "epoch": 0.22367760994370436, "grad_norm": 0.5747610926628113, "learning_rate": 8.946191836982489e-05, "loss": 1.8552, "step": 4013 }, { "epoch": 0.22373334819686752, "grad_norm": 0.5414125919342041, "learning_rate": 8.945644976866219e-05, "loss": 1.5846, "step": 4014 }, { "epoch": 0.22378908645003065, "grad_norm": 0.5818209648132324, "learning_rate": 8.945097991617096e-05, "loss": 1.8305, "step": 4015 }, { "epoch": 0.2238448247031938, "grad_norm": 0.5896833539009094, "learning_rate": 8.944550881252465e-05, "loss": 1.6642, "step": 4016 }, { "epoch": 0.22390056295635694, "grad_norm": 0.5750831365585327, "learning_rate": 8.944003645789678e-05, "loss": 1.7286, "step": 4017 }, { "epoch": 0.2239563012095201, "grad_norm": 0.514319896697998, "learning_rate": 8.943456285246091e-05, "loss": 1.6254, "step": 4018 }, { "epoch": 0.22401203946268325, "grad_norm": 0.48393240571022034, "learning_rate": 8.942908799639062e-05, "loss": 1.4306, "step": 4019 }, { "epoch": 0.22406777771584638, "grad_norm": 0.5655490756034851, "learning_rate": 8.942361188985957e-05, "loss": 1.8686, "step": 4020 }, { "epoch": 0.22412351596900953, "grad_norm": 0.7101614475250244, "learning_rate": 8.941813453304138e-05, "loss": 1.6244, "step": 4021 }, { "epoch": 0.22417925422217266, "grad_norm": 0.5121461153030396, "learning_rate": 8.941265592610979e-05, "loss": 1.5336, "step": 4022 }, { "epoch": 0.22423499247533582, "grad_norm": 0.5167136192321777, "learning_rate": 8.940717606923857e-05, "loss": 1.5896, "step": 4023 }, { "epoch": 0.22429073072849898, "grad_norm": 0.5683619379997253, "learning_rate": 8.940169496260144e-05, "loss": 1.8004, "step": 4024 }, { "epoch": 0.2243464689816621, "grad_norm": 0.5303056240081787, "learning_rate": 8.939621260637231e-05, "loss": 1.6034, "step": 4025 }, { "epoch": 0.22440220723482526, "grad_norm": 0.5514824986457825, "learning_rate": 8.9390729000725e-05, "loss": 1.7099, "step": 4026 }, { "epoch": 0.22445794548798842, "grad_norm": 0.5117455720901489, "learning_rate": 8.938524414583343e-05, "loss": 1.8367, "step": 4027 }, { "epoch": 0.22451368374115155, "grad_norm": 0.5556350946426392, "learning_rate": 8.937975804187156e-05, "loss": 1.6737, "step": 4028 }, { "epoch": 0.2245694219943147, "grad_norm": 0.5511283874511719, "learning_rate": 8.937427068901335e-05, "loss": 1.7541, "step": 4029 }, { "epoch": 0.22462516024747783, "grad_norm": 0.5651305317878723, "learning_rate": 8.936878208743285e-05, "loss": 1.7383, "step": 4030 }, { "epoch": 0.224680898500641, "grad_norm": 0.5192481875419617, "learning_rate": 8.93632922373041e-05, "loss": 1.5392, "step": 4031 }, { "epoch": 0.22473663675380415, "grad_norm": 0.5942433476448059, "learning_rate": 8.935780113880125e-05, "loss": 1.9703, "step": 4032 }, { "epoch": 0.22479237500696728, "grad_norm": 0.5313376188278198, "learning_rate": 8.93523087920984e-05, "loss": 1.7827, "step": 4033 }, { "epoch": 0.22484811326013043, "grad_norm": 0.5464789271354675, "learning_rate": 8.934681519736977e-05, "loss": 1.8036, "step": 4034 }, { "epoch": 0.22490385151329356, "grad_norm": 0.5823439955711365, "learning_rate": 8.934132035478955e-05, "loss": 1.9969, "step": 4035 }, { "epoch": 0.22495958976645672, "grad_norm": 0.5518758296966553, "learning_rate": 8.933582426453205e-05, "loss": 1.7836, "step": 4036 }, { "epoch": 0.22501532801961988, "grad_norm": 0.529864490032196, "learning_rate": 8.933032692677153e-05, "loss": 1.8767, "step": 4037 }, { "epoch": 0.225071066272783, "grad_norm": 0.5450250506401062, "learning_rate": 8.932482834168237e-05, "loss": 1.6584, "step": 4038 }, { "epoch": 0.22512680452594616, "grad_norm": 0.5210989713668823, "learning_rate": 8.931932850943892e-05, "loss": 1.6707, "step": 4039 }, { "epoch": 0.2251825427791093, "grad_norm": 0.5319432616233826, "learning_rate": 8.931382743021562e-05, "loss": 1.5798, "step": 4040 }, { "epoch": 0.22523828103227245, "grad_norm": 0.502311110496521, "learning_rate": 8.930832510418692e-05, "loss": 1.5718, "step": 4041 }, { "epoch": 0.2252940192854356, "grad_norm": 0.5432561635971069, "learning_rate": 8.930282153152734e-05, "loss": 1.7996, "step": 4042 }, { "epoch": 0.22534975753859873, "grad_norm": 0.5339439511299133, "learning_rate": 8.92973167124114e-05, "loss": 1.8783, "step": 4043 }, { "epoch": 0.2254054957917619, "grad_norm": 0.5929161310195923, "learning_rate": 8.92918106470137e-05, "loss": 1.9278, "step": 4044 }, { "epoch": 0.22546123404492502, "grad_norm": 0.5356025695800781, "learning_rate": 8.928630333550886e-05, "loss": 1.6555, "step": 4045 }, { "epoch": 0.22551697229808818, "grad_norm": 0.6173697113990784, "learning_rate": 8.928079477807155e-05, "loss": 1.6326, "step": 4046 }, { "epoch": 0.22557271055125133, "grad_norm": 0.5391169786453247, "learning_rate": 8.927528497487642e-05, "loss": 1.7983, "step": 4047 }, { "epoch": 0.22562844880441446, "grad_norm": 0.541691780090332, "learning_rate": 8.926977392609826e-05, "loss": 1.9013, "step": 4048 }, { "epoch": 0.22568418705757762, "grad_norm": 0.5518167018890381, "learning_rate": 8.926426163191182e-05, "loss": 1.8038, "step": 4049 }, { "epoch": 0.22573992531074077, "grad_norm": 0.5680546164512634, "learning_rate": 8.925874809249193e-05, "loss": 1.893, "step": 4050 }, { "epoch": 0.2257956635639039, "grad_norm": 0.531597912311554, "learning_rate": 8.925323330801345e-05, "loss": 1.6987, "step": 4051 }, { "epoch": 0.22585140181706706, "grad_norm": 0.5005265474319458, "learning_rate": 8.924771727865126e-05, "loss": 1.4703, "step": 4052 }, { "epoch": 0.2259071400702302, "grad_norm": 0.4409901201725006, "learning_rate": 8.924220000458032e-05, "loss": 1.1188, "step": 4053 }, { "epoch": 0.22596287832339335, "grad_norm": 0.5583540797233582, "learning_rate": 8.92366814859756e-05, "loss": 1.8899, "step": 4054 }, { "epoch": 0.2260186165765565, "grad_norm": 0.5503487586975098, "learning_rate": 8.923116172301208e-05, "loss": 1.7006, "step": 4055 }, { "epoch": 0.22607435482971963, "grad_norm": 0.5401930212974548, "learning_rate": 8.922564071586487e-05, "loss": 1.7435, "step": 4056 }, { "epoch": 0.2261300930828828, "grad_norm": 0.5470068454742432, "learning_rate": 8.922011846470903e-05, "loss": 1.7926, "step": 4057 }, { "epoch": 0.22618583133604592, "grad_norm": 0.5655896663665771, "learning_rate": 8.921459496971971e-05, "loss": 1.8028, "step": 4058 }, { "epoch": 0.22624156958920907, "grad_norm": 0.520338237285614, "learning_rate": 8.920907023107208e-05, "loss": 1.7713, "step": 4059 }, { "epoch": 0.22629730784237223, "grad_norm": 0.5628316402435303, "learning_rate": 8.920354424894133e-05, "loss": 1.8308, "step": 4060 }, { "epoch": 0.22635304609553536, "grad_norm": 0.5436638593673706, "learning_rate": 8.919801702350272e-05, "loss": 1.7824, "step": 4061 }, { "epoch": 0.22640878434869852, "grad_norm": 0.6150013208389282, "learning_rate": 8.919248855493156e-05, "loss": 1.6801, "step": 4062 }, { "epoch": 0.22646452260186165, "grad_norm": 0.5413832068443298, "learning_rate": 8.918695884340318e-05, "loss": 1.7266, "step": 4063 }, { "epoch": 0.2265202608550248, "grad_norm": 0.6004742980003357, "learning_rate": 8.918142788909294e-05, "loss": 1.9331, "step": 4064 }, { "epoch": 0.22657599910818796, "grad_norm": 0.5428612232208252, "learning_rate": 8.917589569217624e-05, "loss": 1.8074, "step": 4065 }, { "epoch": 0.2266317373613511, "grad_norm": 0.5653241276741028, "learning_rate": 8.917036225282855e-05, "loss": 1.8719, "step": 4066 }, { "epoch": 0.22668747561451424, "grad_norm": 0.5411580801010132, "learning_rate": 8.916482757122535e-05, "loss": 1.7155, "step": 4067 }, { "epoch": 0.22674321386767737, "grad_norm": 0.5733420252799988, "learning_rate": 8.915929164754215e-05, "loss": 1.8401, "step": 4068 }, { "epoch": 0.22679895212084053, "grad_norm": 0.5870828032493591, "learning_rate": 8.915375448195455e-05, "loss": 1.6825, "step": 4069 }, { "epoch": 0.2268546903740037, "grad_norm": 0.5373989939689636, "learning_rate": 8.914821607463814e-05, "loss": 1.6471, "step": 4070 }, { "epoch": 0.22691042862716682, "grad_norm": 0.5650984048843384, "learning_rate": 8.914267642576857e-05, "loss": 2.0078, "step": 4071 }, { "epoch": 0.22696616688032997, "grad_norm": 0.5647602677345276, "learning_rate": 8.91371355355215e-05, "loss": 1.8949, "step": 4072 }, { "epoch": 0.22702190513349313, "grad_norm": 0.5225738286972046, "learning_rate": 8.913159340407269e-05, "loss": 1.787, "step": 4073 }, { "epoch": 0.22707764338665626, "grad_norm": 0.4927429258823395, "learning_rate": 8.912605003159788e-05, "loss": 1.6022, "step": 4074 }, { "epoch": 0.22713338163981941, "grad_norm": 0.5242977738380432, "learning_rate": 8.912050541827291e-05, "loss": 1.6286, "step": 4075 }, { "epoch": 0.22718911989298254, "grad_norm": 0.5272535681724548, "learning_rate": 8.911495956427357e-05, "loss": 1.8091, "step": 4076 }, { "epoch": 0.2272448581461457, "grad_norm": 0.5660970211029053, "learning_rate": 8.910941246977577e-05, "loss": 1.7518, "step": 4077 }, { "epoch": 0.22730059639930886, "grad_norm": 0.5166184902191162, "learning_rate": 8.910386413495544e-05, "loss": 1.7051, "step": 4078 }, { "epoch": 0.227356334652472, "grad_norm": 0.5315423607826233, "learning_rate": 8.909831455998854e-05, "loss": 1.5667, "step": 4079 }, { "epoch": 0.22741207290563514, "grad_norm": 0.5121911764144897, "learning_rate": 8.909276374505104e-05, "loss": 1.6594, "step": 4080 }, { "epoch": 0.22746781115879827, "grad_norm": 0.5725307464599609, "learning_rate": 8.908721169031901e-05, "loss": 1.7931, "step": 4081 }, { "epoch": 0.22752354941196143, "grad_norm": 0.6129924058914185, "learning_rate": 8.908165839596852e-05, "loss": 2.0539, "step": 4082 }, { "epoch": 0.22757928766512459, "grad_norm": 0.6019653677940369, "learning_rate": 8.907610386217568e-05, "loss": 2.1055, "step": 4083 }, { "epoch": 0.22763502591828771, "grad_norm": 0.5589843392372131, "learning_rate": 8.907054808911668e-05, "loss": 1.8536, "step": 4084 }, { "epoch": 0.22769076417145087, "grad_norm": 0.5030215382575989, "learning_rate": 8.906499107696766e-05, "loss": 1.5868, "step": 4085 }, { "epoch": 0.227746502424614, "grad_norm": 0.5388656258583069, "learning_rate": 8.90594328259049e-05, "loss": 1.611, "step": 4086 }, { "epoch": 0.22780224067777716, "grad_norm": 0.5835996270179749, "learning_rate": 8.905387333610466e-05, "loss": 1.3946, "step": 4087 }, { "epoch": 0.2278579789309403, "grad_norm": 0.5778213739395142, "learning_rate": 8.904831260774327e-05, "loss": 1.9145, "step": 4088 }, { "epoch": 0.22791371718410344, "grad_norm": 0.5685307383537292, "learning_rate": 8.904275064099708e-05, "loss": 1.8516, "step": 4089 }, { "epoch": 0.2279694554372666, "grad_norm": 0.5906243324279785, "learning_rate": 8.903718743604244e-05, "loss": 1.7872, "step": 4090 }, { "epoch": 0.22802519369042973, "grad_norm": 0.5142653584480286, "learning_rate": 8.903162299305585e-05, "loss": 1.5771, "step": 4091 }, { "epoch": 0.22808093194359289, "grad_norm": 0.5752720832824707, "learning_rate": 8.902605731221373e-05, "loss": 1.7952, "step": 4092 }, { "epoch": 0.22813667019675604, "grad_norm": 0.5666948556900024, "learning_rate": 8.902049039369261e-05, "loss": 1.7417, "step": 4093 }, { "epoch": 0.22819240844991917, "grad_norm": 0.5241186618804932, "learning_rate": 8.901492223766906e-05, "loss": 1.6605, "step": 4094 }, { "epoch": 0.22824814670308233, "grad_norm": 0.548561155796051, "learning_rate": 8.900935284431961e-05, "loss": 1.8027, "step": 4095 }, { "epoch": 0.22830388495624548, "grad_norm": 0.5435733795166016, "learning_rate": 8.900378221382097e-05, "loss": 1.6941, "step": 4096 }, { "epoch": 0.2283596232094086, "grad_norm": 0.5925113558769226, "learning_rate": 8.899821034634974e-05, "loss": 1.9182, "step": 4097 }, { "epoch": 0.22841536146257177, "grad_norm": 0.5289484262466431, "learning_rate": 8.899263724208266e-05, "loss": 1.7512, "step": 4098 }, { "epoch": 0.2284710997157349, "grad_norm": 0.5516422390937805, "learning_rate": 8.898706290119647e-05, "loss": 1.8606, "step": 4099 }, { "epoch": 0.22852683796889806, "grad_norm": 0.5578961372375488, "learning_rate": 8.898148732386795e-05, "loss": 1.7136, "step": 4100 }, { "epoch": 0.2285825762220612, "grad_norm": 0.5643925666809082, "learning_rate": 8.897591051027394e-05, "loss": 1.8315, "step": 4101 }, { "epoch": 0.22863831447522434, "grad_norm": 0.4974330961704254, "learning_rate": 8.89703324605913e-05, "loss": 1.4505, "step": 4102 }, { "epoch": 0.2286940527283875, "grad_norm": 0.5316607356071472, "learning_rate": 8.896475317499691e-05, "loss": 1.662, "step": 4103 }, { "epoch": 0.22874979098155063, "grad_norm": 0.48880115151405334, "learning_rate": 8.895917265366773e-05, "loss": 1.6713, "step": 4104 }, { "epoch": 0.22880552923471378, "grad_norm": 0.5647329092025757, "learning_rate": 8.895359089678075e-05, "loss": 1.6645, "step": 4105 }, { "epoch": 0.22886126748787694, "grad_norm": 0.588045060634613, "learning_rate": 8.894800790451298e-05, "loss": 1.7344, "step": 4106 }, { "epoch": 0.22891700574104007, "grad_norm": 0.5201917290687561, "learning_rate": 8.894242367704149e-05, "loss": 1.7137, "step": 4107 }, { "epoch": 0.22897274399420323, "grad_norm": 0.5581889152526855, "learning_rate": 8.893683821454335e-05, "loss": 1.689, "step": 4108 }, { "epoch": 0.22902848224736636, "grad_norm": 0.533208429813385, "learning_rate": 8.893125151719574e-05, "loss": 1.7345, "step": 4109 }, { "epoch": 0.2290842205005295, "grad_norm": 0.5409815907478333, "learning_rate": 8.89256635851758e-05, "loss": 1.6921, "step": 4110 }, { "epoch": 0.22913995875369267, "grad_norm": 0.5371890664100647, "learning_rate": 8.892007441866076e-05, "loss": 1.7282, "step": 4111 }, { "epoch": 0.2291956970068558, "grad_norm": 0.5628719925880432, "learning_rate": 8.89144840178279e-05, "loss": 1.6771, "step": 4112 }, { "epoch": 0.22925143526001895, "grad_norm": 0.5631751418113708, "learning_rate": 8.89088923828545e-05, "loss": 1.9474, "step": 4113 }, { "epoch": 0.22930717351318208, "grad_norm": 0.5464017987251282, "learning_rate": 8.890329951391787e-05, "loss": 1.7969, "step": 4114 }, { "epoch": 0.22936291176634524, "grad_norm": 0.5662708878517151, "learning_rate": 8.88977054111954e-05, "loss": 1.6611, "step": 4115 }, { "epoch": 0.2294186500195084, "grad_norm": 0.607832670211792, "learning_rate": 8.889211007486451e-05, "loss": 1.6558, "step": 4116 }, { "epoch": 0.22947438827267153, "grad_norm": 0.5683878064155579, "learning_rate": 8.888651350510265e-05, "loss": 1.712, "step": 4117 }, { "epoch": 0.22953012652583468, "grad_norm": 0.5762284398078918, "learning_rate": 8.888091570208729e-05, "loss": 1.8012, "step": 4118 }, { "epoch": 0.22958586477899784, "grad_norm": 0.5987650752067566, "learning_rate": 8.887531666599598e-05, "loss": 2.0303, "step": 4119 }, { "epoch": 0.22964160303216097, "grad_norm": 0.5141220092773438, "learning_rate": 8.88697163970063e-05, "loss": 1.6133, "step": 4120 }, { "epoch": 0.22969734128532412, "grad_norm": 0.5571396946907043, "learning_rate": 8.886411489529583e-05, "loss": 1.6117, "step": 4121 }, { "epoch": 0.22975307953848725, "grad_norm": 0.5717421770095825, "learning_rate": 8.885851216104222e-05, "loss": 1.8159, "step": 4122 }, { "epoch": 0.2298088177916504, "grad_norm": 0.5314472913742065, "learning_rate": 8.885290819442319e-05, "loss": 1.8198, "step": 4123 }, { "epoch": 0.22986455604481357, "grad_norm": 0.5760038495063782, "learning_rate": 8.884730299561642e-05, "loss": 1.8839, "step": 4124 }, { "epoch": 0.2299202942979767, "grad_norm": 0.5187524557113647, "learning_rate": 8.88416965647997e-05, "loss": 1.5981, "step": 4125 }, { "epoch": 0.22997603255113985, "grad_norm": 0.5539306998252869, "learning_rate": 8.883608890215083e-05, "loss": 1.5802, "step": 4126 }, { "epoch": 0.23003177080430298, "grad_norm": 0.5440337061882019, "learning_rate": 8.883048000784764e-05, "loss": 1.7884, "step": 4127 }, { "epoch": 0.23008750905746614, "grad_norm": 0.6190919876098633, "learning_rate": 8.882486988206803e-05, "loss": 1.8968, "step": 4128 }, { "epoch": 0.2301432473106293, "grad_norm": 0.5481730103492737, "learning_rate": 8.881925852498991e-05, "loss": 1.5026, "step": 4129 }, { "epoch": 0.23019898556379242, "grad_norm": 0.5920677185058594, "learning_rate": 8.881364593679124e-05, "loss": 2.02, "step": 4130 }, { "epoch": 0.23025472381695558, "grad_norm": 0.580629289150238, "learning_rate": 8.880803211765003e-05, "loss": 1.8447, "step": 4131 }, { "epoch": 0.2303104620701187, "grad_norm": 0.5800060033798218, "learning_rate": 8.880241706774431e-05, "loss": 1.8952, "step": 4132 }, { "epoch": 0.23036620032328187, "grad_norm": 0.5633650422096252, "learning_rate": 8.879680078725214e-05, "loss": 1.79, "step": 4133 }, { "epoch": 0.23042193857644502, "grad_norm": 0.503121554851532, "learning_rate": 8.879118327635165e-05, "loss": 1.31, "step": 4134 }, { "epoch": 0.23047767682960815, "grad_norm": 0.5033895373344421, "learning_rate": 8.8785564535221e-05, "loss": 1.388, "step": 4135 }, { "epoch": 0.2305334150827713, "grad_norm": 0.5460697412490845, "learning_rate": 8.877994456403838e-05, "loss": 1.8455, "step": 4136 }, { "epoch": 0.23058915333593444, "grad_norm": 0.5005971193313599, "learning_rate": 8.877432336298201e-05, "loss": 1.513, "step": 4137 }, { "epoch": 0.2306448915890976, "grad_norm": 0.5267760753631592, "learning_rate": 8.876870093223019e-05, "loss": 1.6449, "step": 4138 }, { "epoch": 0.23070062984226075, "grad_norm": 0.5714914202690125, "learning_rate": 8.87630772719612e-05, "loss": 2.0891, "step": 4139 }, { "epoch": 0.23075636809542388, "grad_norm": 0.5814961194992065, "learning_rate": 8.875745238235341e-05, "loss": 1.6314, "step": 4140 }, { "epoch": 0.23081210634858704, "grad_norm": 0.5237919092178345, "learning_rate": 8.87518262635852e-05, "loss": 1.5437, "step": 4141 }, { "epoch": 0.2308678446017502, "grad_norm": 0.5390162467956543, "learning_rate": 8.8746198915835e-05, "loss": 1.8075, "step": 4142 }, { "epoch": 0.23092358285491332, "grad_norm": 0.5281346440315247, "learning_rate": 8.874057033928128e-05, "loss": 1.7196, "step": 4143 }, { "epoch": 0.23097932110807648, "grad_norm": 0.5769410133361816, "learning_rate": 8.873494053410254e-05, "loss": 1.7623, "step": 4144 }, { "epoch": 0.2310350593612396, "grad_norm": 0.5773770213127136, "learning_rate": 8.872930950047733e-05, "loss": 1.6683, "step": 4145 }, { "epoch": 0.23109079761440277, "grad_norm": 0.5479909777641296, "learning_rate": 8.872367723858422e-05, "loss": 1.8277, "step": 4146 }, { "epoch": 0.23114653586756592, "grad_norm": 0.5558038949966431, "learning_rate": 8.871804374860185e-05, "loss": 1.9413, "step": 4147 }, { "epoch": 0.23120227412072905, "grad_norm": 0.5571532249450684, "learning_rate": 8.871240903070888e-05, "loss": 1.7471, "step": 4148 }, { "epoch": 0.2312580123738922, "grad_norm": 0.63371741771698, "learning_rate": 8.870677308508399e-05, "loss": 2.0195, "step": 4149 }, { "epoch": 0.23131375062705534, "grad_norm": 0.5300304889678955, "learning_rate": 8.870113591190595e-05, "loss": 1.5686, "step": 4150 }, { "epoch": 0.2313694888802185, "grad_norm": 0.6006084680557251, "learning_rate": 8.869549751135352e-05, "loss": 1.7178, "step": 4151 }, { "epoch": 0.23142522713338165, "grad_norm": 0.5930531024932861, "learning_rate": 8.868985788360551e-05, "loss": 1.6998, "step": 4152 }, { "epoch": 0.23148096538654478, "grad_norm": 0.5450523495674133, "learning_rate": 8.868421702884077e-05, "loss": 1.5045, "step": 4153 }, { "epoch": 0.23153670363970794, "grad_norm": 0.519468367099762, "learning_rate": 8.867857494723824e-05, "loss": 1.6035, "step": 4154 }, { "epoch": 0.23159244189287106, "grad_norm": 0.5567930936813354, "learning_rate": 8.867293163897681e-05, "loss": 1.8108, "step": 4155 }, { "epoch": 0.23164818014603422, "grad_norm": 0.5138580799102783, "learning_rate": 8.866728710423547e-05, "loss": 1.5952, "step": 4156 }, { "epoch": 0.23170391839919738, "grad_norm": 0.5398350954055786, "learning_rate": 8.866164134319323e-05, "loss": 1.8621, "step": 4157 }, { "epoch": 0.2317596566523605, "grad_norm": 0.5708958506584167, "learning_rate": 8.865599435602915e-05, "loss": 1.5408, "step": 4158 }, { "epoch": 0.23181539490552366, "grad_norm": 0.62980717420578, "learning_rate": 8.86503461429223e-05, "loss": 2.2779, "step": 4159 }, { "epoch": 0.2318711331586868, "grad_norm": 0.5782346129417419, "learning_rate": 8.86446967040518e-05, "loss": 1.6574, "step": 4160 }, { "epoch": 0.23192687141184995, "grad_norm": 0.5406448841094971, "learning_rate": 8.863904603959686e-05, "loss": 1.6591, "step": 4161 }, { "epoch": 0.2319826096650131, "grad_norm": 0.533285915851593, "learning_rate": 8.863339414973664e-05, "loss": 1.7869, "step": 4162 }, { "epoch": 0.23203834791817624, "grad_norm": 0.5359031558036804, "learning_rate": 8.862774103465042e-05, "loss": 1.8322, "step": 4163 }, { "epoch": 0.2320940861713394, "grad_norm": 0.5305787920951843, "learning_rate": 8.862208669451748e-05, "loss": 1.5869, "step": 4164 }, { "epoch": 0.23214982442450255, "grad_norm": 0.5482218861579895, "learning_rate": 8.861643112951712e-05, "loss": 1.9482, "step": 4165 }, { "epoch": 0.23220556267766568, "grad_norm": 0.5915202498435974, "learning_rate": 8.86107743398287e-05, "loss": 1.9292, "step": 4166 }, { "epoch": 0.23226130093082883, "grad_norm": 0.5175179243087769, "learning_rate": 8.860511632563166e-05, "loss": 1.5677, "step": 4167 }, { "epoch": 0.23231703918399196, "grad_norm": 0.5698404908180237, "learning_rate": 8.85994570871054e-05, "loss": 1.8537, "step": 4168 }, { "epoch": 0.23237277743715512, "grad_norm": 0.5476871728897095, "learning_rate": 8.859379662442941e-05, "loss": 1.7031, "step": 4169 }, { "epoch": 0.23242851569031828, "grad_norm": 0.5611745119094849, "learning_rate": 8.858813493778322e-05, "loss": 1.9365, "step": 4170 }, { "epoch": 0.2324842539434814, "grad_norm": 0.5908852219581604, "learning_rate": 8.858247202734637e-05, "loss": 1.7084, "step": 4171 }, { "epoch": 0.23253999219664456, "grad_norm": 0.5042490363121033, "learning_rate": 8.857680789329844e-05, "loss": 1.6353, "step": 4172 }, { "epoch": 0.2325957304498077, "grad_norm": 0.535675048828125, "learning_rate": 8.85711425358191e-05, "loss": 1.523, "step": 4173 }, { "epoch": 0.23265146870297085, "grad_norm": 0.5372074246406555, "learning_rate": 8.8565475955088e-05, "loss": 1.38, "step": 4174 }, { "epoch": 0.232707206956134, "grad_norm": 0.554507315158844, "learning_rate": 8.855980815128486e-05, "loss": 1.5261, "step": 4175 }, { "epoch": 0.23276294520929713, "grad_norm": 0.5450062155723572, "learning_rate": 8.85541391245894e-05, "loss": 1.7725, "step": 4176 }, { "epoch": 0.2328186834624603, "grad_norm": 0.5121927857398987, "learning_rate": 8.854846887518147e-05, "loss": 1.4857, "step": 4177 }, { "epoch": 0.23287442171562342, "grad_norm": 0.5284276008605957, "learning_rate": 8.854279740324086e-05, "loss": 1.8393, "step": 4178 }, { "epoch": 0.23293015996878658, "grad_norm": 0.5464218258857727, "learning_rate": 8.85371247089474e-05, "loss": 1.6455, "step": 4179 }, { "epoch": 0.23298589822194973, "grad_norm": 0.515756368637085, "learning_rate": 8.853145079248106e-05, "loss": 1.6739, "step": 4180 }, { "epoch": 0.23304163647511286, "grad_norm": 0.5167007446289062, "learning_rate": 8.852577565402175e-05, "loss": 1.6312, "step": 4181 }, { "epoch": 0.23309737472827602, "grad_norm": 0.5863040089607239, "learning_rate": 8.852009929374945e-05, "loss": 1.8519, "step": 4182 }, { "epoch": 0.23315311298143915, "grad_norm": 0.5061371922492981, "learning_rate": 8.851442171184418e-05, "loss": 1.6562, "step": 4183 }, { "epoch": 0.2332088512346023, "grad_norm": 0.5501469969749451, "learning_rate": 8.850874290848603e-05, "loss": 1.7597, "step": 4184 }, { "epoch": 0.23326458948776546, "grad_norm": 0.5034657716751099, "learning_rate": 8.850306288385505e-05, "loss": 1.7217, "step": 4185 }, { "epoch": 0.2333203277409286, "grad_norm": 0.5563570857048035, "learning_rate": 8.849738163813143e-05, "loss": 1.7315, "step": 4186 }, { "epoch": 0.23337606599409175, "grad_norm": 0.5461295247077942, "learning_rate": 8.849169917149531e-05, "loss": 1.7419, "step": 4187 }, { "epoch": 0.2334318042472549, "grad_norm": 0.5286983251571655, "learning_rate": 8.848601548412691e-05, "loss": 1.8088, "step": 4188 }, { "epoch": 0.23348754250041803, "grad_norm": 0.5308994650840759, "learning_rate": 8.848033057620651e-05, "loss": 1.6436, "step": 4189 }, { "epoch": 0.2335432807535812, "grad_norm": 0.5667473673820496, "learning_rate": 8.847464444791435e-05, "loss": 1.6382, "step": 4190 }, { "epoch": 0.23359901900674432, "grad_norm": 0.5432576537132263, "learning_rate": 8.846895709943082e-05, "loss": 1.8993, "step": 4191 }, { "epoch": 0.23365475725990748, "grad_norm": 0.6006546020507812, "learning_rate": 8.846326853093623e-05, "loss": 1.7459, "step": 4192 }, { "epoch": 0.23371049551307063, "grad_norm": 0.5638506412506104, "learning_rate": 8.845757874261104e-05, "loss": 1.618, "step": 4193 }, { "epoch": 0.23376623376623376, "grad_norm": 0.5464212894439697, "learning_rate": 8.845188773463566e-05, "loss": 1.6731, "step": 4194 }, { "epoch": 0.23382197201939692, "grad_norm": 0.5781604051589966, "learning_rate": 8.84461955071906e-05, "loss": 1.8368, "step": 4195 }, { "epoch": 0.23387771027256005, "grad_norm": 0.5308955907821655, "learning_rate": 8.844050206045637e-05, "loss": 1.805, "step": 4196 }, { "epoch": 0.2339334485257232, "grad_norm": 0.5154343843460083, "learning_rate": 8.843480739461356e-05, "loss": 1.4806, "step": 4197 }, { "epoch": 0.23398918677888636, "grad_norm": 0.5477091073989868, "learning_rate": 8.842911150984272e-05, "loss": 1.7506, "step": 4198 }, { "epoch": 0.2340449250320495, "grad_norm": 0.5401119589805603, "learning_rate": 8.842341440632454e-05, "loss": 1.8434, "step": 4199 }, { "epoch": 0.23410066328521265, "grad_norm": 0.5683028697967529, "learning_rate": 8.841771608423967e-05, "loss": 1.6289, "step": 4200 }, { "epoch": 0.23415640153837577, "grad_norm": 0.5980592370033264, "learning_rate": 8.841201654376883e-05, "loss": 1.782, "step": 4201 }, { "epoch": 0.23421213979153893, "grad_norm": 0.5431941151618958, "learning_rate": 8.84063157850928e-05, "loss": 1.7904, "step": 4202 }, { "epoch": 0.2342678780447021, "grad_norm": 0.6389545202255249, "learning_rate": 8.840061380839235e-05, "loss": 1.5506, "step": 4203 }, { "epoch": 0.23432361629786522, "grad_norm": 0.5594901442527771, "learning_rate": 8.839491061384832e-05, "loss": 1.7914, "step": 4204 }, { "epoch": 0.23437935455102837, "grad_norm": 0.5211427211761475, "learning_rate": 8.838920620164157e-05, "loss": 1.5682, "step": 4205 }, { "epoch": 0.23443509280419153, "grad_norm": 0.5244554281234741, "learning_rate": 8.838350057195304e-05, "loss": 1.6598, "step": 4206 }, { "epoch": 0.23449083105735466, "grad_norm": 0.5590394735336304, "learning_rate": 8.837779372496367e-05, "loss": 1.6682, "step": 4207 }, { "epoch": 0.23454656931051782, "grad_norm": 0.5445299744606018, "learning_rate": 8.837208566085441e-05, "loss": 1.8047, "step": 4208 }, { "epoch": 0.23460230756368095, "grad_norm": 0.5209025144577026, "learning_rate": 8.836637637980636e-05, "loss": 1.6225, "step": 4209 }, { "epoch": 0.2346580458168441, "grad_norm": 0.5524556040763855, "learning_rate": 8.836066588200051e-05, "loss": 1.7139, "step": 4210 }, { "epoch": 0.23471378407000726, "grad_norm": 0.5641475915908813, "learning_rate": 8.8354954167618e-05, "loss": 1.7928, "step": 4211 }, { "epoch": 0.2347695223231704, "grad_norm": 0.57920241355896, "learning_rate": 8.834924123683998e-05, "loss": 1.7035, "step": 4212 }, { "epoch": 0.23482526057633354, "grad_norm": 0.5374131202697754, "learning_rate": 8.834352708984762e-05, "loss": 1.6887, "step": 4213 }, { "epoch": 0.23488099882949667, "grad_norm": 0.5739797353744507, "learning_rate": 8.833781172682214e-05, "loss": 1.7476, "step": 4214 }, { "epoch": 0.23493673708265983, "grad_norm": 0.5460266470909119, "learning_rate": 8.833209514794479e-05, "loss": 1.569, "step": 4215 }, { "epoch": 0.234992475335823, "grad_norm": 0.5776944160461426, "learning_rate": 8.832637735339688e-05, "loss": 1.6762, "step": 4216 }, { "epoch": 0.23504821358898612, "grad_norm": 0.593519926071167, "learning_rate": 8.832065834335973e-05, "loss": 1.6699, "step": 4217 }, { "epoch": 0.23510395184214927, "grad_norm": 0.5690516233444214, "learning_rate": 8.831493811801472e-05, "loss": 1.8292, "step": 4218 }, { "epoch": 0.2351596900953124, "grad_norm": 0.5436887741088867, "learning_rate": 8.830921667754328e-05, "loss": 1.6958, "step": 4219 }, { "epoch": 0.23521542834847556, "grad_norm": 0.54433673620224, "learning_rate": 8.830349402212683e-05, "loss": 1.7544, "step": 4220 }, { "epoch": 0.23527116660163871, "grad_norm": 0.5694179534912109, "learning_rate": 8.82977701519469e-05, "loss": 1.676, "step": 4221 }, { "epoch": 0.23532690485480184, "grad_norm": 0.5544805526733398, "learning_rate": 8.829204506718496e-05, "loss": 1.7395, "step": 4222 }, { "epoch": 0.235382643107965, "grad_norm": 0.586121141910553, "learning_rate": 8.828631876802263e-05, "loss": 1.8418, "step": 4223 }, { "epoch": 0.23543838136112813, "grad_norm": 0.5376494526863098, "learning_rate": 8.828059125464148e-05, "loss": 1.5981, "step": 4224 }, { "epoch": 0.2354941196142913, "grad_norm": 0.5764834880828857, "learning_rate": 8.827486252722316e-05, "loss": 1.9862, "step": 4225 }, { "epoch": 0.23554985786745444, "grad_norm": 0.6348791122436523, "learning_rate": 8.826913258594937e-05, "loss": 1.9931, "step": 4226 }, { "epoch": 0.23560559612061757, "grad_norm": 0.5736886262893677, "learning_rate": 8.826340143100182e-05, "loss": 1.8651, "step": 4227 }, { "epoch": 0.23566133437378073, "grad_norm": 0.5940203070640564, "learning_rate": 8.825766906256228e-05, "loss": 1.6837, "step": 4228 }, { "epoch": 0.23571707262694389, "grad_norm": 0.5036525726318359, "learning_rate": 8.825193548081252e-05, "loss": 1.4064, "step": 4229 }, { "epoch": 0.23577281088010701, "grad_norm": 0.5096335411071777, "learning_rate": 8.824620068593439e-05, "loss": 1.7501, "step": 4230 }, { "epoch": 0.23582854913327017, "grad_norm": 0.5474448204040527, "learning_rate": 8.824046467810976e-05, "loss": 1.7263, "step": 4231 }, { "epoch": 0.2358842873864333, "grad_norm": 0.5364823937416077, "learning_rate": 8.823472745752055e-05, "loss": 1.7752, "step": 4232 }, { "epoch": 0.23594002563959646, "grad_norm": 0.5261183977127075, "learning_rate": 8.822898902434873e-05, "loss": 1.7809, "step": 4233 }, { "epoch": 0.2359957638927596, "grad_norm": 0.5040357708930969, "learning_rate": 8.822324937877624e-05, "loss": 1.5033, "step": 4234 }, { "epoch": 0.23605150214592274, "grad_norm": 0.534517228603363, "learning_rate": 8.821750852098515e-05, "loss": 1.735, "step": 4235 }, { "epoch": 0.2361072403990859, "grad_norm": 0.5336146950721741, "learning_rate": 8.821176645115752e-05, "loss": 1.8211, "step": 4236 }, { "epoch": 0.23616297865224903, "grad_norm": 0.5576988458633423, "learning_rate": 8.820602316947544e-05, "loss": 1.6501, "step": 4237 }, { "epoch": 0.23621871690541218, "grad_norm": 0.6140468716621399, "learning_rate": 8.820027867612107e-05, "loss": 1.9297, "step": 4238 }, { "epoch": 0.23627445515857534, "grad_norm": 0.6102777123451233, "learning_rate": 8.819453297127657e-05, "loss": 1.7881, "step": 4239 }, { "epoch": 0.23633019341173847, "grad_norm": 0.5396928787231445, "learning_rate": 8.818878605512418e-05, "loss": 1.7629, "step": 4240 }, { "epoch": 0.23638593166490163, "grad_norm": 0.5476622581481934, "learning_rate": 8.818303792784615e-05, "loss": 1.939, "step": 4241 }, { "epoch": 0.23644166991806476, "grad_norm": 0.5725302696228027, "learning_rate": 8.817728858962478e-05, "loss": 1.7058, "step": 4242 }, { "epoch": 0.2364974081712279, "grad_norm": 0.5522921085357666, "learning_rate": 8.817153804064241e-05, "loss": 1.6284, "step": 4243 }, { "epoch": 0.23655314642439107, "grad_norm": 0.5554071664810181, "learning_rate": 8.81657862810814e-05, "loss": 1.7203, "step": 4244 }, { "epoch": 0.2366088846775542, "grad_norm": 0.6202051639556885, "learning_rate": 8.816003331112419e-05, "loss": 2.0629, "step": 4245 }, { "epoch": 0.23666462293071736, "grad_norm": 0.5647374391555786, "learning_rate": 8.81542791309532e-05, "loss": 1.7256, "step": 4246 }, { "epoch": 0.23672036118388048, "grad_norm": 0.5261071920394897, "learning_rate": 8.814852374075093e-05, "loss": 1.6476, "step": 4247 }, { "epoch": 0.23677609943704364, "grad_norm": 0.5051866173744202, "learning_rate": 8.81427671406999e-05, "loss": 1.57, "step": 4248 }, { "epoch": 0.2368318376902068, "grad_norm": 0.5553388595581055, "learning_rate": 8.81370093309827e-05, "loss": 1.497, "step": 4249 }, { "epoch": 0.23688757594336993, "grad_norm": 0.6159742474555969, "learning_rate": 8.813125031178191e-05, "loss": 1.9324, "step": 4250 }, { "epoch": 0.23694331419653308, "grad_norm": 0.5158507227897644, "learning_rate": 8.812549008328017e-05, "loss": 1.7841, "step": 4251 }, { "epoch": 0.23699905244969624, "grad_norm": 0.5447210073471069, "learning_rate": 8.811972864566018e-05, "loss": 1.6966, "step": 4252 }, { "epoch": 0.23705479070285937, "grad_norm": 0.5115744471549988, "learning_rate": 8.811396599910467e-05, "loss": 1.6449, "step": 4253 }, { "epoch": 0.23711052895602253, "grad_norm": 0.5265628695487976, "learning_rate": 8.810820214379636e-05, "loss": 1.8372, "step": 4254 }, { "epoch": 0.23716626720918565, "grad_norm": 0.5546838045120239, "learning_rate": 8.810243707991805e-05, "loss": 1.9996, "step": 4255 }, { "epoch": 0.2372220054623488, "grad_norm": 0.5540011525154114, "learning_rate": 8.809667080765262e-05, "loss": 1.7619, "step": 4256 }, { "epoch": 0.23727774371551197, "grad_norm": 0.5753396153450012, "learning_rate": 8.809090332718288e-05, "loss": 1.8621, "step": 4257 }, { "epoch": 0.2373334819686751, "grad_norm": 0.5528965592384338, "learning_rate": 8.808513463869179e-05, "loss": 1.6625, "step": 4258 }, { "epoch": 0.23738922022183825, "grad_norm": 0.5542230010032654, "learning_rate": 8.80793647423623e-05, "loss": 1.5929, "step": 4259 }, { "epoch": 0.23744495847500138, "grad_norm": 0.6071727275848389, "learning_rate": 8.807359363837734e-05, "loss": 1.7551, "step": 4260 }, { "epoch": 0.23750069672816454, "grad_norm": 0.5722533464431763, "learning_rate": 8.806782132691999e-05, "loss": 1.9474, "step": 4261 }, { "epoch": 0.2375564349813277, "grad_norm": 0.5362473130226135, "learning_rate": 8.806204780817331e-05, "loss": 1.6914, "step": 4262 }, { "epoch": 0.23761217323449083, "grad_norm": 0.519892156124115, "learning_rate": 8.805627308232036e-05, "loss": 1.4148, "step": 4263 }, { "epoch": 0.23766791148765398, "grad_norm": 0.5315799713134766, "learning_rate": 8.805049714954434e-05, "loss": 1.8304, "step": 4264 }, { "epoch": 0.2377236497408171, "grad_norm": 0.5093747973442078, "learning_rate": 8.804472001002839e-05, "loss": 1.4575, "step": 4265 }, { "epoch": 0.23777938799398027, "grad_norm": 0.5335510969161987, "learning_rate": 8.803894166395574e-05, "loss": 1.515, "step": 4266 }, { "epoch": 0.23783512624714342, "grad_norm": 0.5546256303787231, "learning_rate": 8.803316211150964e-05, "loss": 1.657, "step": 4267 }, { "epoch": 0.23789086450030655, "grad_norm": 0.5256768465042114, "learning_rate": 8.802738135287338e-05, "loss": 1.5228, "step": 4268 }, { "epoch": 0.2379466027534697, "grad_norm": 0.5291659235954285, "learning_rate": 8.802159938823031e-05, "loss": 1.5667, "step": 4269 }, { "epoch": 0.23800234100663284, "grad_norm": 0.5859813094139099, "learning_rate": 8.801581621776379e-05, "loss": 1.9385, "step": 4270 }, { "epoch": 0.238058079259796, "grad_norm": 0.6084904670715332, "learning_rate": 8.801003184165722e-05, "loss": 1.9139, "step": 4271 }, { "epoch": 0.23811381751295915, "grad_norm": 0.5245258212089539, "learning_rate": 8.800424626009407e-05, "loss": 1.8107, "step": 4272 }, { "epoch": 0.23816955576612228, "grad_norm": 0.5182399749755859, "learning_rate": 8.799845947325777e-05, "loss": 1.72, "step": 4273 }, { "epoch": 0.23822529401928544, "grad_norm": 0.5252156257629395, "learning_rate": 8.799267148133192e-05, "loss": 1.6711, "step": 4274 }, { "epoch": 0.2382810322724486, "grad_norm": 0.49757280945777893, "learning_rate": 8.798688228450002e-05, "loss": 1.5716, "step": 4275 }, { "epoch": 0.23833677052561172, "grad_norm": 0.5291200876235962, "learning_rate": 8.798109188294572e-05, "loss": 1.6498, "step": 4276 }, { "epoch": 0.23839250877877488, "grad_norm": 0.5830451250076294, "learning_rate": 8.797530027685261e-05, "loss": 1.8761, "step": 4277 }, { "epoch": 0.238448247031938, "grad_norm": 0.5453559756278992, "learning_rate": 8.796950746640439e-05, "loss": 1.6984, "step": 4278 }, { "epoch": 0.23850398528510117, "grad_norm": 0.5068353414535522, "learning_rate": 8.796371345178476e-05, "loss": 1.3414, "step": 4279 }, { "epoch": 0.23855972353826432, "grad_norm": 0.5567828416824341, "learning_rate": 8.79579182331775e-05, "loss": 1.716, "step": 4280 }, { "epoch": 0.23861546179142745, "grad_norm": 0.5418634414672852, "learning_rate": 8.795212181076638e-05, "loss": 1.6889, "step": 4281 }, { "epoch": 0.2386712000445906, "grad_norm": 0.5291851162910461, "learning_rate": 8.794632418473522e-05, "loss": 1.6941, "step": 4282 }, { "epoch": 0.23872693829775374, "grad_norm": 0.5776856541633606, "learning_rate": 8.794052535526792e-05, "loss": 1.756, "step": 4283 }, { "epoch": 0.2387826765509169, "grad_norm": 0.5982547998428345, "learning_rate": 8.793472532254836e-05, "loss": 1.8349, "step": 4284 }, { "epoch": 0.23883841480408005, "grad_norm": 0.5404837727546692, "learning_rate": 8.792892408676048e-05, "loss": 1.6617, "step": 4285 }, { "epoch": 0.23889415305724318, "grad_norm": 0.5049643516540527, "learning_rate": 8.792312164808827e-05, "loss": 1.5132, "step": 4286 }, { "epoch": 0.23894989131040634, "grad_norm": 0.5474380254745483, "learning_rate": 8.791731800671575e-05, "loss": 1.7937, "step": 4287 }, { "epoch": 0.23900562956356947, "grad_norm": 0.5853757858276367, "learning_rate": 8.791151316282698e-05, "loss": 1.8488, "step": 4288 }, { "epoch": 0.23906136781673262, "grad_norm": 0.574220597743988, "learning_rate": 8.790570711660604e-05, "loss": 1.7211, "step": 4289 }, { "epoch": 0.23911710606989578, "grad_norm": 0.580944836139679, "learning_rate": 8.789989986823707e-05, "loss": 1.6015, "step": 4290 }, { "epoch": 0.2391728443230589, "grad_norm": 0.5716251730918884, "learning_rate": 8.789409141790426e-05, "loss": 1.7375, "step": 4291 }, { "epoch": 0.23922858257622207, "grad_norm": 0.5204554200172424, "learning_rate": 8.788828176579182e-05, "loss": 1.7231, "step": 4292 }, { "epoch": 0.2392843208293852, "grad_norm": 0.529961884021759, "learning_rate": 8.788247091208397e-05, "loss": 1.7355, "step": 4293 }, { "epoch": 0.23934005908254835, "grad_norm": 0.5950244665145874, "learning_rate": 8.787665885696502e-05, "loss": 2.0786, "step": 4294 }, { "epoch": 0.2393957973357115, "grad_norm": 0.5200558304786682, "learning_rate": 8.78708456006193e-05, "loss": 1.6045, "step": 4295 }, { "epoch": 0.23945153558887464, "grad_norm": 0.5256621241569519, "learning_rate": 8.786503114323113e-05, "loss": 1.6679, "step": 4296 }, { "epoch": 0.2395072738420378, "grad_norm": 0.5340785980224609, "learning_rate": 8.785921548498494e-05, "loss": 1.6646, "step": 4297 }, { "epoch": 0.23956301209520095, "grad_norm": 0.5381552577018738, "learning_rate": 8.785339862606521e-05, "loss": 1.7888, "step": 4298 }, { "epoch": 0.23961875034836408, "grad_norm": 0.6692368984222412, "learning_rate": 8.784758056665634e-05, "loss": 1.9363, "step": 4299 }, { "epoch": 0.23967448860152724, "grad_norm": 0.5429602265357971, "learning_rate": 8.784176130694289e-05, "loss": 1.8477, "step": 4300 }, { "epoch": 0.23973022685469036, "grad_norm": 0.5760909914970398, "learning_rate": 8.783594084710941e-05, "loss": 1.9106, "step": 4301 }, { "epoch": 0.23978596510785352, "grad_norm": 0.5410770773887634, "learning_rate": 8.783011918734048e-05, "loss": 1.7685, "step": 4302 }, { "epoch": 0.23984170336101668, "grad_norm": 0.6343144774436951, "learning_rate": 8.782429632782073e-05, "loss": 1.6641, "step": 4303 }, { "epoch": 0.2398974416141798, "grad_norm": 0.5951781868934631, "learning_rate": 8.781847226873484e-05, "loss": 1.8908, "step": 4304 }, { "epoch": 0.23995317986734296, "grad_norm": 0.5187268257141113, "learning_rate": 8.78126470102675e-05, "loss": 1.5571, "step": 4305 }, { "epoch": 0.2400089181205061, "grad_norm": 0.5376867651939392, "learning_rate": 8.780682055260348e-05, "loss": 1.514, "step": 4306 }, { "epoch": 0.24006465637366925, "grad_norm": 0.5534177422523499, "learning_rate": 8.780099289592751e-05, "loss": 1.581, "step": 4307 }, { "epoch": 0.2401203946268324, "grad_norm": 0.5672261714935303, "learning_rate": 8.779516404042446e-05, "loss": 1.7344, "step": 4308 }, { "epoch": 0.24017613287999554, "grad_norm": 0.5509449243545532, "learning_rate": 8.778933398627915e-05, "loss": 1.7162, "step": 4309 }, { "epoch": 0.2402318711331587, "grad_norm": 0.5842772126197815, "learning_rate": 8.778350273367653e-05, "loss": 1.7958, "step": 4310 }, { "epoch": 0.24028760938632182, "grad_norm": 0.50345379114151, "learning_rate": 8.777767028280145e-05, "loss": 1.4958, "step": 4311 }, { "epoch": 0.24034334763948498, "grad_norm": 0.5337620377540588, "learning_rate": 8.777183663383896e-05, "loss": 1.6876, "step": 4312 }, { "epoch": 0.24039908589264813, "grad_norm": 0.5183177590370178, "learning_rate": 8.776600178697402e-05, "loss": 1.7538, "step": 4313 }, { "epoch": 0.24045482414581126, "grad_norm": 0.5510264039039612, "learning_rate": 8.776016574239171e-05, "loss": 1.7722, "step": 4314 }, { "epoch": 0.24051056239897442, "grad_norm": 0.5638562440872192, "learning_rate": 8.77543285002771e-05, "loss": 1.8447, "step": 4315 }, { "epoch": 0.24056630065213755, "grad_norm": 0.6304780840873718, "learning_rate": 8.774849006081529e-05, "loss": 2.111, "step": 4316 }, { "epoch": 0.2406220389053007, "grad_norm": 0.5731822848320007, "learning_rate": 8.774265042419148e-05, "loss": 1.9022, "step": 4317 }, { "epoch": 0.24067777715846386, "grad_norm": 0.5105111002922058, "learning_rate": 8.773680959059086e-05, "loss": 1.4723, "step": 4318 }, { "epoch": 0.240733515411627, "grad_norm": 0.5694832801818848, "learning_rate": 8.773096756019866e-05, "loss": 1.8138, "step": 4319 }, { "epoch": 0.24078925366479015, "grad_norm": 0.5039976835250854, "learning_rate": 8.772512433320014e-05, "loss": 1.5152, "step": 4320 }, { "epoch": 0.2408449919179533, "grad_norm": 0.5481953024864197, "learning_rate": 8.771927990978063e-05, "loss": 1.7373, "step": 4321 }, { "epoch": 0.24090073017111643, "grad_norm": 0.5046210885047913, "learning_rate": 8.771343429012549e-05, "loss": 1.3736, "step": 4322 }, { "epoch": 0.2409564684242796, "grad_norm": 0.5144927501678467, "learning_rate": 8.77075874744201e-05, "loss": 1.7854, "step": 4323 }, { "epoch": 0.24101220667744272, "grad_norm": 0.5863038301467896, "learning_rate": 8.770173946284987e-05, "loss": 1.9596, "step": 4324 }, { "epoch": 0.24106794493060588, "grad_norm": 0.5546390414237976, "learning_rate": 8.76958902556003e-05, "loss": 1.6905, "step": 4325 }, { "epoch": 0.24112368318376903, "grad_norm": 0.5615156888961792, "learning_rate": 8.769003985285686e-05, "loss": 1.8015, "step": 4326 }, { "epoch": 0.24117942143693216, "grad_norm": 0.5112027525901794, "learning_rate": 8.76841882548051e-05, "loss": 1.7408, "step": 4327 }, { "epoch": 0.24123515969009532, "grad_norm": 0.523891270160675, "learning_rate": 8.767833546163062e-05, "loss": 1.6473, "step": 4328 }, { "epoch": 0.24129089794325845, "grad_norm": 0.5263711214065552, "learning_rate": 8.767248147351902e-05, "loss": 1.724, "step": 4329 }, { "epoch": 0.2413466361964216, "grad_norm": 0.5724520683288574, "learning_rate": 8.766662629065594e-05, "loss": 1.7469, "step": 4330 }, { "epoch": 0.24140237444958476, "grad_norm": 0.5471790432929993, "learning_rate": 8.76607699132271e-05, "loss": 1.7262, "step": 4331 }, { "epoch": 0.2414581127027479, "grad_norm": 0.6246349215507507, "learning_rate": 8.76549123414182e-05, "loss": 2.0055, "step": 4332 }, { "epoch": 0.24151385095591105, "grad_norm": 0.5492396354675293, "learning_rate": 8.764905357541505e-05, "loss": 1.7602, "step": 4333 }, { "epoch": 0.24156958920907418, "grad_norm": 0.5340796113014221, "learning_rate": 8.76431936154034e-05, "loss": 1.7666, "step": 4334 }, { "epoch": 0.24162532746223733, "grad_norm": 0.5311720967292786, "learning_rate": 8.763733246156913e-05, "loss": 1.5892, "step": 4335 }, { "epoch": 0.2416810657154005, "grad_norm": 0.5926803350448608, "learning_rate": 8.763147011409811e-05, "loss": 1.9398, "step": 4336 }, { "epoch": 0.24173680396856362, "grad_norm": 0.5204175710678101, "learning_rate": 8.762560657317629e-05, "loss": 1.4019, "step": 4337 }, { "epoch": 0.24179254222172678, "grad_norm": 0.5834428071975708, "learning_rate": 8.761974183898957e-05, "loss": 1.7063, "step": 4338 }, { "epoch": 0.2418482804748899, "grad_norm": 0.5776971578598022, "learning_rate": 8.7613875911724e-05, "loss": 1.7957, "step": 4339 }, { "epoch": 0.24190401872805306, "grad_norm": 0.5160627365112305, "learning_rate": 8.760800879156558e-05, "loss": 1.5686, "step": 4340 }, { "epoch": 0.24195975698121622, "grad_norm": 0.5783469676971436, "learning_rate": 8.760214047870039e-05, "loss": 2.0046, "step": 4341 }, { "epoch": 0.24201549523437935, "grad_norm": 0.5625891089439392, "learning_rate": 8.759627097331455e-05, "loss": 1.6902, "step": 4342 }, { "epoch": 0.2420712334875425, "grad_norm": 0.5326409935951233, "learning_rate": 8.759040027559418e-05, "loss": 1.9046, "step": 4343 }, { "epoch": 0.24212697174070566, "grad_norm": 0.5869771838188171, "learning_rate": 8.758452838572551e-05, "loss": 1.7593, "step": 4344 }, { "epoch": 0.2421827099938688, "grad_norm": 0.6008633971214294, "learning_rate": 8.75786553038947e-05, "loss": 2.0021, "step": 4345 }, { "epoch": 0.24223844824703195, "grad_norm": 0.48187822103500366, "learning_rate": 8.757278103028806e-05, "loss": 1.1718, "step": 4346 }, { "epoch": 0.24229418650019507, "grad_norm": 0.5490634441375732, "learning_rate": 8.756690556509186e-05, "loss": 1.6083, "step": 4347 }, { "epoch": 0.24234992475335823, "grad_norm": 0.5408362746238708, "learning_rate": 8.756102890849246e-05, "loss": 1.6982, "step": 4348 }, { "epoch": 0.2424056630065214, "grad_norm": 0.5706157684326172, "learning_rate": 8.75551510606762e-05, "loss": 1.8505, "step": 4349 }, { "epoch": 0.24246140125968452, "grad_norm": 0.573557436466217, "learning_rate": 8.754927202182953e-05, "loss": 1.8455, "step": 4350 }, { "epoch": 0.24251713951284767, "grad_norm": 0.5338667035102844, "learning_rate": 8.754339179213886e-05, "loss": 1.5964, "step": 4351 }, { "epoch": 0.2425728777660108, "grad_norm": 0.5258156061172485, "learning_rate": 8.753751037179073e-05, "loss": 1.7428, "step": 4352 }, { "epoch": 0.24262861601917396, "grad_norm": 0.5279545783996582, "learning_rate": 8.75316277609716e-05, "loss": 1.7279, "step": 4353 }, { "epoch": 0.24268435427233712, "grad_norm": 0.5074349045753479, "learning_rate": 8.752574395986806e-05, "loss": 1.508, "step": 4354 }, { "epoch": 0.24274009252550025, "grad_norm": 0.5738914012908936, "learning_rate": 8.751985896866672e-05, "loss": 1.8978, "step": 4355 }, { "epoch": 0.2427958307786634, "grad_norm": 0.6244510412216187, "learning_rate": 8.75139727875542e-05, "loss": 1.94, "step": 4356 }, { "epoch": 0.24285156903182653, "grad_norm": 0.5642906427383423, "learning_rate": 8.75080854167172e-05, "loss": 1.9239, "step": 4357 }, { "epoch": 0.2429073072849897, "grad_norm": 0.5614916086196899, "learning_rate": 8.75021968563424e-05, "loss": 1.6965, "step": 4358 }, { "epoch": 0.24296304553815284, "grad_norm": 0.5800240635871887, "learning_rate": 8.749630710661658e-05, "loss": 1.7979, "step": 4359 }, { "epoch": 0.24301878379131597, "grad_norm": 0.5255259871482849, "learning_rate": 8.749041616772653e-05, "loss": 1.753, "step": 4360 }, { "epoch": 0.24307452204447913, "grad_norm": 0.5205305814743042, "learning_rate": 8.748452403985905e-05, "loss": 1.518, "step": 4361 }, { "epoch": 0.24313026029764226, "grad_norm": 0.5705804824829102, "learning_rate": 8.747863072320102e-05, "loss": 1.7267, "step": 4362 }, { "epoch": 0.24318599855080542, "grad_norm": 0.5209723114967346, "learning_rate": 8.747273621793932e-05, "loss": 1.6697, "step": 4363 }, { "epoch": 0.24324173680396857, "grad_norm": 0.5164801478385925, "learning_rate": 8.746684052426093e-05, "loss": 1.628, "step": 4364 }, { "epoch": 0.2432974750571317, "grad_norm": 0.6018537282943726, "learning_rate": 8.74609436423528e-05, "loss": 1.8611, "step": 4365 }, { "epoch": 0.24335321331029486, "grad_norm": 0.5693862438201904, "learning_rate": 8.745504557240195e-05, "loss": 1.8587, "step": 4366 }, { "epoch": 0.24340895156345801, "grad_norm": 0.5834870338439941, "learning_rate": 8.744914631459544e-05, "loss": 1.82, "step": 4367 }, { "epoch": 0.24346468981662114, "grad_norm": 0.5055362582206726, "learning_rate": 8.744324586912033e-05, "loss": 1.5662, "step": 4368 }, { "epoch": 0.2435204280697843, "grad_norm": 0.5283217430114746, "learning_rate": 8.74373442361638e-05, "loss": 1.618, "step": 4369 }, { "epoch": 0.24357616632294743, "grad_norm": 0.5035987496376038, "learning_rate": 8.743144141591297e-05, "loss": 1.6436, "step": 4370 }, { "epoch": 0.2436319045761106, "grad_norm": 0.5793476700782776, "learning_rate": 8.742553740855506e-05, "loss": 1.9764, "step": 4371 }, { "epoch": 0.24368764282927374, "grad_norm": 0.5031444430351257, "learning_rate": 8.741963221427732e-05, "loss": 1.4643, "step": 4372 }, { "epoch": 0.24374338108243687, "grad_norm": 0.5925171971321106, "learning_rate": 8.7413725833267e-05, "loss": 1.7132, "step": 4373 }, { "epoch": 0.24379911933560003, "grad_norm": 0.5252764225006104, "learning_rate": 8.740781826571144e-05, "loss": 1.613, "step": 4374 }, { "epoch": 0.24385485758876316, "grad_norm": 0.5435476899147034, "learning_rate": 8.740190951179799e-05, "loss": 1.7225, "step": 4375 }, { "epoch": 0.24391059584192631, "grad_norm": 0.5505743026733398, "learning_rate": 8.739599957171404e-05, "loss": 1.7796, "step": 4376 }, { "epoch": 0.24396633409508947, "grad_norm": 0.5711907148361206, "learning_rate": 8.7390088445647e-05, "loss": 1.8918, "step": 4377 }, { "epoch": 0.2440220723482526, "grad_norm": 0.617215096950531, "learning_rate": 8.738417613378439e-05, "loss": 1.6408, "step": 4378 }, { "epoch": 0.24407781060141576, "grad_norm": 0.5194396376609802, "learning_rate": 8.737826263631363e-05, "loss": 1.5007, "step": 4379 }, { "epoch": 0.24413354885457889, "grad_norm": NaN, "learning_rate": 8.737826263631363e-05, "loss": 1.8818, "step": 4380 }, { "epoch": 0.24418928710774204, "grad_norm": 0.5449255704879761, "learning_rate": 8.737234795342234e-05, "loss": 1.6008, "step": 4381 }, { "epoch": 0.2442450253609052, "grad_norm": 0.517254650592804, "learning_rate": 8.736643208529807e-05, "loss": 1.5589, "step": 4382 }, { "epoch": 0.24430076361406833, "grad_norm": 0.5613778829574585, "learning_rate": 8.736051503212843e-05, "loss": 1.8349, "step": 4383 }, { "epoch": 0.24435650186723148, "grad_norm": 0.5578374266624451, "learning_rate": 8.735459679410108e-05, "loss": 1.6444, "step": 4384 }, { "epoch": 0.2444122401203946, "grad_norm": 0.5179364681243896, "learning_rate": 8.734867737140371e-05, "loss": 1.5685, "step": 4385 }, { "epoch": 0.24446797837355777, "grad_norm": 0.5676231980323792, "learning_rate": 8.734275676422406e-05, "loss": 1.7138, "step": 4386 }, { "epoch": 0.24452371662672093, "grad_norm": 0.5979743599891663, "learning_rate": 8.73368349727499e-05, "loss": 1.8035, "step": 4387 }, { "epoch": 0.24457945487988406, "grad_norm": 0.566631555557251, "learning_rate": 8.733091199716899e-05, "loss": 1.7692, "step": 4388 }, { "epoch": 0.2446351931330472, "grad_norm": 0.5594037175178528, "learning_rate": 8.732498783766923e-05, "loss": 1.7145, "step": 4389 }, { "epoch": 0.24469093138621037, "grad_norm": 0.47728872299194336, "learning_rate": 8.731906249443847e-05, "loss": 1.3759, "step": 4390 }, { "epoch": 0.2447466696393735, "grad_norm": 0.5077241063117981, "learning_rate": 8.731313596766461e-05, "loss": 1.6403, "step": 4391 }, { "epoch": 0.24480240789253666, "grad_norm": 0.51840740442276, "learning_rate": 8.730720825753567e-05, "loss": 1.7304, "step": 4392 }, { "epoch": 0.24485814614569978, "grad_norm": 0.555458664894104, "learning_rate": 8.730127936423957e-05, "loss": 1.7039, "step": 4393 }, { "epoch": 0.24491388439886294, "grad_norm": 0.530720591545105, "learning_rate": 8.729534928796438e-05, "loss": 1.87, "step": 4394 }, { "epoch": 0.2449696226520261, "grad_norm": 0.5183333158493042, "learning_rate": 8.728941802889816e-05, "loss": 1.6194, "step": 4395 }, { "epoch": 0.24502536090518923, "grad_norm": 0.5418990254402161, "learning_rate": 8.728348558722901e-05, "loss": 1.6804, "step": 4396 }, { "epoch": 0.24508109915835238, "grad_norm": 0.5377148985862732, "learning_rate": 8.727755196314507e-05, "loss": 1.5289, "step": 4397 }, { "epoch": 0.2451368374115155, "grad_norm": 0.5729206800460815, "learning_rate": 8.727161715683452e-05, "loss": 1.7488, "step": 4398 }, { "epoch": 0.24519257566467867, "grad_norm": 0.5957255363464355, "learning_rate": 8.726568116848559e-05, "loss": 1.4552, "step": 4399 }, { "epoch": 0.24524831391784183, "grad_norm": 0.6279282569885254, "learning_rate": 8.725974399828653e-05, "loss": 1.8822, "step": 4400 }, { "epoch": 0.24530405217100495, "grad_norm": 0.5379980802536011, "learning_rate": 8.725380564642563e-05, "loss": 1.7286, "step": 4401 }, { "epoch": 0.2453597904241681, "grad_norm": 0.506988525390625, "learning_rate": 8.724786611309123e-05, "loss": 1.5182, "step": 4402 }, { "epoch": 0.24541552867733124, "grad_norm": 0.5806999206542969, "learning_rate": 8.724192539847167e-05, "loss": 1.7967, "step": 4403 }, { "epoch": 0.2454712669304944, "grad_norm": 0.6368009448051453, "learning_rate": 8.723598350275537e-05, "loss": 1.8081, "step": 4404 }, { "epoch": 0.24552700518365755, "grad_norm": 0.6073201894760132, "learning_rate": 8.723004042613079e-05, "loss": 1.8369, "step": 4405 }, { "epoch": 0.24558274343682068, "grad_norm": 0.5500373244285583, "learning_rate": 8.722409616878637e-05, "loss": 1.6556, "step": 4406 }, { "epoch": 0.24563848168998384, "grad_norm": 0.5122720003128052, "learning_rate": 8.721815073091068e-05, "loss": 1.5745, "step": 4407 }, { "epoch": 0.24569421994314697, "grad_norm": 0.5759167671203613, "learning_rate": 8.721220411269222e-05, "loss": 1.8282, "step": 4408 }, { "epoch": 0.24574995819631013, "grad_norm": 0.5656915307044983, "learning_rate": 8.720625631431963e-05, "loss": 1.6782, "step": 4409 }, { "epoch": 0.24580569644947328, "grad_norm": 0.5352250933647156, "learning_rate": 8.72003073359815e-05, "loss": 1.7703, "step": 4410 }, { "epoch": 0.2458614347026364, "grad_norm": 0.6013755798339844, "learning_rate": 8.719435717786653e-05, "loss": 1.4931, "step": 4411 }, { "epoch": 0.24591717295579957, "grad_norm": 0.5831592082977295, "learning_rate": 8.718840584016339e-05, "loss": 1.8267, "step": 4412 }, { "epoch": 0.24597291120896272, "grad_norm": 0.5686485767364502, "learning_rate": 8.718245332306086e-05, "loss": 1.7073, "step": 4413 }, { "epoch": 0.24602864946212585, "grad_norm": 0.5540615320205688, "learning_rate": 8.717649962674768e-05, "loss": 1.7481, "step": 4414 }, { "epoch": 0.246084387715289, "grad_norm": 0.4984779953956604, "learning_rate": 8.71705447514127e-05, "loss": 1.4674, "step": 4415 }, { "epoch": 0.24614012596845214, "grad_norm": 0.5658791065216064, "learning_rate": 8.716458869724475e-05, "loss": 1.7044, "step": 4416 }, { "epoch": 0.2461958642216153, "grad_norm": 0.6222524046897888, "learning_rate": 8.715863146443273e-05, "loss": 1.9216, "step": 4417 }, { "epoch": 0.24625160247477845, "grad_norm": 0.5234952569007874, "learning_rate": 8.715267305316558e-05, "loss": 1.3814, "step": 4418 }, { "epoch": 0.24630734072794158, "grad_norm": 0.5298272371292114, "learning_rate": 8.714671346363226e-05, "loss": 1.7245, "step": 4419 }, { "epoch": 0.24636307898110474, "grad_norm": 0.5426690578460693, "learning_rate": 8.714075269602176e-05, "loss": 1.7225, "step": 4420 }, { "epoch": 0.24641881723426787, "grad_norm": 0.5064488649368286, "learning_rate": 8.713479075052312e-05, "loss": 1.637, "step": 4421 }, { "epoch": 0.24647455548743102, "grad_norm": 0.6294771432876587, "learning_rate": 8.712882762732543e-05, "loss": 2.0957, "step": 4422 }, { "epoch": 0.24653029374059418, "grad_norm": 0.5518829226493835, "learning_rate": 8.712286332661783e-05, "loss": 1.8551, "step": 4423 }, { "epoch": 0.2465860319937573, "grad_norm": 0.5775428414344788, "learning_rate": 8.711689784858943e-05, "loss": 2.0364, "step": 4424 }, { "epoch": 0.24664177024692047, "grad_norm": 0.585757851600647, "learning_rate": 8.711093119342944e-05, "loss": 1.9078, "step": 4425 }, { "epoch": 0.2466975085000836, "grad_norm": 0.49010977149009705, "learning_rate": 8.710496336132707e-05, "loss": 1.7235, "step": 4426 }, { "epoch": 0.24675324675324675, "grad_norm": 0.4925966262817383, "learning_rate": 8.709899435247162e-05, "loss": 1.5281, "step": 4427 }, { "epoch": 0.2468089850064099, "grad_norm": 0.5210297107696533, "learning_rate": 8.709302416705235e-05, "loss": 1.6194, "step": 4428 }, { "epoch": 0.24686472325957304, "grad_norm": 0.5486511588096619, "learning_rate": 8.708705280525863e-05, "loss": 1.8987, "step": 4429 }, { "epoch": 0.2469204615127362, "grad_norm": 0.5911165475845337, "learning_rate": 8.708108026727983e-05, "loss": 1.8762, "step": 4430 }, { "epoch": 0.24697619976589932, "grad_norm": 0.557861864566803, "learning_rate": 8.707510655330535e-05, "loss": 1.7246, "step": 4431 }, { "epoch": 0.24703193801906248, "grad_norm": 0.5598505139350891, "learning_rate": 8.706913166352468e-05, "loss": 1.7012, "step": 4432 }, { "epoch": 0.24708767627222564, "grad_norm": 0.523493230342865, "learning_rate": 8.706315559812725e-05, "loss": 1.6476, "step": 4433 }, { "epoch": 0.24714341452538877, "grad_norm": 0.5727233290672302, "learning_rate": 8.705717835730263e-05, "loss": 1.7085, "step": 4434 }, { "epoch": 0.24719915277855192, "grad_norm": 0.5231149792671204, "learning_rate": 8.705119994124038e-05, "loss": 1.6553, "step": 4435 }, { "epoch": 0.24725489103171508, "grad_norm": 0.5807697176933289, "learning_rate": 8.70452203501301e-05, "loss": 1.9495, "step": 4436 }, { "epoch": 0.2473106292848782, "grad_norm": 0.538212239742279, "learning_rate": 8.703923958416141e-05, "loss": 1.6201, "step": 4437 }, { "epoch": 0.24736636753804137, "grad_norm": 0.5267363786697388, "learning_rate": 8.703325764352397e-05, "loss": 1.6372, "step": 4438 }, { "epoch": 0.2474221057912045, "grad_norm": 0.5570881962776184, "learning_rate": 8.702727452840753e-05, "loss": 1.7135, "step": 4439 }, { "epoch": 0.24747784404436765, "grad_norm": 0.5702007412910461, "learning_rate": 8.702129023900184e-05, "loss": 1.7636, "step": 4440 }, { "epoch": 0.2475335822975308, "grad_norm": 0.5725893974304199, "learning_rate": 8.701530477549666e-05, "loss": 1.7144, "step": 4441 }, { "epoch": 0.24758932055069394, "grad_norm": 0.5385577082633972, "learning_rate": 8.700931813808182e-05, "loss": 1.7915, "step": 4442 }, { "epoch": 0.2476450588038571, "grad_norm": 0.625249981880188, "learning_rate": 8.700333032694721e-05, "loss": 1.8956, "step": 4443 }, { "epoch": 0.24770079705702022, "grad_norm": 0.6568485498428345, "learning_rate": 8.69973413422827e-05, "loss": 2.0, "step": 4444 }, { "epoch": 0.24775653531018338, "grad_norm": 0.5595792531967163, "learning_rate": 8.699135118427821e-05, "loss": 1.7215, "step": 4445 }, { "epoch": 0.24781227356334654, "grad_norm": 0.5085048675537109, "learning_rate": 8.698535985312376e-05, "loss": 1.5958, "step": 4446 }, { "epoch": 0.24786801181650966, "grad_norm": 0.5155544281005859, "learning_rate": 8.697936734900932e-05, "loss": 1.7741, "step": 4447 }, { "epoch": 0.24792375006967282, "grad_norm": 0.5145729780197144, "learning_rate": 8.697337367212496e-05, "loss": 1.7966, "step": 4448 }, { "epoch": 0.24797948832283595, "grad_norm": 0.5527476072311401, "learning_rate": 8.696737882266076e-05, "loss": 1.8026, "step": 4449 }, { "epoch": 0.2480352265759991, "grad_norm": 0.5763035416603088, "learning_rate": 8.696138280080684e-05, "loss": 1.7823, "step": 4450 }, { "epoch": 0.24809096482916226, "grad_norm": 0.5513672828674316, "learning_rate": 8.695538560675334e-05, "loss": 1.5817, "step": 4451 }, { "epoch": 0.2481467030823254, "grad_norm": 0.5553067922592163, "learning_rate": 8.694938724069048e-05, "loss": 1.8425, "step": 4452 }, { "epoch": 0.24820244133548855, "grad_norm": 0.49385184049606323, "learning_rate": 8.69433877028085e-05, "loss": 1.6939, "step": 4453 }, { "epoch": 0.24825817958865168, "grad_norm": 0.5889978408813477, "learning_rate": 8.693738699329765e-05, "loss": 1.6874, "step": 4454 }, { "epoch": 0.24831391784181484, "grad_norm": 0.556433916091919, "learning_rate": 8.693138511234825e-05, "loss": 1.7339, "step": 4455 }, { "epoch": 0.248369656094978, "grad_norm": 0.5483202338218689, "learning_rate": 8.692538206015062e-05, "loss": 1.8301, "step": 4456 }, { "epoch": 0.24842539434814112, "grad_norm": 0.5633078813552856, "learning_rate": 8.691937783689518e-05, "loss": 1.7435, "step": 4457 }, { "epoch": 0.24848113260130428, "grad_norm": 0.5544833540916443, "learning_rate": 8.691337244277231e-05, "loss": 1.6348, "step": 4458 }, { "epoch": 0.24853687085446743, "grad_norm": 0.5703203082084656, "learning_rate": 8.69073658779725e-05, "loss": 1.6839, "step": 4459 }, { "epoch": 0.24859260910763056, "grad_norm": 0.5441849231719971, "learning_rate": 8.690135814268623e-05, "loss": 1.7292, "step": 4460 }, { "epoch": 0.24864834736079372, "grad_norm": 0.5759615302085876, "learning_rate": 8.689534923710403e-05, "loss": 1.8113, "step": 4461 }, { "epoch": 0.24870408561395685, "grad_norm": 0.568762481212616, "learning_rate": 8.688933916141647e-05, "loss": 1.9261, "step": 4462 }, { "epoch": 0.24875982386712, "grad_norm": 0.5397505164146423, "learning_rate": 8.688332791581415e-05, "loss": 1.8136, "step": 4463 }, { "epoch": 0.24881556212028316, "grad_norm": 0.5890788435935974, "learning_rate": 8.68773155004877e-05, "loss": 1.6383, "step": 4464 }, { "epoch": 0.2488713003734463, "grad_norm": 0.5507654547691345, "learning_rate": 8.687130191562782e-05, "loss": 1.7313, "step": 4465 }, { "epoch": 0.24892703862660945, "grad_norm": 0.5670168399810791, "learning_rate": 8.686528716142523e-05, "loss": 1.7355, "step": 4466 }, { "epoch": 0.24898277687977258, "grad_norm": 0.5866429805755615, "learning_rate": 8.685927123807065e-05, "loss": 1.7786, "step": 4467 }, { "epoch": 0.24903851513293573, "grad_norm": 0.5706139206886292, "learning_rate": 8.68532541457549e-05, "loss": 1.8995, "step": 4468 }, { "epoch": 0.2490942533860989, "grad_norm": 0.5574220418930054, "learning_rate": 8.68472358846688e-05, "loss": 1.86, "step": 4469 }, { "epoch": 0.24914999163926202, "grad_norm": 0.5442642569541931, "learning_rate": 8.684121645500322e-05, "loss": 1.803, "step": 4470 }, { "epoch": 0.24920572989242518, "grad_norm": 0.5070736408233643, "learning_rate": 8.683519585694903e-05, "loss": 1.5786, "step": 4471 }, { "epoch": 0.2492614681455883, "grad_norm": 0.5622973442077637, "learning_rate": 8.682917409069721e-05, "loss": 1.8524, "step": 4472 }, { "epoch": 0.24931720639875146, "grad_norm": 0.5547112226486206, "learning_rate": 8.682315115643872e-05, "loss": 1.7891, "step": 4473 }, { "epoch": 0.24937294465191462, "grad_norm": 0.5251905918121338, "learning_rate": 8.681712705436455e-05, "loss": 1.3104, "step": 4474 }, { "epoch": 0.24942868290507775, "grad_norm": 0.5507151484489441, "learning_rate": 8.68111017846658e-05, "loss": 1.7571, "step": 4475 }, { "epoch": 0.2494844211582409, "grad_norm": 0.628353476524353, "learning_rate": 8.68050753475335e-05, "loss": 1.7915, "step": 4476 }, { "epoch": 0.24954015941140403, "grad_norm": 0.5899398922920227, "learning_rate": 8.67990477431588e-05, "loss": 1.7928, "step": 4477 }, { "epoch": 0.2495958976645672, "grad_norm": 0.5376555919647217, "learning_rate": 8.679301897173287e-05, "loss": 1.6592, "step": 4478 }, { "epoch": 0.24965163591773035, "grad_norm": 0.5241808891296387, "learning_rate": 8.678698903344689e-05, "loss": 1.6482, "step": 4479 }, { "epoch": 0.24970737417089348, "grad_norm": 0.6054913997650146, "learning_rate": 8.67809579284921e-05, "loss": 1.7838, "step": 4480 }, { "epoch": 0.24976311242405663, "grad_norm": 0.56617671251297, "learning_rate": 8.677492565705976e-05, "loss": 1.7705, "step": 4481 }, { "epoch": 0.2498188506772198, "grad_norm": 0.549431324005127, "learning_rate": 8.676889221934121e-05, "loss": 1.8349, "step": 4482 }, { "epoch": 0.24987458893038292, "grad_norm": 0.5290791392326355, "learning_rate": 8.676285761552775e-05, "loss": 1.6761, "step": 4483 }, { "epoch": 0.24993032718354607, "grad_norm": 0.7188482880592346, "learning_rate": 8.675682184581081e-05, "loss": 1.6409, "step": 4484 }, { "epoch": 0.2499860654367092, "grad_norm": 0.5338848233222961, "learning_rate": 8.67507849103818e-05, "loss": 1.4604, "step": 4485 }, { "epoch": 0.25004180368987233, "grad_norm": 0.5384326577186584, "learning_rate": 8.674474680943215e-05, "loss": 1.5605, "step": 4486 }, { "epoch": 0.2500975419430355, "grad_norm": 0.521425724029541, "learning_rate": 8.673870754315336e-05, "loss": 1.625, "step": 4487 }, { "epoch": 0.25015328019619865, "grad_norm": 0.5739718079566956, "learning_rate": 8.673266711173698e-05, "loss": 1.7826, "step": 4488 }, { "epoch": 0.2502090184493618, "grad_norm": 0.5505213141441345, "learning_rate": 8.672662551537457e-05, "loss": 1.595, "step": 4489 }, { "epoch": 0.25026475670252496, "grad_norm": 0.5271283388137817, "learning_rate": 8.672058275425772e-05, "loss": 1.5468, "step": 4490 }, { "epoch": 0.2503204949556881, "grad_norm": 0.5678611993789673, "learning_rate": 8.671453882857808e-05, "loss": 1.8862, "step": 4491 }, { "epoch": 0.2503762332088512, "grad_norm": 0.6000241041183472, "learning_rate": 8.670849373852734e-05, "loss": 1.6133, "step": 4492 }, { "epoch": 0.2504319714620144, "grad_norm": 0.5662490129470825, "learning_rate": 8.670244748429719e-05, "loss": 1.7045, "step": 4493 }, { "epoch": 0.25048770971517753, "grad_norm": 0.5680144429206848, "learning_rate": 8.66964000660794e-05, "loss": 1.6462, "step": 4494 }, { "epoch": 0.25054344796834066, "grad_norm": 0.5490357279777527, "learning_rate": 8.669035148406577e-05, "loss": 1.5736, "step": 4495 }, { "epoch": 0.25059918622150384, "grad_norm": 0.5800120234489441, "learning_rate": 8.668430173844808e-05, "loss": 1.8931, "step": 4496 }, { "epoch": 0.250654924474667, "grad_norm": 0.5286765694618225, "learning_rate": 8.667825082941826e-05, "loss": 1.6553, "step": 4497 }, { "epoch": 0.2507106627278301, "grad_norm": 0.5452672839164734, "learning_rate": 8.667219875716814e-05, "loss": 1.7692, "step": 4498 }, { "epoch": 0.25076640098099323, "grad_norm": 0.5615769028663635, "learning_rate": 8.66661455218897e-05, "loss": 1.8116, "step": 4499 }, { "epoch": 0.2508221392341564, "grad_norm": 0.5832181572914124, "learning_rate": 8.666009112377491e-05, "loss": 1.938, "step": 4500 }, { "epoch": 0.25087787748731954, "grad_norm": 0.5258188247680664, "learning_rate": 8.665403556301576e-05, "loss": 1.6026, "step": 4501 }, { "epoch": 0.2509336157404827, "grad_norm": 0.6271452307701111, "learning_rate": 8.664797883980434e-05, "loss": 1.6589, "step": 4502 }, { "epoch": 0.25098935399364586, "grad_norm": 0.5411872267723083, "learning_rate": 8.664192095433266e-05, "loss": 1.7016, "step": 4503 }, { "epoch": 0.251045092246809, "grad_norm": 0.5610190629959106, "learning_rate": 8.663586190679291e-05, "loss": 1.8425, "step": 4504 }, { "epoch": 0.2511008304999721, "grad_norm": 0.5276908278465271, "learning_rate": 8.662980169737723e-05, "loss": 1.6105, "step": 4505 }, { "epoch": 0.2511565687531353, "grad_norm": 0.5493645668029785, "learning_rate": 8.662374032627778e-05, "loss": 1.9352, "step": 4506 }, { "epoch": 0.25121230700629843, "grad_norm": 0.5296374559402466, "learning_rate": 8.661767779368683e-05, "loss": 1.7867, "step": 4507 }, { "epoch": 0.25126804525946156, "grad_norm": 0.6600750684738159, "learning_rate": 8.661161409979665e-05, "loss": 1.6947, "step": 4508 }, { "epoch": 0.2513237835126247, "grad_norm": 0.5515453815460205, "learning_rate": 8.66055492447995e-05, "loss": 1.796, "step": 4509 }, { "epoch": 0.25137952176578787, "grad_norm": 0.5651318430900574, "learning_rate": 8.659948322888777e-05, "loss": 1.6343, "step": 4510 }, { "epoch": 0.251435260018951, "grad_norm": 0.5783109664916992, "learning_rate": 8.659341605225384e-05, "loss": 1.8057, "step": 4511 }, { "epoch": 0.25149099827211413, "grad_norm": 0.5711765885353088, "learning_rate": 8.65873477150901e-05, "loss": 1.8123, "step": 4512 }, { "epoch": 0.2515467365252773, "grad_norm": 0.5652083158493042, "learning_rate": 8.658127821758899e-05, "loss": 1.7952, "step": 4513 }, { "epoch": 0.25160247477844044, "grad_norm": 0.5652216076850891, "learning_rate": 8.657520755994305e-05, "loss": 1.8295, "step": 4514 }, { "epoch": 0.2516582130316036, "grad_norm": 0.5443994998931885, "learning_rate": 8.656913574234474e-05, "loss": 1.6294, "step": 4515 }, { "epoch": 0.25171395128476676, "grad_norm": 0.5845414400100708, "learning_rate": 8.656306276498667e-05, "loss": 1.8597, "step": 4516 }, { "epoch": 0.2517696895379299, "grad_norm": 0.5372679233551025, "learning_rate": 8.655698862806143e-05, "loss": 1.7067, "step": 4517 }, { "epoch": 0.251825427791093, "grad_norm": 0.5330473780632019, "learning_rate": 8.655091333176165e-05, "loss": 1.7043, "step": 4518 }, { "epoch": 0.2518811660442562, "grad_norm": 0.5988831520080566, "learning_rate": 8.654483687628002e-05, "loss": 1.7418, "step": 4519 }, { "epoch": 0.25193690429741933, "grad_norm": 0.5914613604545593, "learning_rate": 8.65387592618092e-05, "loss": 1.6442, "step": 4520 }, { "epoch": 0.25199264255058246, "grad_norm": 0.5800835490226746, "learning_rate": 8.653268048854201e-05, "loss": 1.7816, "step": 4521 }, { "epoch": 0.2520483808037456, "grad_norm": 0.5335732102394104, "learning_rate": 8.652660055667117e-05, "loss": 1.5046, "step": 4522 }, { "epoch": 0.25210411905690877, "grad_norm": 0.48013389110565186, "learning_rate": 8.652051946638953e-05, "loss": 1.582, "step": 4523 }, { "epoch": 0.2521598573100719, "grad_norm": 0.6047071814537048, "learning_rate": 8.651443721788996e-05, "loss": 1.6199, "step": 4524 }, { "epoch": 0.25221559556323503, "grad_norm": 0.5248143672943115, "learning_rate": 8.650835381136533e-05, "loss": 1.5345, "step": 4525 }, { "epoch": 0.2522713338163982, "grad_norm": 0.5078330636024475, "learning_rate": 8.650226924700855e-05, "loss": 1.6656, "step": 4526 }, { "epoch": 0.25232707206956134, "grad_norm": 0.5320603251457214, "learning_rate": 8.649618352501264e-05, "loss": 1.598, "step": 4527 }, { "epoch": 0.25238281032272447, "grad_norm": 0.49775633215904236, "learning_rate": 8.649009664557057e-05, "loss": 1.3941, "step": 4528 }, { "epoch": 0.25243854857588766, "grad_norm": 0.5565609931945801, "learning_rate": 8.648400860887538e-05, "loss": 1.7144, "step": 4529 }, { "epoch": 0.2524942868290508, "grad_norm": 0.5529298782348633, "learning_rate": 8.647791941512016e-05, "loss": 1.8223, "step": 4530 }, { "epoch": 0.2525500250822139, "grad_norm": 0.5692974328994751, "learning_rate": 8.6471829064498e-05, "loss": 1.6577, "step": 4531 }, { "epoch": 0.25260576333537704, "grad_norm": 0.49695706367492676, "learning_rate": 8.646573755720209e-05, "loss": 1.6222, "step": 4532 }, { "epoch": 0.2526615015885402, "grad_norm": 0.5647556781768799, "learning_rate": 8.645964489342557e-05, "loss": 1.8348, "step": 4533 }, { "epoch": 0.25271723984170336, "grad_norm": 0.5597743391990662, "learning_rate": 8.645355107336171e-05, "loss": 1.7095, "step": 4534 }, { "epoch": 0.2527729780948665, "grad_norm": 0.5715233683586121, "learning_rate": 8.644745609720375e-05, "loss": 1.9243, "step": 4535 }, { "epoch": 0.25282871634802967, "grad_norm": 0.5817229747772217, "learning_rate": 8.644135996514498e-05, "loss": 1.782, "step": 4536 }, { "epoch": 0.2528844546011928, "grad_norm": 0.5697113275527954, "learning_rate": 8.643526267737873e-05, "loss": 1.6014, "step": 4537 }, { "epoch": 0.2529401928543559, "grad_norm": 0.5716366767883301, "learning_rate": 8.642916423409839e-05, "loss": 1.6435, "step": 4538 }, { "epoch": 0.2529959311075191, "grad_norm": 0.5631042718887329, "learning_rate": 8.642306463549736e-05, "loss": 1.7686, "step": 4539 }, { "epoch": 0.25305166936068224, "grad_norm": 0.596517026424408, "learning_rate": 8.641696388176907e-05, "loss": 1.8116, "step": 4540 }, { "epoch": 0.25310740761384537, "grad_norm": 0.47129639983177185, "learning_rate": 8.641086197310703e-05, "loss": 1.4985, "step": 4541 }, { "epoch": 0.25316314586700855, "grad_norm": 0.551607072353363, "learning_rate": 8.640475890970471e-05, "loss": 1.7948, "step": 4542 }, { "epoch": 0.2532188841201717, "grad_norm": 0.559027910232544, "learning_rate": 8.639865469175572e-05, "loss": 1.5825, "step": 4543 }, { "epoch": 0.2532746223733348, "grad_norm": 0.5063076019287109, "learning_rate": 8.639254931945362e-05, "loss": 1.4125, "step": 4544 }, { "epoch": 0.25333036062649794, "grad_norm": 0.5271062254905701, "learning_rate": 8.638644279299202e-05, "loss": 1.6964, "step": 4545 }, { "epoch": 0.2533860988796611, "grad_norm": 0.4922122657299042, "learning_rate": 8.638033511256462e-05, "loss": 1.6725, "step": 4546 }, { "epoch": 0.25344183713282425, "grad_norm": 0.5734017491340637, "learning_rate": 8.637422627836509e-05, "loss": 2.0334, "step": 4547 }, { "epoch": 0.2534975753859874, "grad_norm": 0.4978555738925934, "learning_rate": 8.636811629058718e-05, "loss": 1.6665, "step": 4548 }, { "epoch": 0.25355331363915057, "grad_norm": 0.5637436509132385, "learning_rate": 8.636200514942467e-05, "loss": 1.5875, "step": 4549 }, { "epoch": 0.2536090518923137, "grad_norm": 0.5382322072982788, "learning_rate": 8.635589285507135e-05, "loss": 1.838, "step": 4550 }, { "epoch": 0.2536647901454768, "grad_norm": 0.518650233745575, "learning_rate": 8.634977940772108e-05, "loss": 1.7802, "step": 4551 }, { "epoch": 0.25372052839864, "grad_norm": 0.5153575539588928, "learning_rate": 8.634366480756774e-05, "loss": 1.6153, "step": 4552 }, { "epoch": 0.25377626665180314, "grad_norm": 0.5355269908905029, "learning_rate": 8.633754905480527e-05, "loss": 1.8255, "step": 4553 }, { "epoch": 0.25383200490496627, "grad_norm": 0.5261843204498291, "learning_rate": 8.63314321496276e-05, "loss": 1.6177, "step": 4554 }, { "epoch": 0.2538877431581294, "grad_norm": 0.557314395904541, "learning_rate": 8.632531409222872e-05, "loss": 1.8342, "step": 4555 }, { "epoch": 0.2539434814112926, "grad_norm": 0.5285095572471619, "learning_rate": 8.631919488280267e-05, "loss": 1.6217, "step": 4556 }, { "epoch": 0.2539992196644557, "grad_norm": 0.5471826195716858, "learning_rate": 8.631307452154352e-05, "loss": 1.5318, "step": 4557 }, { "epoch": 0.25405495791761884, "grad_norm": 0.5375044941902161, "learning_rate": 8.630695300864536e-05, "loss": 1.7415, "step": 4558 }, { "epoch": 0.254110696170782, "grad_norm": 0.566832423210144, "learning_rate": 8.630083034430232e-05, "loss": 1.9215, "step": 4559 }, { "epoch": 0.25416643442394515, "grad_norm": 0.5262976884841919, "learning_rate": 8.629470652870861e-05, "loss": 1.5432, "step": 4560 }, { "epoch": 0.2542221726771083, "grad_norm": 0.5495408177375793, "learning_rate": 8.628858156205842e-05, "loss": 1.9161, "step": 4561 }, { "epoch": 0.25427791093027147, "grad_norm": 0.5776422023773193, "learning_rate": 8.6282455444546e-05, "loss": 1.8547, "step": 4562 }, { "epoch": 0.2543336491834346, "grad_norm": 0.5136664509773254, "learning_rate": 8.627632817636563e-05, "loss": 1.3558, "step": 4563 }, { "epoch": 0.2543893874365977, "grad_norm": 0.5449255108833313, "learning_rate": 8.627019975771165e-05, "loss": 1.7991, "step": 4564 }, { "epoch": 0.2544451256897609, "grad_norm": 0.49720707535743713, "learning_rate": 8.626407018877837e-05, "loss": 1.5515, "step": 4565 }, { "epoch": 0.25450086394292404, "grad_norm": 0.5493996739387512, "learning_rate": 8.625793946976026e-05, "loss": 1.7666, "step": 4566 }, { "epoch": 0.25455660219608717, "grad_norm": 0.5458593368530273, "learning_rate": 8.625180760085167e-05, "loss": 1.9701, "step": 4567 }, { "epoch": 0.2546123404492503, "grad_norm": 0.5866237878799438, "learning_rate": 8.624567458224713e-05, "loss": 1.7123, "step": 4568 }, { "epoch": 0.2546680787024135, "grad_norm": 0.5610763430595398, "learning_rate": 8.62395404141411e-05, "loss": 1.8511, "step": 4569 }, { "epoch": 0.2547238169555766, "grad_norm": 0.5264028906822205, "learning_rate": 8.623340509672817e-05, "loss": 1.6913, "step": 4570 }, { "epoch": 0.25477955520873974, "grad_norm": 0.5024250745773315, "learning_rate": 8.622726863020285e-05, "loss": 1.6337, "step": 4571 }, { "epoch": 0.2548352934619029, "grad_norm": 0.6130850315093994, "learning_rate": 8.622113101475982e-05, "loss": 1.8858, "step": 4572 }, { "epoch": 0.25489103171506605, "grad_norm": 0.5543071627616882, "learning_rate": 8.621499225059369e-05, "loss": 1.6353, "step": 4573 }, { "epoch": 0.2549467699682292, "grad_norm": 0.5286437273025513, "learning_rate": 8.620885233789914e-05, "loss": 1.4418, "step": 4574 }, { "epoch": 0.25500250822139237, "grad_norm": 0.5485914349555969, "learning_rate": 8.620271127687092e-05, "loss": 1.7161, "step": 4575 }, { "epoch": 0.2550582464745555, "grad_norm": 0.612994909286499, "learning_rate": 8.619656906770377e-05, "loss": 1.8467, "step": 4576 }, { "epoch": 0.2551139847277186, "grad_norm": 0.5447350740432739, "learning_rate": 8.619042571059248e-05, "loss": 1.7528, "step": 4577 }, { "epoch": 0.25516972298088175, "grad_norm": 0.5236079096794128, "learning_rate": 8.61842812057319e-05, "loss": 1.5648, "step": 4578 }, { "epoch": 0.25522546123404494, "grad_norm": 0.534354567527771, "learning_rate": 8.617813555331689e-05, "loss": 1.5093, "step": 4579 }, { "epoch": 0.25528119948720807, "grad_norm": 0.5146899819374084, "learning_rate": 8.617198875354235e-05, "loss": 1.6445, "step": 4580 }, { "epoch": 0.2553369377403712, "grad_norm": 0.5606057047843933, "learning_rate": 8.616584080660323e-05, "loss": 1.6225, "step": 4581 }, { "epoch": 0.2553926759935344, "grad_norm": 0.557131290435791, "learning_rate": 8.615969171269449e-05, "loss": 1.8017, "step": 4582 }, { "epoch": 0.2554484142466975, "grad_norm": 0.5046922564506531, "learning_rate": 8.615354147201116e-05, "loss": 1.6034, "step": 4583 }, { "epoch": 0.25550415249986064, "grad_norm": 0.5313592553138733, "learning_rate": 8.614739008474829e-05, "loss": 1.481, "step": 4584 }, { "epoch": 0.2555598907530238, "grad_norm": 0.5347174406051636, "learning_rate": 8.614123755110096e-05, "loss": 1.6323, "step": 4585 }, { "epoch": 0.25561562900618695, "grad_norm": 0.5261495113372803, "learning_rate": 8.61350838712643e-05, "loss": 1.4896, "step": 4586 }, { "epoch": 0.2556713672593501, "grad_norm": 0.5374502539634705, "learning_rate": 8.612892904543344e-05, "loss": 1.6488, "step": 4587 }, { "epoch": 0.25572710551251326, "grad_norm": 0.5835258960723877, "learning_rate": 8.612277307380361e-05, "loss": 1.7467, "step": 4588 }, { "epoch": 0.2557828437656764, "grad_norm": 0.519822359085083, "learning_rate": 8.611661595657004e-05, "loss": 1.4627, "step": 4589 }, { "epoch": 0.2558385820188395, "grad_norm": 0.5837191343307495, "learning_rate": 8.611045769392796e-05, "loss": 1.654, "step": 4590 }, { "epoch": 0.25589432027200265, "grad_norm": 0.5844641327857971, "learning_rate": 8.610429828607271e-05, "loss": 1.6177, "step": 4591 }, { "epoch": 0.25595005852516584, "grad_norm": 0.5927681922912598, "learning_rate": 8.609813773319963e-05, "loss": 1.9184, "step": 4592 }, { "epoch": 0.25600579677832896, "grad_norm": 0.6149387955665588, "learning_rate": 8.609197603550409e-05, "loss": 1.6321, "step": 4593 }, { "epoch": 0.2560615350314921, "grad_norm": 0.5619008541107178, "learning_rate": 8.608581319318148e-05, "loss": 1.6094, "step": 4594 }, { "epoch": 0.2561172732846553, "grad_norm": 0.5645739436149597, "learning_rate": 8.607964920642728e-05, "loss": 1.7111, "step": 4595 }, { "epoch": 0.2561730115378184, "grad_norm": 0.5264320373535156, "learning_rate": 8.607348407543699e-05, "loss": 1.5206, "step": 4596 }, { "epoch": 0.25622874979098154, "grad_norm": 0.5533236861228943, "learning_rate": 8.606731780040608e-05, "loss": 1.9129, "step": 4597 }, { "epoch": 0.2562844880441447, "grad_norm": 0.5276892781257629, "learning_rate": 8.606115038153015e-05, "loss": 1.7739, "step": 4598 }, { "epoch": 0.25634022629730785, "grad_norm": 0.5314942598342896, "learning_rate": 8.605498181900477e-05, "loss": 1.6853, "step": 4599 }, { "epoch": 0.256395964550471, "grad_norm": 0.540059506893158, "learning_rate": 8.604881211302559e-05, "loss": 1.8345, "step": 4600 }, { "epoch": 0.2564517028036341, "grad_norm": 0.5306822657585144, "learning_rate": 8.604264126378827e-05, "loss": 1.9012, "step": 4601 }, { "epoch": 0.2565074410567973, "grad_norm": 0.5294952988624573, "learning_rate": 8.603646927148849e-05, "loss": 1.5109, "step": 4602 }, { "epoch": 0.2565631793099604, "grad_norm": 0.5673249959945679, "learning_rate": 8.603029613632205e-05, "loss": 1.758, "step": 4603 }, { "epoch": 0.25661891756312355, "grad_norm": 0.5006965398788452, "learning_rate": 8.602412185848466e-05, "loss": 1.6211, "step": 4604 }, { "epoch": 0.25667465581628673, "grad_norm": 0.5873995423316956, "learning_rate": 8.601794643817216e-05, "loss": 1.8896, "step": 4605 }, { "epoch": 0.25673039406944986, "grad_norm": 0.56819748878479, "learning_rate": 8.601176987558041e-05, "loss": 1.6733, "step": 4606 }, { "epoch": 0.256786132322613, "grad_norm": 0.5610432624816895, "learning_rate": 8.600559217090529e-05, "loss": 1.824, "step": 4607 }, { "epoch": 0.2568418705757762, "grad_norm": 0.5451894998550415, "learning_rate": 8.599941332434269e-05, "loss": 1.7229, "step": 4608 }, { "epoch": 0.2568976088289393, "grad_norm": 0.9107519388198853, "learning_rate": 8.599323333608861e-05, "loss": 1.846, "step": 4609 }, { "epoch": 0.25695334708210243, "grad_norm": 0.5975711941719055, "learning_rate": 8.598705220633903e-05, "loss": 1.7334, "step": 4610 }, { "epoch": 0.2570090853352656, "grad_norm": 0.5969035625457764, "learning_rate": 8.598086993528996e-05, "loss": 1.9449, "step": 4611 }, { "epoch": 0.25706482358842875, "grad_norm": 0.6146485805511475, "learning_rate": 8.597468652313747e-05, "loss": 1.8884, "step": 4612 }, { "epoch": 0.2571205618415919, "grad_norm": 0.5359372496604919, "learning_rate": 8.596850197007767e-05, "loss": 1.6199, "step": 4613 }, { "epoch": 0.257176300094755, "grad_norm": 0.5491176247596741, "learning_rate": 8.596231627630671e-05, "loss": 1.5702, "step": 4614 }, { "epoch": 0.2572320383479182, "grad_norm": 0.5316644310951233, "learning_rate": 8.595612944202076e-05, "loss": 1.6538, "step": 4615 }, { "epoch": 0.2572877766010813, "grad_norm": 0.5944792032241821, "learning_rate": 8.5949941467416e-05, "loss": 1.79, "step": 4616 }, { "epoch": 0.25734351485424445, "grad_norm": 0.5629575848579407, "learning_rate": 8.594375235268872e-05, "loss": 2.0629, "step": 4617 }, { "epoch": 0.25739925310740763, "grad_norm": 0.5681300163269043, "learning_rate": 8.593756209803518e-05, "loss": 1.7105, "step": 4618 }, { "epoch": 0.25745499136057076, "grad_norm": 0.5259959697723389, "learning_rate": 8.59313707036517e-05, "loss": 1.7797, "step": 4619 }, { "epoch": 0.2575107296137339, "grad_norm": 0.5173026323318481, "learning_rate": 8.592517816973462e-05, "loss": 1.6879, "step": 4620 }, { "epoch": 0.2575664678668971, "grad_norm": 0.5310641527175903, "learning_rate": 8.591898449648035e-05, "loss": 1.6947, "step": 4621 }, { "epoch": 0.2576222061200602, "grad_norm": 0.5746062397956848, "learning_rate": 8.591278968408532e-05, "loss": 1.8276, "step": 4622 }, { "epoch": 0.25767794437322333, "grad_norm": 0.5601612329483032, "learning_rate": 8.590659373274599e-05, "loss": 1.6054, "step": 4623 }, { "epoch": 0.25773368262638646, "grad_norm": 0.5777058601379395, "learning_rate": 8.590039664265885e-05, "loss": 1.612, "step": 4624 }, { "epoch": 0.25778942087954965, "grad_norm": 0.6337921023368835, "learning_rate": 8.589419841402047e-05, "loss": 2.1569, "step": 4625 }, { "epoch": 0.2578451591327128, "grad_norm": 0.5203370451927185, "learning_rate": 8.588799904702736e-05, "loss": 1.4849, "step": 4626 }, { "epoch": 0.2579008973858759, "grad_norm": 0.55791175365448, "learning_rate": 8.588179854187616e-05, "loss": 1.882, "step": 4627 }, { "epoch": 0.2579566356390391, "grad_norm": 0.581343948841095, "learning_rate": 8.587559689876354e-05, "loss": 1.7811, "step": 4628 }, { "epoch": 0.2580123738922022, "grad_norm": 0.6163395047187805, "learning_rate": 8.586939411788615e-05, "loss": 1.8589, "step": 4629 }, { "epoch": 0.25806811214536535, "grad_norm": 0.5277383327484131, "learning_rate": 8.586319019944071e-05, "loss": 1.5817, "step": 4630 }, { "epoch": 0.25812385039852853, "grad_norm": 0.5042583346366882, "learning_rate": 8.585698514362397e-05, "loss": 1.4472, "step": 4631 }, { "epoch": 0.25817958865169166, "grad_norm": 0.5802309513092041, "learning_rate": 8.585077895063271e-05, "loss": 1.9396, "step": 4632 }, { "epoch": 0.2582353269048548, "grad_norm": 0.5798273682594299, "learning_rate": 8.58445716206638e-05, "loss": 1.6806, "step": 4633 }, { "epoch": 0.258291065158018, "grad_norm": 0.5102317333221436, "learning_rate": 8.583836315391403e-05, "loss": 1.5884, "step": 4634 }, { "epoch": 0.2583468034111811, "grad_norm": 0.6215993165969849, "learning_rate": 8.583215355058035e-05, "loss": 2.001, "step": 4635 }, { "epoch": 0.25840254166434423, "grad_norm": 0.5116714835166931, "learning_rate": 8.582594281085967e-05, "loss": 1.6639, "step": 4636 }, { "epoch": 0.25845827991750736, "grad_norm": 0.5677070617675781, "learning_rate": 8.581973093494897e-05, "loss": 1.841, "step": 4637 }, { "epoch": 0.25851401817067055, "grad_norm": 0.5552488565444946, "learning_rate": 8.581351792304524e-05, "loss": 1.6623, "step": 4638 }, { "epoch": 0.2585697564238337, "grad_norm": 0.5567041635513306, "learning_rate": 8.580730377534554e-05, "loss": 1.5144, "step": 4639 }, { "epoch": 0.2586254946769968, "grad_norm": 0.5067396759986877, "learning_rate": 8.580108849204693e-05, "loss": 1.4875, "step": 4640 }, { "epoch": 0.25868123293016, "grad_norm": 0.5226799845695496, "learning_rate": 8.579487207334653e-05, "loss": 1.7197, "step": 4641 }, { "epoch": 0.2587369711833231, "grad_norm": 0.5152204036712646, "learning_rate": 8.578865451944148e-05, "loss": 1.4488, "step": 4642 }, { "epoch": 0.25879270943648625, "grad_norm": 0.5446513295173645, "learning_rate": 8.578243583052897e-05, "loss": 1.7116, "step": 4643 }, { "epoch": 0.25884844768964943, "grad_norm": 0.5753796696662903, "learning_rate": 8.577621600680623e-05, "loss": 1.5765, "step": 4644 }, { "epoch": 0.25890418594281256, "grad_norm": 0.53980952501297, "learning_rate": 8.57699950484705e-05, "loss": 1.7881, "step": 4645 }, { "epoch": 0.2589599241959757, "grad_norm": 0.5444200038909912, "learning_rate": 8.57637729557191e-05, "loss": 1.8373, "step": 4646 }, { "epoch": 0.2590156624491388, "grad_norm": 0.5415917634963989, "learning_rate": 8.575754972874931e-05, "loss": 1.6772, "step": 4647 }, { "epoch": 0.259071400702302, "grad_norm": 0.5910305380821228, "learning_rate": 8.575132536775853e-05, "loss": 1.8558, "step": 4648 }, { "epoch": 0.25912713895546513, "grad_norm": 0.5802417397499084, "learning_rate": 8.574509987294417e-05, "loss": 1.9364, "step": 4649 }, { "epoch": 0.25918287720862826, "grad_norm": 0.573726236820221, "learning_rate": 8.573887324450364e-05, "loss": 1.8956, "step": 4650 }, { "epoch": 0.25923861546179144, "grad_norm": 0.5909465551376343, "learning_rate": 8.573264548263442e-05, "loss": 1.7338, "step": 4651 }, { "epoch": 0.2592943537149546, "grad_norm": 0.6169442534446716, "learning_rate": 8.572641658753404e-05, "loss": 1.5941, "step": 4652 }, { "epoch": 0.2593500919681177, "grad_norm": 0.5135464668273926, "learning_rate": 8.572018655940001e-05, "loss": 1.7035, "step": 4653 }, { "epoch": 0.2594058302212809, "grad_norm": 0.5379095077514648, "learning_rate": 8.571395539842992e-05, "loss": 1.7387, "step": 4654 }, { "epoch": 0.259461568474444, "grad_norm": 0.5439580678939819, "learning_rate": 8.570772310482141e-05, "loss": 1.7089, "step": 4655 }, { "epoch": 0.25951730672760714, "grad_norm": 0.5132806301116943, "learning_rate": 8.57014896787721e-05, "loss": 1.5298, "step": 4656 }, { "epoch": 0.25957304498077033, "grad_norm": 0.5612521171569824, "learning_rate": 8.569525512047969e-05, "loss": 1.7676, "step": 4657 }, { "epoch": 0.25962878323393346, "grad_norm": 0.5397217273712158, "learning_rate": 8.56890194301419e-05, "loss": 1.636, "step": 4658 }, { "epoch": 0.2596845214870966, "grad_norm": 0.6334729194641113, "learning_rate": 8.56827826079565e-05, "loss": 1.8281, "step": 4659 }, { "epoch": 0.2597402597402597, "grad_norm": 0.5931346416473389, "learning_rate": 8.56765446541213e-05, "loss": 1.7335, "step": 4660 }, { "epoch": 0.2597959979934229, "grad_norm": 0.5085331201553345, "learning_rate": 8.567030556883408e-05, "loss": 1.8524, "step": 4661 }, { "epoch": 0.25985173624658603, "grad_norm": 0.5508363246917725, "learning_rate": 8.566406535229276e-05, "loss": 1.7883, "step": 4662 }, { "epoch": 0.25990747449974916, "grad_norm": 0.5742567181587219, "learning_rate": 8.565782400469522e-05, "loss": 1.7011, "step": 4663 }, { "epoch": 0.25996321275291234, "grad_norm": 0.4922592043876648, "learning_rate": 8.56515815262394e-05, "loss": 1.4828, "step": 4664 }, { "epoch": 0.26001895100607547, "grad_norm": 0.5450266003608704, "learning_rate": 8.564533791712328e-05, "loss": 1.7885, "step": 4665 }, { "epoch": 0.2600746892592386, "grad_norm": 0.5942632555961609, "learning_rate": 8.563909317754487e-05, "loss": 1.9297, "step": 4666 }, { "epoch": 0.2601304275124018, "grad_norm": 0.5638509392738342, "learning_rate": 8.563284730770221e-05, "loss": 1.9536, "step": 4667 }, { "epoch": 0.2601861657655649, "grad_norm": 0.5848171710968018, "learning_rate": 8.56266003077934e-05, "loss": 2.003, "step": 4668 }, { "epoch": 0.26024190401872804, "grad_norm": 0.5629677176475525, "learning_rate": 8.562035217801652e-05, "loss": 2.0024, "step": 4669 }, { "epoch": 0.26029764227189117, "grad_norm": 0.5268816351890564, "learning_rate": 8.561410291856977e-05, "loss": 1.5865, "step": 4670 }, { "epoch": 0.26035338052505436, "grad_norm": 0.545254647731781, "learning_rate": 8.560785252965131e-05, "loss": 1.7586, "step": 4671 }, { "epoch": 0.2604091187782175, "grad_norm": 0.5406084060668945, "learning_rate": 8.560160101145937e-05, "loss": 1.9274, "step": 4672 }, { "epoch": 0.2604648570313806, "grad_norm": 0.5519586801528931, "learning_rate": 8.559534836419224e-05, "loss": 1.7652, "step": 4673 }, { "epoch": 0.2605205952845438, "grad_norm": 0.5398983955383301, "learning_rate": 8.558909458804818e-05, "loss": 1.9096, "step": 4674 }, { "epoch": 0.26057633353770693, "grad_norm": 0.5414653420448303, "learning_rate": 8.558283968322555e-05, "loss": 1.6586, "step": 4675 }, { "epoch": 0.26063207179087006, "grad_norm": 0.5628217458724976, "learning_rate": 8.55765836499227e-05, "loss": 1.606, "step": 4676 }, { "epoch": 0.26068781004403324, "grad_norm": 0.5232682228088379, "learning_rate": 8.557032648833804e-05, "loss": 1.698, "step": 4677 }, { "epoch": 0.26074354829719637, "grad_norm": 0.588845431804657, "learning_rate": 8.556406819867001e-05, "loss": 1.9568, "step": 4678 }, { "epoch": 0.2607992865503595, "grad_norm": 0.5363548994064331, "learning_rate": 8.55578087811171e-05, "loss": 1.6827, "step": 4679 }, { "epoch": 0.2608550248035227, "grad_norm": 0.514584481716156, "learning_rate": 8.55515482358778e-05, "loss": 1.631, "step": 4680 }, { "epoch": 0.2609107630566858, "grad_norm": 0.5446624159812927, "learning_rate": 8.554528656315069e-05, "loss": 1.7978, "step": 4681 }, { "epoch": 0.26096650130984894, "grad_norm": 0.5160642266273499, "learning_rate": 8.55390237631343e-05, "loss": 1.4935, "step": 4682 }, { "epoch": 0.26102223956301207, "grad_norm": 0.5020194053649902, "learning_rate": 8.553275983602732e-05, "loss": 1.3459, "step": 4683 }, { "epoch": 0.26107797781617526, "grad_norm": 0.5197760462760925, "learning_rate": 8.552649478202834e-05, "loss": 1.8008, "step": 4684 }, { "epoch": 0.2611337160693384, "grad_norm": 0.5080288648605347, "learning_rate": 8.55202286013361e-05, "loss": 1.5853, "step": 4685 }, { "epoch": 0.2611894543225015, "grad_norm": 0.5232203602790833, "learning_rate": 8.551396129414928e-05, "loss": 1.7352, "step": 4686 }, { "epoch": 0.2612451925756647, "grad_norm": 0.5843389630317688, "learning_rate": 8.550769286066669e-05, "loss": 1.5833, "step": 4687 }, { "epoch": 0.2613009308288278, "grad_norm": 0.5756316184997559, "learning_rate": 8.55014233010871e-05, "loss": 1.8692, "step": 4688 }, { "epoch": 0.26135666908199096, "grad_norm": 0.5456770658493042, "learning_rate": 8.549515261560937e-05, "loss": 1.6987, "step": 4689 }, { "epoch": 0.26141240733515414, "grad_norm": 0.5343070030212402, "learning_rate": 8.548888080443231e-05, "loss": 1.4492, "step": 4690 }, { "epoch": 0.26146814558831727, "grad_norm": 0.546418309211731, "learning_rate": 8.54826078677549e-05, "loss": 1.7292, "step": 4691 }, { "epoch": 0.2615238838414804, "grad_norm": 0.5571802258491516, "learning_rate": 8.547633380577604e-05, "loss": 1.9054, "step": 4692 }, { "epoch": 0.2615796220946435, "grad_norm": 0.5529661774635315, "learning_rate": 8.54700586186947e-05, "loss": 1.8537, "step": 4693 }, { "epoch": 0.2616353603478067, "grad_norm": 0.5503031611442566, "learning_rate": 8.546378230670992e-05, "loss": 1.7507, "step": 4694 }, { "epoch": 0.26169109860096984, "grad_norm": 0.5290326476097107, "learning_rate": 8.545750487002073e-05, "loss": 1.5895, "step": 4695 }, { "epoch": 0.26174683685413297, "grad_norm": 0.5247073769569397, "learning_rate": 8.54512263088262e-05, "loss": 1.5736, "step": 4696 }, { "epoch": 0.26180257510729615, "grad_norm": 0.575093686580658, "learning_rate": 8.544494662332548e-05, "loss": 1.5192, "step": 4697 }, { "epoch": 0.2618583133604593, "grad_norm": 0.5360473990440369, "learning_rate": 8.543866581371771e-05, "loss": 1.7796, "step": 4698 }, { "epoch": 0.2619140516136224, "grad_norm": 0.5478860139846802, "learning_rate": 8.54323838802021e-05, "loss": 1.756, "step": 4699 }, { "epoch": 0.2619697898667856, "grad_norm": 0.5454539060592651, "learning_rate": 8.542610082297783e-05, "loss": 1.7589, "step": 4700 }, { "epoch": 0.2620255281199487, "grad_norm": 0.5187868475914001, "learning_rate": 8.541981664224421e-05, "loss": 1.5043, "step": 4701 }, { "epoch": 0.26208126637311185, "grad_norm": 0.5362755060195923, "learning_rate": 8.54135313382005e-05, "loss": 1.731, "step": 4702 }, { "epoch": 0.26213700462627504, "grad_norm": 0.5599364638328552, "learning_rate": 8.540724491104606e-05, "loss": 1.6976, "step": 4703 }, { "epoch": 0.26219274287943817, "grad_norm": 0.5924205183982849, "learning_rate": 8.540095736098026e-05, "loss": 1.8049, "step": 4704 }, { "epoch": 0.2622484811326013, "grad_norm": 0.5288107395172119, "learning_rate": 8.539466868820247e-05, "loss": 1.5834, "step": 4705 }, { "epoch": 0.2623042193857644, "grad_norm": 0.5498400330543518, "learning_rate": 8.538837889291218e-05, "loss": 1.6546, "step": 4706 }, { "epoch": 0.2623599576389276, "grad_norm": 0.5080811381340027, "learning_rate": 8.538208797530883e-05, "loss": 1.434, "step": 4707 }, { "epoch": 0.26241569589209074, "grad_norm": 0.5125556588172913, "learning_rate": 8.537579593559195e-05, "loss": 1.6628, "step": 4708 }, { "epoch": 0.26247143414525387, "grad_norm": 0.5489838123321533, "learning_rate": 8.536950277396106e-05, "loss": 1.5702, "step": 4709 }, { "epoch": 0.26252717239841705, "grad_norm": 0.5346508622169495, "learning_rate": 8.536320849061577e-05, "loss": 1.7829, "step": 4710 }, { "epoch": 0.2625829106515802, "grad_norm": 0.5648466944694519, "learning_rate": 8.535691308575569e-05, "loss": 1.8271, "step": 4711 }, { "epoch": 0.2626386489047433, "grad_norm": 0.5875536203384399, "learning_rate": 8.535061655958048e-05, "loss": 1.888, "step": 4712 }, { "epoch": 0.2626943871579065, "grad_norm": 0.5403586626052856, "learning_rate": 8.534431891228981e-05, "loss": 1.5633, "step": 4713 }, { "epoch": 0.2627501254110696, "grad_norm": 0.5541427135467529, "learning_rate": 8.533802014408341e-05, "loss": 1.7778, "step": 4714 }, { "epoch": 0.26280586366423275, "grad_norm": 0.5390727519989014, "learning_rate": 8.533172025516106e-05, "loss": 1.6732, "step": 4715 }, { "epoch": 0.2628616019173959, "grad_norm": 0.5591700077056885, "learning_rate": 8.532541924572254e-05, "loss": 1.7714, "step": 4716 }, { "epoch": 0.26291734017055907, "grad_norm": 0.5306904911994934, "learning_rate": 8.531911711596767e-05, "loss": 1.7311, "step": 4717 }, { "epoch": 0.2629730784237222, "grad_norm": 0.5665531158447266, "learning_rate": 8.531281386609633e-05, "loss": 1.684, "step": 4718 }, { "epoch": 0.2630288166768853, "grad_norm": 0.5404395461082458, "learning_rate": 8.530650949630844e-05, "loss": 1.7727, "step": 4719 }, { "epoch": 0.2630845549300485, "grad_norm": 0.5549681782722473, "learning_rate": 8.530020400680392e-05, "loss": 1.6802, "step": 4720 }, { "epoch": 0.26314029318321164, "grad_norm": 0.5529362559318542, "learning_rate": 8.529389739778272e-05, "loss": 1.6691, "step": 4721 }, { "epoch": 0.26319603143637477, "grad_norm": 0.5257294178009033, "learning_rate": 8.528758966944489e-05, "loss": 1.6649, "step": 4722 }, { "epoch": 0.26325176968953795, "grad_norm": 0.5499683022499084, "learning_rate": 8.528128082199046e-05, "loss": 1.8637, "step": 4723 }, { "epoch": 0.2633075079427011, "grad_norm": 0.5676036477088928, "learning_rate": 8.527497085561949e-05, "loss": 1.6409, "step": 4724 }, { "epoch": 0.2633632461958642, "grad_norm": 0.5784804821014404, "learning_rate": 8.526865977053211e-05, "loss": 1.8414, "step": 4725 }, { "epoch": 0.2634189844490274, "grad_norm": 0.592461884021759, "learning_rate": 8.52623475669285e-05, "loss": 1.725, "step": 4726 }, { "epoch": 0.2634747227021905, "grad_norm": 0.5251427888870239, "learning_rate": 8.52560342450088e-05, "loss": 1.5888, "step": 4727 }, { "epoch": 0.26353046095535365, "grad_norm": 0.5062176585197449, "learning_rate": 8.524971980497325e-05, "loss": 1.5588, "step": 4728 }, { "epoch": 0.2635861992085168, "grad_norm": 0.5686171054840088, "learning_rate": 8.524340424702211e-05, "loss": 1.6186, "step": 4729 }, { "epoch": 0.26364193746167996, "grad_norm": 0.5521769523620605, "learning_rate": 8.523708757135567e-05, "loss": 1.6917, "step": 4730 }, { "epoch": 0.2636976757148431, "grad_norm": 0.5489006042480469, "learning_rate": 8.523076977817426e-05, "loss": 1.8079, "step": 4731 }, { "epoch": 0.2637534139680062, "grad_norm": 0.5295306444168091, "learning_rate": 8.522445086767826e-05, "loss": 1.6814, "step": 4732 }, { "epoch": 0.2638091522211694, "grad_norm": 0.5596312284469604, "learning_rate": 8.521813084006802e-05, "loss": 1.7971, "step": 4733 }, { "epoch": 0.26386489047433254, "grad_norm": 0.535030722618103, "learning_rate": 8.5211809695544e-05, "loss": 1.6389, "step": 4734 }, { "epoch": 0.26392062872749567, "grad_norm": 0.5560666918754578, "learning_rate": 8.520548743430673e-05, "loss": 1.8107, "step": 4735 }, { "epoch": 0.26397636698065885, "grad_norm": 0.5749865770339966, "learning_rate": 8.51991640565566e-05, "loss": 1.7698, "step": 4736 }, { "epoch": 0.264032105233822, "grad_norm": 0.603252649307251, "learning_rate": 8.519283956249424e-05, "loss": 1.9701, "step": 4737 }, { "epoch": 0.2640878434869851, "grad_norm": 0.562053918838501, "learning_rate": 8.51865139523202e-05, "loss": 1.7033, "step": 4738 }, { "epoch": 0.26414358174014824, "grad_norm": 0.5553662776947021, "learning_rate": 8.518018722623509e-05, "loss": 1.6353, "step": 4739 }, { "epoch": 0.2641993199933114, "grad_norm": 0.5916672945022583, "learning_rate": 8.517385938443955e-05, "loss": 1.8496, "step": 4740 }, { "epoch": 0.26425505824647455, "grad_norm": 0.549395740032196, "learning_rate": 8.516753042713426e-05, "loss": 1.612, "step": 4741 }, { "epoch": 0.2643107964996377, "grad_norm": 0.5560966730117798, "learning_rate": 8.516120035451996e-05, "loss": 1.5978, "step": 4742 }, { "epoch": 0.26436653475280086, "grad_norm": 0.5934261679649353, "learning_rate": 8.515486916679738e-05, "loss": 1.9667, "step": 4743 }, { "epoch": 0.264422273005964, "grad_norm": 0.5441667437553406, "learning_rate": 8.514853686416732e-05, "loss": 1.639, "step": 4744 }, { "epoch": 0.2644780112591271, "grad_norm": 0.5780582427978516, "learning_rate": 8.51422034468306e-05, "loss": 1.6839, "step": 4745 }, { "epoch": 0.2645337495122903, "grad_norm": 0.5739880204200745, "learning_rate": 8.513586891498809e-05, "loss": 1.6927, "step": 4746 }, { "epoch": 0.26458948776545343, "grad_norm": 0.5097702145576477, "learning_rate": 8.512953326884066e-05, "loss": 1.5131, "step": 4747 }, { "epoch": 0.26464522601861656, "grad_norm": 0.5593822598457336, "learning_rate": 8.512319650858926e-05, "loss": 1.8373, "step": 4748 }, { "epoch": 0.26470096427177975, "grad_norm": 0.546627938747406, "learning_rate": 8.511685863443484e-05, "loss": 1.723, "step": 4749 }, { "epoch": 0.2647567025249429, "grad_norm": 0.5196560621261597, "learning_rate": 8.511051964657842e-05, "loss": 1.6108, "step": 4750 }, { "epoch": 0.264812440778106, "grad_norm": 0.548095166683197, "learning_rate": 8.510417954522102e-05, "loss": 1.6268, "step": 4751 }, { "epoch": 0.26486817903126914, "grad_norm": 0.5570634007453918, "learning_rate": 8.509783833056373e-05, "loss": 1.828, "step": 4752 }, { "epoch": 0.2649239172844323, "grad_norm": 0.5177022814750671, "learning_rate": 8.509149600280762e-05, "loss": 1.6537, "step": 4753 }, { "epoch": 0.26497965553759545, "grad_norm": 0.5529354810714722, "learning_rate": 8.508515256215389e-05, "loss": 1.6702, "step": 4754 }, { "epoch": 0.2650353937907586, "grad_norm": 0.6287319660186768, "learning_rate": 8.507880800880364e-05, "loss": 1.7545, "step": 4755 }, { "epoch": 0.26509113204392176, "grad_norm": 0.5878986716270447, "learning_rate": 8.507246234295814e-05, "loss": 1.9199, "step": 4756 }, { "epoch": 0.2651468702970849, "grad_norm": 0.560119092464447, "learning_rate": 8.506611556481862e-05, "loss": 1.645, "step": 4757 }, { "epoch": 0.265202608550248, "grad_norm": 0.5107282996177673, "learning_rate": 8.505976767458636e-05, "loss": 1.8503, "step": 4758 }, { "epoch": 0.2652583468034112, "grad_norm": 0.5514339208602905, "learning_rate": 8.50534186724627e-05, "loss": 1.6562, "step": 4759 }, { "epoch": 0.26531408505657433, "grad_norm": 0.541807234287262, "learning_rate": 8.504706855864897e-05, "loss": 1.7167, "step": 4760 }, { "epoch": 0.26536982330973746, "grad_norm": 0.5748420357704163, "learning_rate": 8.504071733334656e-05, "loss": 1.955, "step": 4761 }, { "epoch": 0.2654255615629006, "grad_norm": 0.5451623201370239, "learning_rate": 8.503436499675687e-05, "loss": 1.7336, "step": 4762 }, { "epoch": 0.2654812998160638, "grad_norm": 0.5036576986312866, "learning_rate": 8.502801154908142e-05, "loss": 1.7619, "step": 4763 }, { "epoch": 0.2655370380692269, "grad_norm": 0.5252074003219604, "learning_rate": 8.502165699052168e-05, "loss": 1.6425, "step": 4764 }, { "epoch": 0.26559277632239003, "grad_norm": 0.5452297925949097, "learning_rate": 8.501530132127915e-05, "loss": 1.5942, "step": 4765 }, { "epoch": 0.2656485145755532, "grad_norm": 0.5282885432243347, "learning_rate": 8.500894454155541e-05, "loss": 1.4847, "step": 4766 }, { "epoch": 0.26570425282871635, "grad_norm": 0.6032153367996216, "learning_rate": 8.500258665155207e-05, "loss": 1.8069, "step": 4767 }, { "epoch": 0.2657599910818795, "grad_norm": 0.6232243776321411, "learning_rate": 8.499622765147078e-05, "loss": 1.9243, "step": 4768 }, { "epoch": 0.26581572933504266, "grad_norm": 0.5226832032203674, "learning_rate": 8.498986754151316e-05, "loss": 1.5832, "step": 4769 }, { "epoch": 0.2658714675882058, "grad_norm": 0.653657853603363, "learning_rate": 8.498350632188097e-05, "loss": 1.7387, "step": 4770 }, { "epoch": 0.2659272058413689, "grad_norm": 0.6087796688079834, "learning_rate": 8.497714399277592e-05, "loss": 1.7853, "step": 4771 }, { "epoch": 0.2659829440945321, "grad_norm": 0.5050531029701233, "learning_rate": 8.49707805543998e-05, "loss": 1.4848, "step": 4772 }, { "epoch": 0.26603868234769523, "grad_norm": 0.5245751738548279, "learning_rate": 8.496441600695441e-05, "loss": 1.615, "step": 4773 }, { "epoch": 0.26609442060085836, "grad_norm": 0.5427295565605164, "learning_rate": 8.495805035064159e-05, "loss": 1.8508, "step": 4774 }, { "epoch": 0.2661501588540215, "grad_norm": 0.5052759647369385, "learning_rate": 8.495168358566325e-05, "loss": 1.6307, "step": 4775 }, { "epoch": 0.2662058971071847, "grad_norm": 0.5618288516998291, "learning_rate": 8.494531571222128e-05, "loss": 1.7516, "step": 4776 }, { "epoch": 0.2662616353603478, "grad_norm": 0.5743941068649292, "learning_rate": 8.493894673051765e-05, "loss": 1.9439, "step": 4777 }, { "epoch": 0.26631737361351093, "grad_norm": 0.5246620178222656, "learning_rate": 8.493257664075433e-05, "loss": 1.7159, "step": 4778 }, { "epoch": 0.2663731118666741, "grad_norm": 0.5409666895866394, "learning_rate": 8.492620544313335e-05, "loss": 1.6972, "step": 4779 }, { "epoch": 0.26642885011983725, "grad_norm": 0.5137554407119751, "learning_rate": 8.491983313785676e-05, "loss": 1.6285, "step": 4780 }, { "epoch": 0.2664845883730004, "grad_norm": 0.6102763414382935, "learning_rate": 8.491345972512668e-05, "loss": 1.7433, "step": 4781 }, { "epoch": 0.26654032662616356, "grad_norm": 0.6035791039466858, "learning_rate": 8.490708520514519e-05, "loss": 1.8665, "step": 4782 }, { "epoch": 0.2665960648793267, "grad_norm": 0.5769240856170654, "learning_rate": 8.490070957811449e-05, "loss": 1.7147, "step": 4783 }, { "epoch": 0.2666518031324898, "grad_norm": 0.5191882252693176, "learning_rate": 8.489433284423678e-05, "loss": 1.5935, "step": 4784 }, { "epoch": 0.26670754138565295, "grad_norm": 0.575363039970398, "learning_rate": 8.488795500371427e-05, "loss": 1.8616, "step": 4785 }, { "epoch": 0.26676327963881613, "grad_norm": 0.5380163788795471, "learning_rate": 8.488157605674925e-05, "loss": 1.5693, "step": 4786 }, { "epoch": 0.26681901789197926, "grad_norm": 0.5527309775352478, "learning_rate": 8.487519600354399e-05, "loss": 1.797, "step": 4787 }, { "epoch": 0.2668747561451424, "grad_norm": 0.5432277321815491, "learning_rate": 8.486881484430085e-05, "loss": 1.7024, "step": 4788 }, { "epoch": 0.2669304943983056, "grad_norm": 0.5643296837806702, "learning_rate": 8.486243257922221e-05, "loss": 1.6602, "step": 4789 }, { "epoch": 0.2669862326514687, "grad_norm": 0.5539331436157227, "learning_rate": 8.485604920851049e-05, "loss": 1.7195, "step": 4790 }, { "epoch": 0.26704197090463183, "grad_norm": 0.5279936790466309, "learning_rate": 8.48496647323681e-05, "loss": 1.6503, "step": 4791 }, { "epoch": 0.267097709157795, "grad_norm": 0.5447912812232971, "learning_rate": 8.484327915099752e-05, "loss": 1.7975, "step": 4792 }, { "epoch": 0.26715344741095814, "grad_norm": 0.6047879457473755, "learning_rate": 8.48368924646013e-05, "loss": 1.8362, "step": 4793 }, { "epoch": 0.2672091856641213, "grad_norm": 0.5555823445320129, "learning_rate": 8.483050467338194e-05, "loss": 1.7033, "step": 4794 }, { "epoch": 0.26726492391728446, "grad_norm": 0.5324097871780396, "learning_rate": 8.482411577754205e-05, "loss": 1.828, "step": 4795 }, { "epoch": 0.2673206621704476, "grad_norm": 0.5133151412010193, "learning_rate": 8.481772577728426e-05, "loss": 1.6922, "step": 4796 }, { "epoch": 0.2673764004236107, "grad_norm": 0.5466338396072388, "learning_rate": 8.48113346728112e-05, "loss": 1.7228, "step": 4797 }, { "epoch": 0.26743213867677385, "grad_norm": 0.5190402269363403, "learning_rate": 8.480494246432557e-05, "loss": 1.7192, "step": 4798 }, { "epoch": 0.26748787692993703, "grad_norm": 0.4959962069988251, "learning_rate": 8.47985491520301e-05, "loss": 1.5593, "step": 4799 }, { "epoch": 0.26754361518310016, "grad_norm": 0.5530042052268982, "learning_rate": 8.479215473612754e-05, "loss": 1.7545, "step": 4800 }, { "epoch": 0.2675993534362633, "grad_norm": 0.6360591650009155, "learning_rate": 8.478575921682066e-05, "loss": 1.9369, "step": 4801 }, { "epoch": 0.26765509168942647, "grad_norm": 0.5604984164237976, "learning_rate": 8.477936259431235e-05, "loss": 1.6485, "step": 4802 }, { "epoch": 0.2677108299425896, "grad_norm": 0.568709671497345, "learning_rate": 8.477296486880541e-05, "loss": 1.6459, "step": 4803 }, { "epoch": 0.26776656819575273, "grad_norm": 0.6228764653205872, "learning_rate": 8.476656604050277e-05, "loss": 1.8825, "step": 4804 }, { "epoch": 0.2678223064489159, "grad_norm": 0.5803889036178589, "learning_rate": 8.476016610960736e-05, "loss": 1.8011, "step": 4805 }, { "epoch": 0.26787804470207904, "grad_norm": 0.5778336524963379, "learning_rate": 8.475376507632215e-05, "loss": 1.726, "step": 4806 }, { "epoch": 0.2679337829552422, "grad_norm": 0.5755890011787415, "learning_rate": 8.474736294085014e-05, "loss": 1.6394, "step": 4807 }, { "epoch": 0.2679895212084053, "grad_norm": 0.5545676350593567, "learning_rate": 8.474095970339436e-05, "loss": 1.7973, "step": 4808 }, { "epoch": 0.2680452594615685, "grad_norm": 0.5003368854522705, "learning_rate": 8.473455536415789e-05, "loss": 1.6653, "step": 4809 }, { "epoch": 0.2681009977147316, "grad_norm": 0.5292695164680481, "learning_rate": 8.472814992334386e-05, "loss": 1.7463, "step": 4810 }, { "epoch": 0.26815673596789474, "grad_norm": 0.604960560798645, "learning_rate": 8.472174338115537e-05, "loss": 1.9016, "step": 4811 }, { "epoch": 0.26821247422105793, "grad_norm": 0.5484800338745117, "learning_rate": 8.471533573779564e-05, "loss": 1.6117, "step": 4812 }, { "epoch": 0.26826821247422106, "grad_norm": 0.5383596420288086, "learning_rate": 8.470892699346786e-05, "loss": 1.6871, "step": 4813 }, { "epoch": 0.2683239507273842, "grad_norm": 0.5479928851127625, "learning_rate": 8.470251714837529e-05, "loss": 1.7255, "step": 4814 }, { "epoch": 0.26837968898054737, "grad_norm": 0.5112576484680176, "learning_rate": 8.46961062027212e-05, "loss": 1.414, "step": 4815 }, { "epoch": 0.2684354272337105, "grad_norm": 0.547825038433075, "learning_rate": 8.46896941567089e-05, "loss": 1.835, "step": 4816 }, { "epoch": 0.26849116548687363, "grad_norm": 0.5121808648109436, "learning_rate": 8.468328101054177e-05, "loss": 1.5269, "step": 4817 }, { "epoch": 0.2685469037400368, "grad_norm": 0.5761928558349609, "learning_rate": 8.467686676442318e-05, "loss": 1.7195, "step": 4818 }, { "epoch": 0.26860264199319994, "grad_norm": 0.547089159488678, "learning_rate": 8.467045141855656e-05, "loss": 1.6714, "step": 4819 }, { "epoch": 0.26865838024636307, "grad_norm": 0.5228059887886047, "learning_rate": 8.466403497314537e-05, "loss": 1.6444, "step": 4820 }, { "epoch": 0.2687141184995262, "grad_norm": 0.5589326620101929, "learning_rate": 8.465761742839307e-05, "loss": 1.9121, "step": 4821 }, { "epoch": 0.2687698567526894, "grad_norm": 0.5607814192771912, "learning_rate": 8.465119878450324e-05, "loss": 1.8351, "step": 4822 }, { "epoch": 0.2688255950058525, "grad_norm": 0.591454029083252, "learning_rate": 8.46447790416794e-05, "loss": 1.8308, "step": 4823 }, { "epoch": 0.26888133325901564, "grad_norm": 0.5167153477668762, "learning_rate": 8.463835820012517e-05, "loss": 1.6928, "step": 4824 }, { "epoch": 0.2689370715121788, "grad_norm": 0.5741368532180786, "learning_rate": 8.463193626004418e-05, "loss": 1.8407, "step": 4825 }, { "epoch": 0.26899280976534196, "grad_norm": 0.563448965549469, "learning_rate": 8.462551322164007e-05, "loss": 1.7246, "step": 4826 }, { "epoch": 0.2690485480185051, "grad_norm": 0.5690648555755615, "learning_rate": 8.461908908511657e-05, "loss": 1.7408, "step": 4827 }, { "epoch": 0.26910428627166827, "grad_norm": 0.5448554754257202, "learning_rate": 8.461266385067741e-05, "loss": 1.6012, "step": 4828 }, { "epoch": 0.2691600245248314, "grad_norm": 0.5054116249084473, "learning_rate": 8.460623751852637e-05, "loss": 1.6175, "step": 4829 }, { "epoch": 0.2692157627779945, "grad_norm": 0.5798751711845398, "learning_rate": 8.459981008886721e-05, "loss": 1.7742, "step": 4830 }, { "epoch": 0.26927150103115766, "grad_norm": 0.5339779257774353, "learning_rate": 8.459338156190384e-05, "loss": 1.6737, "step": 4831 }, { "epoch": 0.26932723928432084, "grad_norm": 0.5387359261512756, "learning_rate": 8.45869519378401e-05, "loss": 1.6606, "step": 4832 }, { "epoch": 0.26938297753748397, "grad_norm": 0.646202802658081, "learning_rate": 8.458052121687987e-05, "loss": 1.9741, "step": 4833 }, { "epoch": 0.2694387157906471, "grad_norm": 0.5640881061553955, "learning_rate": 8.457408939922715e-05, "loss": 1.7103, "step": 4834 }, { "epoch": 0.2694944540438103, "grad_norm": 0.567292332649231, "learning_rate": 8.456765648508589e-05, "loss": 1.7605, "step": 4835 }, { "epoch": 0.2695501922969734, "grad_norm": 0.6057398319244385, "learning_rate": 8.456122247466009e-05, "loss": 1.6074, "step": 4836 }, { "epoch": 0.26960593055013654, "grad_norm": 0.6216564178466797, "learning_rate": 8.455478736815385e-05, "loss": 1.6341, "step": 4837 }, { "epoch": 0.2696616688032997, "grad_norm": 0.53920978307724, "learning_rate": 8.454835116577122e-05, "loss": 1.792, "step": 4838 }, { "epoch": 0.26971740705646285, "grad_norm": 0.5827376842498779, "learning_rate": 8.45419138677163e-05, "loss": 1.5826, "step": 4839 }, { "epoch": 0.269773145309626, "grad_norm": 0.5303118228912354, "learning_rate": 8.453547547419329e-05, "loss": 1.7387, "step": 4840 }, { "epoch": 0.26982888356278917, "grad_norm": 0.5183376669883728, "learning_rate": 8.452903598540634e-05, "loss": 1.532, "step": 4841 }, { "epoch": 0.2698846218159523, "grad_norm": 0.5537537336349487, "learning_rate": 8.452259540155968e-05, "loss": 1.7955, "step": 4842 }, { "epoch": 0.2699403600691154, "grad_norm": 0.5679836273193359, "learning_rate": 8.451615372285758e-05, "loss": 1.7329, "step": 4843 }, { "epoch": 0.26999609832227855, "grad_norm": 0.5696743726730347, "learning_rate": 8.450971094950433e-05, "loss": 1.7294, "step": 4844 }, { "epoch": 0.27005183657544174, "grad_norm": 0.5818564295768738, "learning_rate": 8.450326708170426e-05, "loss": 2.0301, "step": 4845 }, { "epoch": 0.27010757482860487, "grad_norm": 0.5044540762901306, "learning_rate": 8.449682211966172e-05, "loss": 1.5171, "step": 4846 }, { "epoch": 0.270163313081768, "grad_norm": 0.5692309141159058, "learning_rate": 8.449037606358111e-05, "loss": 1.776, "step": 4847 }, { "epoch": 0.2702190513349312, "grad_norm": 0.5652437210083008, "learning_rate": 8.448392891366688e-05, "loss": 1.8956, "step": 4848 }, { "epoch": 0.2702747895880943, "grad_norm": 0.5531434416770935, "learning_rate": 8.447748067012345e-05, "loss": 1.7156, "step": 4849 }, { "epoch": 0.27033052784125744, "grad_norm": 0.5418469309806824, "learning_rate": 8.447103133315537e-05, "loss": 1.6983, "step": 4850 }, { "epoch": 0.2703862660944206, "grad_norm": 0.5276792049407959, "learning_rate": 8.446458090296716e-05, "loss": 1.6147, "step": 4851 }, { "epoch": 0.27044200434758375, "grad_norm": 0.5772181749343872, "learning_rate": 8.445812937976338e-05, "loss": 1.677, "step": 4852 }, { "epoch": 0.2704977426007469, "grad_norm": 0.5323836803436279, "learning_rate": 8.445167676374865e-05, "loss": 1.4833, "step": 4853 }, { "epoch": 0.27055348085391, "grad_norm": 0.5478299260139465, "learning_rate": 8.444522305512757e-05, "loss": 1.5832, "step": 4854 }, { "epoch": 0.2706092191070732, "grad_norm": 0.5325939655303955, "learning_rate": 8.443876825410488e-05, "loss": 1.4971, "step": 4855 }, { "epoch": 0.2706649573602363, "grad_norm": 0.5912976861000061, "learning_rate": 8.443231236088524e-05, "loss": 1.7624, "step": 4856 }, { "epoch": 0.27072069561339945, "grad_norm": 0.5368456244468689, "learning_rate": 8.44258553756734e-05, "loss": 1.5509, "step": 4857 }, { "epoch": 0.27077643386656264, "grad_norm": 0.5713909864425659, "learning_rate": 8.441939729867415e-05, "loss": 1.8286, "step": 4858 }, { "epoch": 0.27083217211972577, "grad_norm": 0.5259481072425842, "learning_rate": 8.44129381300923e-05, "loss": 1.7291, "step": 4859 }, { "epoch": 0.2708879103728889, "grad_norm": 0.5365427136421204, "learning_rate": 8.440647787013268e-05, "loss": 1.6051, "step": 4860 }, { "epoch": 0.2709436486260521, "grad_norm": 0.5223046541213989, "learning_rate": 8.44000165190002e-05, "loss": 1.5241, "step": 4861 }, { "epoch": 0.2709993868792152, "grad_norm": 0.5721556544303894, "learning_rate": 8.439355407689975e-05, "loss": 1.8138, "step": 4862 }, { "epoch": 0.27105512513237834, "grad_norm": 0.527158260345459, "learning_rate": 8.43870905440363e-05, "loss": 1.5114, "step": 4863 }, { "epoch": 0.2711108633855415, "grad_norm": 0.5364054441452026, "learning_rate": 8.438062592061485e-05, "loss": 1.5331, "step": 4864 }, { "epoch": 0.27116660163870465, "grad_norm": 0.5465856790542603, "learning_rate": 8.437416020684036e-05, "loss": 1.5122, "step": 4865 }, { "epoch": 0.2712223398918678, "grad_norm": 0.5655773282051086, "learning_rate": 8.436769340291794e-05, "loss": 1.8776, "step": 4866 }, { "epoch": 0.2712780781450309, "grad_norm": 0.5278435349464417, "learning_rate": 8.436122550905266e-05, "loss": 1.6388, "step": 4867 }, { "epoch": 0.2713338163981941, "grad_norm": 0.5141345262527466, "learning_rate": 8.435475652544967e-05, "loss": 1.5203, "step": 4868 }, { "epoch": 0.2713895546513572, "grad_norm": 0.5731988549232483, "learning_rate": 8.434828645231407e-05, "loss": 1.8796, "step": 4869 }, { "epoch": 0.27144529290452035, "grad_norm": 0.5262272357940674, "learning_rate": 8.434181528985112e-05, "loss": 1.711, "step": 4870 }, { "epoch": 0.27150103115768354, "grad_norm": 0.5410183668136597, "learning_rate": 8.4335343038266e-05, "loss": 1.5739, "step": 4871 }, { "epoch": 0.27155676941084667, "grad_norm": 0.5376774072647095, "learning_rate": 8.432886969776398e-05, "loss": 1.7037, "step": 4872 }, { "epoch": 0.2716125076640098, "grad_norm": 0.4998942017555237, "learning_rate": 8.432239526855036e-05, "loss": 1.566, "step": 4873 }, { "epoch": 0.271668245917173, "grad_norm": 0.562468945980072, "learning_rate": 8.431591975083049e-05, "loss": 1.7742, "step": 4874 }, { "epoch": 0.2717239841703361, "grad_norm": 0.5608972907066345, "learning_rate": 8.430944314480973e-05, "loss": 1.7467, "step": 4875 }, { "epoch": 0.27177972242349924, "grad_norm": 0.6075250506401062, "learning_rate": 8.430296545069345e-05, "loss": 1.5414, "step": 4876 }, { "epoch": 0.27183546067666237, "grad_norm": 0.5488311052322388, "learning_rate": 8.429648666868713e-05, "loss": 1.7401, "step": 4877 }, { "epoch": 0.27189119892982555, "grad_norm": 0.5740364193916321, "learning_rate": 8.429000679899619e-05, "loss": 1.6739, "step": 4878 }, { "epoch": 0.2719469371829887, "grad_norm": 0.5271220207214355, "learning_rate": 8.428352584182617e-05, "loss": 1.6982, "step": 4879 }, { "epoch": 0.2720026754361518, "grad_norm": 0.5354405045509338, "learning_rate": 8.42770437973826e-05, "loss": 1.6927, "step": 4880 }, { "epoch": 0.272058413689315, "grad_norm": 0.569052517414093, "learning_rate": 8.427056066587105e-05, "loss": 1.6674, "step": 4881 }, { "epoch": 0.2721141519424781, "grad_norm": 0.5651227831840515, "learning_rate": 8.426407644749711e-05, "loss": 1.8356, "step": 4882 }, { "epoch": 0.27216989019564125, "grad_norm": 0.5364747643470764, "learning_rate": 8.425759114246647e-05, "loss": 1.749, "step": 4883 }, { "epoch": 0.27222562844880444, "grad_norm": 0.48416903614997864, "learning_rate": 8.425110475098476e-05, "loss": 1.4771, "step": 4884 }, { "epoch": 0.27228136670196756, "grad_norm": 0.5686883926391602, "learning_rate": 8.42446172732577e-05, "loss": 1.6603, "step": 4885 }, { "epoch": 0.2723371049551307, "grad_norm": 0.5875502824783325, "learning_rate": 8.423812870949104e-05, "loss": 1.8797, "step": 4886 }, { "epoch": 0.2723928432082939, "grad_norm": 0.5201019644737244, "learning_rate": 8.423163905989055e-05, "loss": 1.649, "step": 4887 }, { "epoch": 0.272448581461457, "grad_norm": 0.566376268863678, "learning_rate": 8.422514832466206e-05, "loss": 1.7182, "step": 4888 }, { "epoch": 0.27250431971462014, "grad_norm": 0.5158393979072571, "learning_rate": 8.421865650401143e-05, "loss": 1.6317, "step": 4889 }, { "epoch": 0.27256005796778326, "grad_norm": 0.5439308881759644, "learning_rate": 8.421216359814451e-05, "loss": 1.7071, "step": 4890 }, { "epoch": 0.27261579622094645, "grad_norm": 0.5321268439292908, "learning_rate": 8.420566960726723e-05, "loss": 1.6561, "step": 4891 }, { "epoch": 0.2726715344741096, "grad_norm": 0.4758521616458893, "learning_rate": 8.419917453158554e-05, "loss": 1.5538, "step": 4892 }, { "epoch": 0.2727272727272727, "grad_norm": 0.4964730441570282, "learning_rate": 8.419267837130544e-05, "loss": 1.5957, "step": 4893 }, { "epoch": 0.2727830109804359, "grad_norm": 0.555168628692627, "learning_rate": 8.418618112663292e-05, "loss": 1.6552, "step": 4894 }, { "epoch": 0.272838749233599, "grad_norm": 0.5903061032295227, "learning_rate": 8.417968279777409e-05, "loss": 1.8649, "step": 4895 }, { "epoch": 0.27289448748676215, "grad_norm": 0.584933876991272, "learning_rate": 8.417318338493497e-05, "loss": 1.8317, "step": 4896 }, { "epoch": 0.27295022573992533, "grad_norm": 0.6088751554489136, "learning_rate": 8.416668288832173e-05, "loss": 1.775, "step": 4897 }, { "epoch": 0.27300596399308846, "grad_norm": 0.6300697326660156, "learning_rate": 8.41601813081405e-05, "loss": 1.9256, "step": 4898 }, { "epoch": 0.2730617022462516, "grad_norm": 0.5516534447669983, "learning_rate": 8.415367864459751e-05, "loss": 1.6553, "step": 4899 }, { "epoch": 0.2731174404994147, "grad_norm": 0.5985352993011475, "learning_rate": 8.414717489789894e-05, "loss": 1.8121, "step": 4900 }, { "epoch": 0.2731731787525779, "grad_norm": 0.5280508399009705, "learning_rate": 8.414067006825108e-05, "loss": 1.657, "step": 4901 }, { "epoch": 0.27322891700574103, "grad_norm": 0.6586048007011414, "learning_rate": 8.413416415586024e-05, "loss": 2.2447, "step": 4902 }, { "epoch": 0.27328465525890416, "grad_norm": 0.5527061223983765, "learning_rate": 8.412765716093272e-05, "loss": 1.5666, "step": 4903 }, { "epoch": 0.27334039351206735, "grad_norm": 0.5549877882003784, "learning_rate": 8.412114908367488e-05, "loss": 1.5972, "step": 4904 }, { "epoch": 0.2733961317652305, "grad_norm": 0.5879062414169312, "learning_rate": 8.411463992429314e-05, "loss": 1.8609, "step": 4905 }, { "epoch": 0.2734518700183936, "grad_norm": 0.5397518873214722, "learning_rate": 8.41081296829939e-05, "loss": 1.8211, "step": 4906 }, { "epoch": 0.2735076082715568, "grad_norm": 0.5364968776702881, "learning_rate": 8.410161835998369e-05, "loss": 1.7879, "step": 4907 }, { "epoch": 0.2735633465247199, "grad_norm": 0.5714520215988159, "learning_rate": 8.409510595546894e-05, "loss": 1.9543, "step": 4908 }, { "epoch": 0.27361908477788305, "grad_norm": 0.5671858787536621, "learning_rate": 8.408859246965623e-05, "loss": 1.8165, "step": 4909 }, { "epoch": 0.27367482303104623, "grad_norm": 0.6034393906593323, "learning_rate": 8.408207790275213e-05, "loss": 1.8084, "step": 4910 }, { "epoch": 0.27373056128420936, "grad_norm": 0.5954535007476807, "learning_rate": 8.407556225496322e-05, "loss": 1.782, "step": 4911 }, { "epoch": 0.2737862995373725, "grad_norm": 0.5597085952758789, "learning_rate": 8.406904552649614e-05, "loss": 1.7673, "step": 4912 }, { "epoch": 0.2738420377905356, "grad_norm": 0.7730258107185364, "learning_rate": 8.406252771755758e-05, "loss": 1.9742, "step": 4913 }, { "epoch": 0.2738977760436988, "grad_norm": 0.5349806547164917, "learning_rate": 8.405600882835425e-05, "loss": 1.6226, "step": 4914 }, { "epoch": 0.27395351429686193, "grad_norm": 0.5271722674369812, "learning_rate": 8.404948885909288e-05, "loss": 1.7948, "step": 4915 }, { "epoch": 0.27400925255002506, "grad_norm": 0.6604454517364502, "learning_rate": 8.404296780998022e-05, "loss": 1.5653, "step": 4916 }, { "epoch": 0.27406499080318825, "grad_norm": 0.5219733119010925, "learning_rate": 8.403644568122313e-05, "loss": 1.6596, "step": 4917 }, { "epoch": 0.2741207290563514, "grad_norm": 0.5320934653282166, "learning_rate": 8.402992247302842e-05, "loss": 1.7119, "step": 4918 }, { "epoch": 0.2741764673095145, "grad_norm": 0.5232207179069519, "learning_rate": 8.402339818560296e-05, "loss": 1.7161, "step": 4919 }, { "epoch": 0.2742322055626777, "grad_norm": 0.5363631844520569, "learning_rate": 8.401687281915371e-05, "loss": 1.7174, "step": 4920 }, { "epoch": 0.2742879438158408, "grad_norm": 0.5237067937850952, "learning_rate": 8.401034637388758e-05, "loss": 1.5517, "step": 4921 }, { "epoch": 0.27434368206900395, "grad_norm": 0.5529504418373108, "learning_rate": 8.400381885001155e-05, "loss": 1.7067, "step": 4922 }, { "epoch": 0.2743994203221671, "grad_norm": 0.5712334513664246, "learning_rate": 8.399729024773264e-05, "loss": 1.7333, "step": 4923 }, { "epoch": 0.27445515857533026, "grad_norm": 0.5530427098274231, "learning_rate": 8.39907605672579e-05, "loss": 1.7721, "step": 4924 }, { "epoch": 0.2745108968284934, "grad_norm": 0.5096892714500427, "learning_rate": 8.398422980879442e-05, "loss": 1.5788, "step": 4925 }, { "epoch": 0.2745666350816565, "grad_norm": 0.5875157713890076, "learning_rate": 8.39776979725493e-05, "loss": 1.7782, "step": 4926 }, { "epoch": 0.2746223733348197, "grad_norm": 0.5620753169059753, "learning_rate": 8.397116505872973e-05, "loss": 1.6911, "step": 4927 }, { "epoch": 0.27467811158798283, "grad_norm": 0.5037546157836914, "learning_rate": 8.396463106754285e-05, "loss": 1.7944, "step": 4928 }, { "epoch": 0.27473384984114596, "grad_norm": 0.5311979055404663, "learning_rate": 8.395809599919591e-05, "loss": 1.8542, "step": 4929 }, { "epoch": 0.27478958809430915, "grad_norm": 0.5294662714004517, "learning_rate": 8.395155985389615e-05, "loss": 1.582, "step": 4930 }, { "epoch": 0.2748453263474723, "grad_norm": 0.5880303382873535, "learning_rate": 8.394502263185087e-05, "loss": 1.8807, "step": 4931 }, { "epoch": 0.2749010646006354, "grad_norm": 0.5946251153945923, "learning_rate": 8.393848433326736e-05, "loss": 1.8139, "step": 4932 }, { "epoch": 0.2749568028537986, "grad_norm": 0.5572118759155273, "learning_rate": 8.393194495835304e-05, "loss": 1.9141, "step": 4933 }, { "epoch": 0.2750125411069617, "grad_norm": 0.5573039054870605, "learning_rate": 8.392540450731522e-05, "loss": 1.7951, "step": 4934 }, { "epoch": 0.27506827936012485, "grad_norm": 0.540758490562439, "learning_rate": 8.39188629803614e-05, "loss": 1.7804, "step": 4935 }, { "epoch": 0.275124017613288, "grad_norm": 0.5271297693252563, "learning_rate": 8.3912320377699e-05, "loss": 1.82, "step": 4936 }, { "epoch": 0.27517975586645116, "grad_norm": 0.5359855890274048, "learning_rate": 8.390577669953552e-05, "loss": 1.7678, "step": 4937 }, { "epoch": 0.2752354941196143, "grad_norm": 0.5025729537010193, "learning_rate": 8.389923194607849e-05, "loss": 1.5144, "step": 4938 }, { "epoch": 0.2752912323727774, "grad_norm": 0.5402054190635681, "learning_rate": 8.389268611753546e-05, "loss": 1.6204, "step": 4939 }, { "epoch": 0.2753469706259406, "grad_norm": 0.5499907732009888, "learning_rate": 8.388613921411404e-05, "loss": 1.6948, "step": 4940 }, { "epoch": 0.27540270887910373, "grad_norm": 0.6044038534164429, "learning_rate": 8.387959123602185e-05, "loss": 1.5522, "step": 4941 }, { "epoch": 0.27545844713226686, "grad_norm": 0.5463374853134155, "learning_rate": 8.387304218346656e-05, "loss": 1.6392, "step": 4942 }, { "epoch": 0.27551418538543004, "grad_norm": 0.5164476633071899, "learning_rate": 8.386649205665586e-05, "loss": 1.674, "step": 4943 }, { "epoch": 0.2755699236385932, "grad_norm": 0.6093559861183167, "learning_rate": 8.385994085579751e-05, "loss": 2.0767, "step": 4944 }, { "epoch": 0.2756256618917563, "grad_norm": 0.5542387366294861, "learning_rate": 8.385338858109922e-05, "loss": 1.8275, "step": 4945 }, { "epoch": 0.27568140014491943, "grad_norm": 0.5787892937660217, "learning_rate": 8.384683523276885e-05, "loss": 1.5918, "step": 4946 }, { "epoch": 0.2757371383980826, "grad_norm": 0.5294553637504578, "learning_rate": 8.38402808110142e-05, "loss": 1.6857, "step": 4947 }, { "epoch": 0.27579287665124574, "grad_norm": 0.5397957563400269, "learning_rate": 8.383372531604314e-05, "loss": 1.6894, "step": 4948 }, { "epoch": 0.2758486149044089, "grad_norm": 0.5266357660293579, "learning_rate": 8.382716874806357e-05, "loss": 1.7214, "step": 4949 }, { "epoch": 0.27590435315757206, "grad_norm": 0.5046342611312866, "learning_rate": 8.382061110728345e-05, "loss": 1.4341, "step": 4950 }, { "epoch": 0.2759600914107352, "grad_norm": 0.5609323382377625, "learning_rate": 8.381405239391074e-05, "loss": 1.7528, "step": 4951 }, { "epoch": 0.2760158296638983, "grad_norm": 0.5804145336151123, "learning_rate": 8.38074926081534e-05, "loss": 1.8709, "step": 4952 }, { "epoch": 0.2760715679170615, "grad_norm": 0.5542110204696655, "learning_rate": 8.380093175021953e-05, "loss": 1.8472, "step": 4953 }, { "epoch": 0.27612730617022463, "grad_norm": 0.5371457934379578, "learning_rate": 8.379436982031718e-05, "loss": 1.5508, "step": 4954 }, { "epoch": 0.27618304442338776, "grad_norm": 0.6307567358016968, "learning_rate": 8.378780681865445e-05, "loss": 1.7762, "step": 4955 }, { "epoch": 0.27623878267655094, "grad_norm": 0.6115426421165466, "learning_rate": 8.37812427454395e-05, "loss": 1.8666, "step": 4956 }, { "epoch": 0.27629452092971407, "grad_norm": 0.5419024229049683, "learning_rate": 8.377467760088046e-05, "loss": 1.6681, "step": 4957 }, { "epoch": 0.2763502591828772, "grad_norm": 0.5587498545646667, "learning_rate": 8.376811138518558e-05, "loss": 1.8999, "step": 4958 }, { "epoch": 0.27640599743604033, "grad_norm": 0.6416218876838684, "learning_rate": 8.376154409856309e-05, "loss": 2.1091, "step": 4959 }, { "epoch": 0.2764617356892035, "grad_norm": 0.5992975234985352, "learning_rate": 8.375497574122127e-05, "loss": 1.837, "step": 4960 }, { "epoch": 0.27651747394236664, "grad_norm": 0.5807574987411499, "learning_rate": 8.374840631336842e-05, "loss": 1.643, "step": 4961 }, { "epoch": 0.27657321219552977, "grad_norm": 0.5473943948745728, "learning_rate": 8.374183581521288e-05, "loss": 1.6044, "step": 4962 }, { "epoch": 0.27662895044869296, "grad_norm": 0.5294444561004639, "learning_rate": 8.373526424696305e-05, "loss": 1.7088, "step": 4963 }, { "epoch": 0.2766846887018561, "grad_norm": 0.5424871444702148, "learning_rate": 8.372869160882733e-05, "loss": 1.5888, "step": 4964 }, { "epoch": 0.2767404269550192, "grad_norm": 0.5405928492546082, "learning_rate": 8.372211790101414e-05, "loss": 1.6905, "step": 4965 }, { "epoch": 0.2767961652081824, "grad_norm": 0.5668782591819763, "learning_rate": 8.3715543123732e-05, "loss": 1.7584, "step": 4966 }, { "epoch": 0.2768519034613455, "grad_norm": 0.586342990398407, "learning_rate": 8.370896727718942e-05, "loss": 1.7863, "step": 4967 }, { "epoch": 0.27690764171450866, "grad_norm": 0.6017349362373352, "learning_rate": 8.370239036159493e-05, "loss": 1.8825, "step": 4968 }, { "epoch": 0.2769633799676718, "grad_norm": 0.5821561813354492, "learning_rate": 8.36958123771571e-05, "loss": 1.9587, "step": 4969 }, { "epoch": 0.27701911822083497, "grad_norm": 0.5764045119285583, "learning_rate": 8.368923332408459e-05, "loss": 1.8635, "step": 4970 }, { "epoch": 0.2770748564739981, "grad_norm": 0.595043957233429, "learning_rate": 8.368265320258598e-05, "loss": 1.7843, "step": 4971 }, { "epoch": 0.27713059472716123, "grad_norm": 0.5718355774879456, "learning_rate": 8.367607201287002e-05, "loss": 1.6231, "step": 4972 }, { "epoch": 0.2771863329803244, "grad_norm": 0.5044475793838501, "learning_rate": 8.366948975514539e-05, "loss": 1.5014, "step": 4973 }, { "epoch": 0.27724207123348754, "grad_norm": 0.5001023411750793, "learning_rate": 8.366290642962087e-05, "loss": 1.522, "step": 4974 }, { "epoch": 0.27729780948665067, "grad_norm": 0.7615741491317749, "learning_rate": 8.36563220365052e-05, "loss": 1.5344, "step": 4975 }, { "epoch": 0.27735354773981385, "grad_norm": 0.47964903712272644, "learning_rate": 8.364973657600724e-05, "loss": 1.4201, "step": 4976 }, { "epoch": 0.277409285992977, "grad_norm": 0.5713698863983154, "learning_rate": 8.364315004833583e-05, "loss": 1.7664, "step": 4977 }, { "epoch": 0.2774650242461401, "grad_norm": 0.5541187524795532, "learning_rate": 8.363656245369984e-05, "loss": 1.75, "step": 4978 }, { "epoch": 0.2775207624993033, "grad_norm": 0.543755054473877, "learning_rate": 8.362997379230822e-05, "loss": 1.6432, "step": 4979 }, { "epoch": 0.2775765007524664, "grad_norm": 0.5810009241104126, "learning_rate": 8.36233840643699e-05, "loss": 1.948, "step": 4980 }, { "epoch": 0.27763223900562956, "grad_norm": 0.5693858861923218, "learning_rate": 8.361679327009388e-05, "loss": 1.8148, "step": 4981 }, { "epoch": 0.2776879772587927, "grad_norm": 0.5942829251289368, "learning_rate": 8.361020140968919e-05, "loss": 1.9087, "step": 4982 }, { "epoch": 0.27774371551195587, "grad_norm": 0.548213541507721, "learning_rate": 8.360360848336484e-05, "loss": 1.7628, "step": 4983 }, { "epoch": 0.277799453765119, "grad_norm": 0.5708996057510376, "learning_rate": 8.359701449132998e-05, "loss": 1.8127, "step": 4984 }, { "epoch": 0.2778551920182821, "grad_norm": 0.5608772039413452, "learning_rate": 8.359041943379369e-05, "loss": 1.5508, "step": 4985 }, { "epoch": 0.2779109302714453, "grad_norm": 0.5337716937065125, "learning_rate": 8.358382331096514e-05, "loss": 1.6666, "step": 4986 }, { "epoch": 0.27796666852460844, "grad_norm": 0.5663906335830688, "learning_rate": 8.357722612305353e-05, "loss": 1.8808, "step": 4987 }, { "epoch": 0.27802240677777157, "grad_norm": 0.5678949952125549, "learning_rate": 8.357062787026805e-05, "loss": 1.7122, "step": 4988 }, { "epoch": 0.27807814503093475, "grad_norm": 0.5173599720001221, "learning_rate": 8.356402855281802e-05, "loss": 1.6552, "step": 4989 }, { "epoch": 0.2781338832840979, "grad_norm": 0.5319927334785461, "learning_rate": 8.355742817091268e-05, "loss": 1.4913, "step": 4990 }, { "epoch": 0.278189621537261, "grad_norm": 0.5666325092315674, "learning_rate": 8.355082672476136e-05, "loss": 1.7334, "step": 4991 }, { "epoch": 0.27824535979042414, "grad_norm": 0.6288278698921204, "learning_rate": 8.354422421457346e-05, "loss": 2.005, "step": 4992 }, { "epoch": 0.2783010980435873, "grad_norm": 0.4918287992477417, "learning_rate": 8.353762064055833e-05, "loss": 1.6484, "step": 4993 }, { "epoch": 0.27835683629675045, "grad_norm": 0.6033855676651001, "learning_rate": 8.353101600292541e-05, "loss": 1.7403, "step": 4994 }, { "epoch": 0.2784125745499136, "grad_norm": 0.5309021472930908, "learning_rate": 8.352441030188417e-05, "loss": 1.6779, "step": 4995 }, { "epoch": 0.27846831280307677, "grad_norm": 0.5141871571540833, "learning_rate": 8.351780353764408e-05, "loss": 1.7298, "step": 4996 }, { "epoch": 0.2785240510562399, "grad_norm": 0.5200504064559937, "learning_rate": 8.351119571041468e-05, "loss": 1.594, "step": 4997 }, { "epoch": 0.278579789309403, "grad_norm": 0.5325762033462524, "learning_rate": 8.350458682040556e-05, "loss": 1.7623, "step": 4998 }, { "epoch": 0.2786355275625662, "grad_norm": 0.539318859577179, "learning_rate": 8.349797686782627e-05, "loss": 1.6779, "step": 4999 }, { "epoch": 0.27869126581572934, "grad_norm": 0.5733089447021484, "learning_rate": 8.349136585288648e-05, "loss": 1.8159, "step": 5000 }, { "epoch": 0.27874700406889247, "grad_norm": 0.5516615509986877, "learning_rate": 8.348475377579583e-05, "loss": 1.6049, "step": 5001 }, { "epoch": 0.27880274232205565, "grad_norm": 0.5449507236480713, "learning_rate": 8.3478140636764e-05, "loss": 1.661, "step": 5002 }, { "epoch": 0.2788584805752188, "grad_norm": 0.5257706642150879, "learning_rate": 8.347152643600076e-05, "loss": 1.6633, "step": 5003 }, { "epoch": 0.2789142188283819, "grad_norm": 0.5481857657432556, "learning_rate": 8.346491117371584e-05, "loss": 1.7599, "step": 5004 }, { "epoch": 0.27896995708154504, "grad_norm": 0.5461267232894897, "learning_rate": 8.345829485011906e-05, "loss": 1.6645, "step": 5005 }, { "epoch": 0.2790256953347082, "grad_norm": 0.5450317859649658, "learning_rate": 8.345167746542024e-05, "loss": 1.7965, "step": 5006 }, { "epoch": 0.27908143358787135, "grad_norm": 0.5598206520080566, "learning_rate": 8.344505901982926e-05, "loss": 1.8171, "step": 5007 }, { "epoch": 0.2791371718410345, "grad_norm": 0.5036829113960266, "learning_rate": 8.343843951355599e-05, "loss": 1.5853, "step": 5008 }, { "epoch": 0.27919291009419767, "grad_norm": 0.5530052185058594, "learning_rate": 8.34318189468104e-05, "loss": 1.8362, "step": 5009 }, { "epoch": 0.2792486483473608, "grad_norm": 0.5920783877372742, "learning_rate": 8.34251973198024e-05, "loss": 1.7712, "step": 5010 }, { "epoch": 0.2793043866005239, "grad_norm": 0.5592779517173767, "learning_rate": 8.341857463274204e-05, "loss": 1.729, "step": 5011 }, { "epoch": 0.2793601248536871, "grad_norm": 0.5464910864830017, "learning_rate": 8.341195088583934e-05, "loss": 1.9075, "step": 5012 }, { "epoch": 0.27941586310685024, "grad_norm": 0.5421869158744812, "learning_rate": 8.340532607930435e-05, "loss": 1.6845, "step": 5013 }, { "epoch": 0.27947160136001337, "grad_norm": 0.6448494791984558, "learning_rate": 8.339870021334721e-05, "loss": 1.677, "step": 5014 }, { "epoch": 0.2795273396131765, "grad_norm": 0.551950991153717, "learning_rate": 8.339207328817801e-05, "loss": 1.7604, "step": 5015 }, { "epoch": 0.2795830778663397, "grad_norm": 0.5297108292579651, "learning_rate": 8.338544530400694e-05, "loss": 1.8327, "step": 5016 }, { "epoch": 0.2796388161195028, "grad_norm": 0.5589694976806641, "learning_rate": 8.337881626104418e-05, "loss": 1.8363, "step": 5017 }, { "epoch": 0.27969455437266594, "grad_norm": 0.5295442342758179, "learning_rate": 8.337218615949999e-05, "loss": 1.5949, "step": 5018 }, { "epoch": 0.2797502926258291, "grad_norm": 0.5680721998214722, "learning_rate": 8.336555499958463e-05, "loss": 1.7101, "step": 5019 }, { "epoch": 0.27980603087899225, "grad_norm": 0.5222816467285156, "learning_rate": 8.33589227815084e-05, "loss": 1.6419, "step": 5020 }, { "epoch": 0.2798617691321554, "grad_norm": 0.5572875142097473, "learning_rate": 8.335228950548164e-05, "loss": 1.5752, "step": 5021 }, { "epoch": 0.27991750738531856, "grad_norm": 0.5234338641166687, "learning_rate": 8.334565517171471e-05, "loss": 1.608, "step": 5022 }, { "epoch": 0.2799732456384817, "grad_norm": 0.5773409008979797, "learning_rate": 8.333901978041801e-05, "loss": 1.8295, "step": 5023 }, { "epoch": 0.2800289838916448, "grad_norm": 0.6236357092857361, "learning_rate": 8.3332383331802e-05, "loss": 2.1082, "step": 5024 }, { "epoch": 0.280084722144808, "grad_norm": 0.5226585865020752, "learning_rate": 8.332574582607712e-05, "loss": 1.5637, "step": 5025 }, { "epoch": 0.28014046039797114, "grad_norm": 0.5552464723587036, "learning_rate": 8.331910726345389e-05, "loss": 1.565, "step": 5026 }, { "epoch": 0.28019619865113427, "grad_norm": 0.5889436602592468, "learning_rate": 8.331246764414282e-05, "loss": 1.6853, "step": 5027 }, { "epoch": 0.2802519369042974, "grad_norm": 0.5935594439506531, "learning_rate": 8.330582696835453e-05, "loss": 1.8281, "step": 5028 }, { "epoch": 0.2803076751574606, "grad_norm": 0.5328096747398376, "learning_rate": 8.329918523629958e-05, "loss": 1.5658, "step": 5029 }, { "epoch": 0.2803634134106237, "grad_norm": 0.5282544493675232, "learning_rate": 8.329254244818862e-05, "loss": 1.5369, "step": 5030 }, { "epoch": 0.28041915166378684, "grad_norm": 0.5771158337593079, "learning_rate": 8.328589860423234e-05, "loss": 1.718, "step": 5031 }, { "epoch": 0.28047488991695, "grad_norm": 0.5074672698974609, "learning_rate": 8.327925370464142e-05, "loss": 1.5096, "step": 5032 }, { "epoch": 0.28053062817011315, "grad_norm": 0.5818241834640503, "learning_rate": 8.32726077496266e-05, "loss": 1.8082, "step": 5033 }, { "epoch": 0.2805863664232763, "grad_norm": 0.5617592930793762, "learning_rate": 8.326596073939865e-05, "loss": 1.885, "step": 5034 }, { "epoch": 0.28064210467643946, "grad_norm": 0.5317988991737366, "learning_rate": 8.325931267416837e-05, "loss": 1.6933, "step": 5035 }, { "epoch": 0.2806978429296026, "grad_norm": 0.5429521799087524, "learning_rate": 8.325266355414663e-05, "loss": 1.7869, "step": 5036 }, { "epoch": 0.2807535811827657, "grad_norm": 0.5846121311187744, "learning_rate": 8.324601337954427e-05, "loss": 1.8213, "step": 5037 }, { "epoch": 0.28080931943592885, "grad_norm": 0.5202860236167908, "learning_rate": 8.323936215057219e-05, "loss": 1.5685, "step": 5038 }, { "epoch": 0.28086505768909203, "grad_norm": 0.5208321213722229, "learning_rate": 8.323270986744136e-05, "loss": 1.6801, "step": 5039 }, { "epoch": 0.28092079594225516, "grad_norm": 0.5601228475570679, "learning_rate": 8.322605653036273e-05, "loss": 1.7527, "step": 5040 }, { "epoch": 0.2809765341954183, "grad_norm": 0.5703938603401184, "learning_rate": 8.32194021395473e-05, "loss": 1.7583, "step": 5041 }, { "epoch": 0.2810322724485815, "grad_norm": 0.5135952234268188, "learning_rate": 8.321274669520613e-05, "loss": 1.6603, "step": 5042 }, { "epoch": 0.2810880107017446, "grad_norm": 0.5345764756202698, "learning_rate": 8.320609019755025e-05, "loss": 1.8041, "step": 5043 }, { "epoch": 0.28114374895490774, "grad_norm": 0.5866489410400391, "learning_rate": 8.319943264679082e-05, "loss": 1.8187, "step": 5044 }, { "epoch": 0.2811994872080709, "grad_norm": 0.5317565202713013, "learning_rate": 8.319277404313895e-05, "loss": 1.627, "step": 5045 }, { "epoch": 0.28125522546123405, "grad_norm": 0.5532716512680054, "learning_rate": 8.318611438680581e-05, "loss": 1.7922, "step": 5046 }, { "epoch": 0.2813109637143972, "grad_norm": 0.5880955457687378, "learning_rate": 8.317945367800262e-05, "loss": 1.9276, "step": 5047 }, { "epoch": 0.28136670196756036, "grad_norm": 0.5237969160079956, "learning_rate": 8.31727919169406e-05, "loss": 1.6415, "step": 5048 }, { "epoch": 0.2814224402207235, "grad_norm": 0.5675956010818481, "learning_rate": 8.316612910383104e-05, "loss": 1.7371, "step": 5049 }, { "epoch": 0.2814781784738866, "grad_norm": 0.5321084260940552, "learning_rate": 8.315946523888523e-05, "loss": 1.5045, "step": 5050 }, { "epoch": 0.28153391672704975, "grad_norm": 0.5198732614517212, "learning_rate": 8.31528003223145e-05, "loss": 1.7094, "step": 5051 }, { "epoch": 0.28158965498021293, "grad_norm": 0.5548423528671265, "learning_rate": 8.314613435433025e-05, "loss": 1.7824, "step": 5052 }, { "epoch": 0.28164539323337606, "grad_norm": 0.5975722074508667, "learning_rate": 8.313946733514388e-05, "loss": 1.6823, "step": 5053 }, { "epoch": 0.2817011314865392, "grad_norm": 0.5505688190460205, "learning_rate": 8.313279926496682e-05, "loss": 1.6891, "step": 5054 }, { "epoch": 0.2817568697397024, "grad_norm": 0.535331666469574, "learning_rate": 8.312613014401053e-05, "loss": 1.6879, "step": 5055 }, { "epoch": 0.2818126079928655, "grad_norm": 0.5429748296737671, "learning_rate": 8.311945997248656e-05, "loss": 1.7741, "step": 5056 }, { "epoch": 0.28186834624602863, "grad_norm": 0.5404984354972839, "learning_rate": 8.31127887506064e-05, "loss": 1.5888, "step": 5057 }, { "epoch": 0.2819240844991918, "grad_norm": 0.6144102811813354, "learning_rate": 8.310611647858164e-05, "loss": 1.8173, "step": 5058 }, { "epoch": 0.28197982275235495, "grad_norm": 0.5709677934646606, "learning_rate": 8.30994431566239e-05, "loss": 1.6492, "step": 5059 }, { "epoch": 0.2820355610055181, "grad_norm": 0.5943745374679565, "learning_rate": 8.309276878494481e-05, "loss": 1.9265, "step": 5060 }, { "epoch": 0.28209129925868126, "grad_norm": 0.5663633942604065, "learning_rate": 8.308609336375601e-05, "loss": 1.5966, "step": 5061 }, { "epoch": 0.2821470375118444, "grad_norm": 0.5235463380813599, "learning_rate": 8.307941689326926e-05, "loss": 1.6598, "step": 5062 }, { "epoch": 0.2822027757650075, "grad_norm": 0.5473840832710266, "learning_rate": 8.307273937369627e-05, "loss": 1.3741, "step": 5063 }, { "epoch": 0.28225851401817065, "grad_norm": 0.6380063891410828, "learning_rate": 8.30660608052488e-05, "loss": 1.7855, "step": 5064 }, { "epoch": 0.28231425227133383, "grad_norm": 0.5315070748329163, "learning_rate": 8.305938118813868e-05, "loss": 1.6285, "step": 5065 }, { "epoch": 0.28236999052449696, "grad_norm": 0.571528971195221, "learning_rate": 8.305270052257773e-05, "loss": 1.8315, "step": 5066 }, { "epoch": 0.2824257287776601, "grad_norm": 0.5939456820487976, "learning_rate": 8.304601880877784e-05, "loss": 1.8598, "step": 5067 }, { "epoch": 0.2824814670308233, "grad_norm": 0.5018705129623413, "learning_rate": 8.30393360469509e-05, "loss": 1.5472, "step": 5068 }, { "epoch": 0.2825372052839864, "grad_norm": 0.5844521522521973, "learning_rate": 8.303265223730885e-05, "loss": 1.8186, "step": 5069 }, { "epoch": 0.28259294353714953, "grad_norm": 0.5360279083251953, "learning_rate": 8.302596738006367e-05, "loss": 1.7101, "step": 5070 }, { "epoch": 0.2826486817903127, "grad_norm": 0.5614787340164185, "learning_rate": 8.301928147542736e-05, "loss": 1.6207, "step": 5071 }, { "epoch": 0.28270442004347585, "grad_norm": 0.5616874098777771, "learning_rate": 8.301259452361197e-05, "loss": 1.7829, "step": 5072 }, { "epoch": 0.282760158296639, "grad_norm": 0.6129429340362549, "learning_rate": 8.300590652482954e-05, "loss": 1.844, "step": 5073 }, { "epoch": 0.2828158965498021, "grad_norm": 0.5966079831123352, "learning_rate": 8.29992174792922e-05, "loss": 1.9242, "step": 5074 }, { "epoch": 0.2828716348029653, "grad_norm": 0.5461622476577759, "learning_rate": 8.299252738721206e-05, "loss": 1.7337, "step": 5075 }, { "epoch": 0.2829273730561284, "grad_norm": 0.5274501442909241, "learning_rate": 8.298583624880135e-05, "loss": 1.6531, "step": 5076 }, { "epoch": 0.28298311130929155, "grad_norm": 0.6280329823493958, "learning_rate": 8.29791440642722e-05, "loss": 1.6198, "step": 5077 }, { "epoch": 0.28303884956245473, "grad_norm": 0.5429005026817322, "learning_rate": 8.297245083383689e-05, "loss": 1.7574, "step": 5078 }, { "epoch": 0.28309458781561786, "grad_norm": 0.586188018321991, "learning_rate": 8.296575655770768e-05, "loss": 1.7325, "step": 5079 }, { "epoch": 0.283150326068781, "grad_norm": 0.48814016580581665, "learning_rate": 8.295906123609688e-05, "loss": 1.6964, "step": 5080 }, { "epoch": 0.2832060643219442, "grad_norm": 0.518273651599884, "learning_rate": 8.295236486921685e-05, "loss": 1.6128, "step": 5081 }, { "epoch": 0.2832618025751073, "grad_norm": 0.5701366066932678, "learning_rate": 8.29456674572799e-05, "loss": 1.8898, "step": 5082 }, { "epoch": 0.28331754082827043, "grad_norm": 0.522463858127594, "learning_rate": 8.293896900049846e-05, "loss": 1.513, "step": 5083 }, { "epoch": 0.2833732790814336, "grad_norm": 0.5641170144081116, "learning_rate": 8.293226949908499e-05, "loss": 1.658, "step": 5084 }, { "epoch": 0.28342901733459674, "grad_norm": 0.5498567223548889, "learning_rate": 8.292556895325194e-05, "loss": 1.6148, "step": 5085 }, { "epoch": 0.2834847555877599, "grad_norm": 0.5941603183746338, "learning_rate": 8.29188673632118e-05, "loss": 1.7469, "step": 5086 }, { "epoch": 0.283540493840923, "grad_norm": 0.5746224522590637, "learning_rate": 8.291216472917714e-05, "loss": 1.6819, "step": 5087 }, { "epoch": 0.2835962320940862, "grad_norm": 0.6701369285583496, "learning_rate": 8.290546105136048e-05, "loss": 1.3384, "step": 5088 }, { "epoch": 0.2836519703472493, "grad_norm": 0.5807752013206482, "learning_rate": 8.289875632997446e-05, "loss": 1.6534, "step": 5089 }, { "epoch": 0.28370770860041244, "grad_norm": 0.5432621240615845, "learning_rate": 8.289205056523168e-05, "loss": 1.6963, "step": 5090 }, { "epoch": 0.28376344685357563, "grad_norm": 0.5509108901023865, "learning_rate": 8.288534375734486e-05, "loss": 1.6027, "step": 5091 }, { "epoch": 0.28381918510673876, "grad_norm": 0.5456513166427612, "learning_rate": 8.287863590652666e-05, "loss": 1.6362, "step": 5092 }, { "epoch": 0.2838749233599019, "grad_norm": 0.5441727042198181, "learning_rate": 8.287192701298982e-05, "loss": 1.5781, "step": 5093 }, { "epoch": 0.28393066161306507, "grad_norm": 0.5558503866195679, "learning_rate": 8.286521707694712e-05, "loss": 1.8077, "step": 5094 }, { "epoch": 0.2839863998662282, "grad_norm": 0.5933700799942017, "learning_rate": 8.285850609861134e-05, "loss": 1.8407, "step": 5095 }, { "epoch": 0.28404213811939133, "grad_norm": 0.557685375213623, "learning_rate": 8.285179407819534e-05, "loss": 1.579, "step": 5096 }, { "epoch": 0.28409787637255446, "grad_norm": 0.5183169841766357, "learning_rate": 8.284508101591198e-05, "loss": 1.3955, "step": 5097 }, { "epoch": 0.28415361462571764, "grad_norm": 0.5807473659515381, "learning_rate": 8.283836691197413e-05, "loss": 1.8429, "step": 5098 }, { "epoch": 0.28420935287888077, "grad_norm": 0.6236990690231323, "learning_rate": 8.283165176659474e-05, "loss": 1.8281, "step": 5099 }, { "epoch": 0.2842650911320439, "grad_norm": 0.5581399202346802, "learning_rate": 8.282493557998678e-05, "loss": 1.764, "step": 5100 }, { "epoch": 0.2843208293852071, "grad_norm": 0.5508102774620056, "learning_rate": 8.281821835236325e-05, "loss": 1.8694, "step": 5101 }, { "epoch": 0.2843765676383702, "grad_norm": 0.6012663841247559, "learning_rate": 8.281150008393718e-05, "loss": 1.8829, "step": 5102 }, { "epoch": 0.28443230589153334, "grad_norm": 0.5453019738197327, "learning_rate": 8.280478077492163e-05, "loss": 1.8996, "step": 5103 }, { "epoch": 0.28448804414469653, "grad_norm": 0.5334420204162598, "learning_rate": 8.27980604255297e-05, "loss": 1.7342, "step": 5104 }, { "epoch": 0.28454378239785966, "grad_norm": 0.5454635620117188, "learning_rate": 8.279133903597451e-05, "loss": 1.7496, "step": 5105 }, { "epoch": 0.2845995206510228, "grad_norm": 0.5557402968406677, "learning_rate": 8.278461660646925e-05, "loss": 1.63, "step": 5106 }, { "epoch": 0.28465525890418597, "grad_norm": 0.5542622208595276, "learning_rate": 8.27778931372271e-05, "loss": 1.6639, "step": 5107 }, { "epoch": 0.2847109971573491, "grad_norm": 0.565591037273407, "learning_rate": 8.277116862846126e-05, "loss": 1.9303, "step": 5108 }, { "epoch": 0.28476673541051223, "grad_norm": 0.6099279522895813, "learning_rate": 8.276444308038504e-05, "loss": 1.7833, "step": 5109 }, { "epoch": 0.28482247366367536, "grad_norm": 0.6192046999931335, "learning_rate": 8.27577164932117e-05, "loss": 1.9167, "step": 5110 }, { "epoch": 0.28487821191683854, "grad_norm": 0.5659559965133667, "learning_rate": 8.275098886715462e-05, "loss": 1.7716, "step": 5111 }, { "epoch": 0.28493395017000167, "grad_norm": 0.6038410067558289, "learning_rate": 8.274426020242709e-05, "loss": 1.9078, "step": 5112 }, { "epoch": 0.2849896884231648, "grad_norm": 0.5924156904220581, "learning_rate": 8.273753049924256e-05, "loss": 1.7014, "step": 5113 }, { "epoch": 0.285045426676328, "grad_norm": 0.5436737537384033, "learning_rate": 8.273079975781442e-05, "loss": 1.6482, "step": 5114 }, { "epoch": 0.2851011649294911, "grad_norm": 0.5460022687911987, "learning_rate": 8.272406797835614e-05, "loss": 1.7304, "step": 5115 }, { "epoch": 0.28515690318265424, "grad_norm": 0.5954405069351196, "learning_rate": 8.271733516108125e-05, "loss": 1.6698, "step": 5116 }, { "epoch": 0.2852126414358174, "grad_norm": 0.638888418674469, "learning_rate": 8.27106013062032e-05, "loss": 2.0553, "step": 5117 }, { "epoch": 0.28526837968898056, "grad_norm": 0.5477131605148315, "learning_rate": 8.270386641393564e-05, "loss": 1.5031, "step": 5118 }, { "epoch": 0.2853241179421437, "grad_norm": 0.5998544692993164, "learning_rate": 8.269713048449208e-05, "loss": 1.9087, "step": 5119 }, { "epoch": 0.2853798561953068, "grad_norm": 0.5584544539451599, "learning_rate": 8.26903935180862e-05, "loss": 1.8125, "step": 5120 }, { "epoch": 0.28543559444847, "grad_norm": 0.5390369892120361, "learning_rate": 8.268365551493161e-05, "loss": 1.6459, "step": 5121 }, { "epoch": 0.2854913327016331, "grad_norm": 0.5171942710876465, "learning_rate": 8.267691647524206e-05, "loss": 1.6801, "step": 5122 }, { "epoch": 0.28554707095479626, "grad_norm": 0.4894436299800873, "learning_rate": 8.26701763992312e-05, "loss": 1.4172, "step": 5123 }, { "epoch": 0.28560280920795944, "grad_norm": 0.5318630337715149, "learning_rate": 8.266343528711285e-05, "loss": 1.6956, "step": 5124 }, { "epoch": 0.28565854746112257, "grad_norm": 0.513378918170929, "learning_rate": 8.265669313910077e-05, "loss": 1.5235, "step": 5125 }, { "epoch": 0.2857142857142857, "grad_norm": 0.6027741432189941, "learning_rate": 8.264994995540878e-05, "loss": 1.9089, "step": 5126 }, { "epoch": 0.2857700239674489, "grad_norm": 0.5300361514091492, "learning_rate": 8.264320573625075e-05, "loss": 1.6013, "step": 5127 }, { "epoch": 0.285825762220612, "grad_norm": 0.5484519600868225, "learning_rate": 8.263646048184055e-05, "loss": 1.6596, "step": 5128 }, { "epoch": 0.28588150047377514, "grad_norm": 0.6186813116073608, "learning_rate": 8.26297141923921e-05, "loss": 1.7786, "step": 5129 }, { "epoch": 0.2859372387269383, "grad_norm": 0.5475611686706543, "learning_rate": 8.262296686811936e-05, "loss": 1.6151, "step": 5130 }, { "epoch": 0.28599297698010145, "grad_norm": 0.612417995929718, "learning_rate": 8.261621850923634e-05, "loss": 1.587, "step": 5131 }, { "epoch": 0.2860487152332646, "grad_norm": 0.5619268417358398, "learning_rate": 8.260946911595701e-05, "loss": 1.6915, "step": 5132 }, { "epoch": 0.2861044534864277, "grad_norm": 0.5510770678520203, "learning_rate": 8.260271868849547e-05, "loss": 1.9188, "step": 5133 }, { "epoch": 0.2861601917395909, "grad_norm": 0.5569331049919128, "learning_rate": 8.259596722706575e-05, "loss": 1.7657, "step": 5134 }, { "epoch": 0.286215929992754, "grad_norm": 0.48364466428756714, "learning_rate": 8.258921473188202e-05, "loss": 1.3247, "step": 5135 }, { "epoch": 0.28627166824591715, "grad_norm": 0.5114015936851501, "learning_rate": 8.25824612031584e-05, "loss": 1.6025, "step": 5136 }, { "epoch": 0.28632740649908034, "grad_norm": 0.5254806876182556, "learning_rate": 8.257570664110907e-05, "loss": 1.7264, "step": 5137 }, { "epoch": 0.28638314475224347, "grad_norm": 0.5384583473205566, "learning_rate": 8.256895104594828e-05, "loss": 1.802, "step": 5138 }, { "epoch": 0.2864388830054066, "grad_norm": 0.5924034118652344, "learning_rate": 8.256219441789022e-05, "loss": 1.9493, "step": 5139 }, { "epoch": 0.2864946212585698, "grad_norm": 0.5453627705574036, "learning_rate": 8.255543675714923e-05, "loss": 1.5655, "step": 5140 }, { "epoch": 0.2865503595117329, "grad_norm": 0.535179853439331, "learning_rate": 8.254867806393957e-05, "loss": 1.5492, "step": 5141 }, { "epoch": 0.28660609776489604, "grad_norm": 0.5418823957443237, "learning_rate": 8.254191833847564e-05, "loss": 1.7343, "step": 5142 }, { "epoch": 0.28666183601805917, "grad_norm": 0.5330826044082642, "learning_rate": 8.253515758097179e-05, "loss": 1.6551, "step": 5143 }, { "epoch": 0.28671757427122235, "grad_norm": 0.6033239960670471, "learning_rate": 8.252839579164243e-05, "loss": 1.8227, "step": 5144 }, { "epoch": 0.2867733125243855, "grad_norm": 0.5882185697555542, "learning_rate": 8.252163297070201e-05, "loss": 1.9731, "step": 5145 }, { "epoch": 0.2868290507775486, "grad_norm": 0.537185788154602, "learning_rate": 8.251486911836501e-05, "loss": 1.5992, "step": 5146 }, { "epoch": 0.2868847890307118, "grad_norm": 0.5307870507240295, "learning_rate": 8.250810423484592e-05, "loss": 1.5641, "step": 5147 }, { "epoch": 0.2869405272838749, "grad_norm": 0.5483027696609497, "learning_rate": 8.25013383203593e-05, "loss": 1.759, "step": 5148 }, { "epoch": 0.28699626553703805, "grad_norm": 0.5503141283988953, "learning_rate": 8.249457137511976e-05, "loss": 1.7229, "step": 5149 }, { "epoch": 0.28705200379020124, "grad_norm": 0.5450831651687622, "learning_rate": 8.248780339934183e-05, "loss": 1.6758, "step": 5150 }, { "epoch": 0.28710774204336437, "grad_norm": 0.5555149912834167, "learning_rate": 8.248103439324022e-05, "loss": 1.7173, "step": 5151 }, { "epoch": 0.2871634802965275, "grad_norm": 0.5960267186164856, "learning_rate": 8.247426435702956e-05, "loss": 1.8327, "step": 5152 }, { "epoch": 0.2872192185496907, "grad_norm": 0.5497944951057434, "learning_rate": 8.246749329092458e-05, "loss": 1.6373, "step": 5153 }, { "epoch": 0.2872749568028538, "grad_norm": 0.6035077571868896, "learning_rate": 8.246072119514e-05, "loss": 2.0384, "step": 5154 }, { "epoch": 0.28733069505601694, "grad_norm": 0.5685641765594482, "learning_rate": 8.245394806989062e-05, "loss": 1.9093, "step": 5155 }, { "epoch": 0.28738643330918007, "grad_norm": 0.5542479753494263, "learning_rate": 8.244717391539124e-05, "loss": 1.6794, "step": 5156 }, { "epoch": 0.28744217156234325, "grad_norm": 0.5434539318084717, "learning_rate": 8.244039873185664e-05, "loss": 1.6624, "step": 5157 }, { "epoch": 0.2874979098155064, "grad_norm": 0.5240741968154907, "learning_rate": 8.243362251950177e-05, "loss": 1.7119, "step": 5158 }, { "epoch": 0.2875536480686695, "grad_norm": 0.5400795340538025, "learning_rate": 8.242684527854148e-05, "loss": 1.7379, "step": 5159 }, { "epoch": 0.2876093863218327, "grad_norm": 0.5450997352600098, "learning_rate": 8.242006700919072e-05, "loss": 1.648, "step": 5160 }, { "epoch": 0.2876651245749958, "grad_norm": 0.5497955679893494, "learning_rate": 8.241328771166446e-05, "loss": 1.8969, "step": 5161 }, { "epoch": 0.28772086282815895, "grad_norm": 0.556607186794281, "learning_rate": 8.24065073861777e-05, "loss": 1.7941, "step": 5162 }, { "epoch": 0.28777660108132214, "grad_norm": 0.5775546431541443, "learning_rate": 8.239972603294546e-05, "loss": 1.7996, "step": 5163 }, { "epoch": 0.28783233933448527, "grad_norm": 0.5500494241714478, "learning_rate": 8.239294365218282e-05, "loss": 1.486, "step": 5164 }, { "epoch": 0.2878880775876484, "grad_norm": 0.5263432860374451, "learning_rate": 8.238616024410486e-05, "loss": 1.8011, "step": 5165 }, { "epoch": 0.2879438158408115, "grad_norm": 0.580796480178833, "learning_rate": 8.237937580892674e-05, "loss": 1.7308, "step": 5166 }, { "epoch": 0.2879995540939747, "grad_norm": 0.5561580657958984, "learning_rate": 8.237259034686359e-05, "loss": 1.7732, "step": 5167 }, { "epoch": 0.28805529234713784, "grad_norm": 0.5456521511077881, "learning_rate": 8.236580385813062e-05, "loss": 1.6932, "step": 5168 }, { "epoch": 0.28811103060030097, "grad_norm": 0.5676544904708862, "learning_rate": 8.235901634294306e-05, "loss": 1.8033, "step": 5169 }, { "epoch": 0.28816676885346415, "grad_norm": 0.5046932697296143, "learning_rate": 8.235222780151616e-05, "loss": 1.5637, "step": 5170 }, { "epoch": 0.2882225071066273, "grad_norm": 0.5261063575744629, "learning_rate": 8.234543823406525e-05, "loss": 1.5763, "step": 5171 }, { "epoch": 0.2882782453597904, "grad_norm": 0.5619118809700012, "learning_rate": 8.23386476408056e-05, "loss": 1.7251, "step": 5172 }, { "epoch": 0.2883339836129536, "grad_norm": 0.5556089282035828, "learning_rate": 8.233185602195259e-05, "loss": 1.7168, "step": 5173 }, { "epoch": 0.2883897218661167, "grad_norm": 0.5449663400650024, "learning_rate": 8.232506337772163e-05, "loss": 1.7282, "step": 5174 }, { "epoch": 0.28844546011927985, "grad_norm": 0.5821020007133484, "learning_rate": 8.231826970832812e-05, "loss": 2.0267, "step": 5175 }, { "epoch": 0.28850119837244304, "grad_norm": 0.5104268193244934, "learning_rate": 8.231147501398753e-05, "loss": 1.4387, "step": 5176 }, { "epoch": 0.28855693662560616, "grad_norm": 0.548219621181488, "learning_rate": 8.230467929491534e-05, "loss": 1.7042, "step": 5177 }, { "epoch": 0.2886126748787693, "grad_norm": 0.5711565017700195, "learning_rate": 8.229788255132706e-05, "loss": 1.6752, "step": 5178 }, { "epoch": 0.2886684131319324, "grad_norm": 0.526942789554596, "learning_rate": 8.229108478343827e-05, "loss": 1.5905, "step": 5179 }, { "epoch": 0.2887241513850956, "grad_norm": 0.5535737872123718, "learning_rate": 8.228428599146453e-05, "loss": 1.6857, "step": 5180 }, { "epoch": 0.28877988963825874, "grad_norm": 0.5093039870262146, "learning_rate": 8.227748617562147e-05, "loss": 1.6489, "step": 5181 }, { "epoch": 0.28883562789142186, "grad_norm": 0.5642322301864624, "learning_rate": 8.227068533612475e-05, "loss": 1.8709, "step": 5182 }, { "epoch": 0.28889136614458505, "grad_norm": 0.5547685623168945, "learning_rate": 8.226388347319004e-05, "loss": 1.7088, "step": 5183 }, { "epoch": 0.2889471043977482, "grad_norm": 0.5316441059112549, "learning_rate": 8.225708058703305e-05, "loss": 1.59, "step": 5184 }, { "epoch": 0.2890028426509113, "grad_norm": 0.5305221080780029, "learning_rate": 8.225027667786955e-05, "loss": 1.4301, "step": 5185 }, { "epoch": 0.2890585809040745, "grad_norm": 0.5498524904251099, "learning_rate": 8.224347174591529e-05, "loss": 1.533, "step": 5186 }, { "epoch": 0.2891143191572376, "grad_norm": 0.5519589781761169, "learning_rate": 8.22366657913861e-05, "loss": 1.7171, "step": 5187 }, { "epoch": 0.28917005741040075, "grad_norm": 0.5893858075141907, "learning_rate": 8.222985881449783e-05, "loss": 1.7751, "step": 5188 }, { "epoch": 0.2892257956635639, "grad_norm": 0.5334852933883667, "learning_rate": 8.222305081546635e-05, "loss": 1.6905, "step": 5189 }, { "epoch": 0.28928153391672706, "grad_norm": 0.5692505836486816, "learning_rate": 8.221624179450757e-05, "loss": 1.6461, "step": 5190 }, { "epoch": 0.2893372721698902, "grad_norm": 0.5988993644714355, "learning_rate": 8.220943175183743e-05, "loss": 2.0131, "step": 5191 }, { "epoch": 0.2893930104230533, "grad_norm": 0.6873819231987, "learning_rate": 8.220262068767191e-05, "loss": 1.977, "step": 5192 }, { "epoch": 0.2894487486762165, "grad_norm": 0.5408362746238708, "learning_rate": 8.219580860222701e-05, "loss": 1.6866, "step": 5193 }, { "epoch": 0.28950448692937963, "grad_norm": 0.8928006291389465, "learning_rate": 8.218899549571878e-05, "loss": 1.6639, "step": 5194 }, { "epoch": 0.28956022518254276, "grad_norm": 0.5256812572479248, "learning_rate": 8.218218136836331e-05, "loss": 1.435, "step": 5195 }, { "epoch": 0.28961596343570595, "grad_norm": 0.5350750684738159, "learning_rate": 8.217536622037667e-05, "loss": 1.6317, "step": 5196 }, { "epoch": 0.2896717016888691, "grad_norm": 0.5534375309944153, "learning_rate": 8.2168550051975e-05, "loss": 1.7473, "step": 5197 }, { "epoch": 0.2897274399420322, "grad_norm": 0.5433312058448792, "learning_rate": 8.216173286337448e-05, "loss": 1.8094, "step": 5198 }, { "epoch": 0.2897831781951954, "grad_norm": 0.5386417508125305, "learning_rate": 8.215491465479133e-05, "loss": 1.5757, "step": 5199 }, { "epoch": 0.2898389164483585, "grad_norm": 0.6519530415534973, "learning_rate": 8.214809542644173e-05, "loss": 1.9404, "step": 5200 }, { "epoch": 0.28989465470152165, "grad_norm": 0.6092321872711182, "learning_rate": 8.214127517854199e-05, "loss": 1.8751, "step": 5201 }, { "epoch": 0.2899503929546848, "grad_norm": 0.5904344320297241, "learning_rate": 8.213445391130841e-05, "loss": 1.8278, "step": 5202 }, { "epoch": 0.29000613120784796, "grad_norm": 0.6538552045822144, "learning_rate": 8.212763162495729e-05, "loss": 1.683, "step": 5203 }, { "epoch": 0.2900618694610111, "grad_norm": 0.5683111548423767, "learning_rate": 8.212080831970503e-05, "loss": 1.6758, "step": 5204 }, { "epoch": 0.2901176077141742, "grad_norm": 0.5633412599563599, "learning_rate": 8.2113983995768e-05, "loss": 1.7229, "step": 5205 }, { "epoch": 0.2901733459673374, "grad_norm": 0.5722443461418152, "learning_rate": 8.210715865336263e-05, "loss": 1.8076, "step": 5206 }, { "epoch": 0.29022908422050053, "grad_norm": 0.562892496585846, "learning_rate": 8.21003322927054e-05, "loss": 1.672, "step": 5207 }, { "epoch": 0.29028482247366366, "grad_norm": 0.5266914367675781, "learning_rate": 8.209350491401277e-05, "loss": 1.6009, "step": 5208 }, { "epoch": 0.29034056072682685, "grad_norm": 0.576404869556427, "learning_rate": 8.20866765175013e-05, "loss": 1.8675, "step": 5209 }, { "epoch": 0.29039629897999, "grad_norm": 0.6091673374176025, "learning_rate": 8.207984710338752e-05, "loss": 1.7122, "step": 5210 }, { "epoch": 0.2904520372331531, "grad_norm": 0.590103030204773, "learning_rate": 8.207301667188803e-05, "loss": 1.5629, "step": 5211 }, { "epoch": 0.29050777548631623, "grad_norm": 0.5491459369659424, "learning_rate": 8.206618522321945e-05, "loss": 1.6373, "step": 5212 }, { "epoch": 0.2905635137394794, "grad_norm": 0.5361247062683105, "learning_rate": 8.205935275759842e-05, "loss": 1.7587, "step": 5213 }, { "epoch": 0.29061925199264255, "grad_norm": 0.5602622628211975, "learning_rate": 8.205251927524164e-05, "loss": 1.6596, "step": 5214 }, { "epoch": 0.2906749902458057, "grad_norm": 0.5763882994651794, "learning_rate": 8.204568477636585e-05, "loss": 1.7195, "step": 5215 }, { "epoch": 0.29073072849896886, "grad_norm": 0.5280525088310242, "learning_rate": 8.203884926118777e-05, "loss": 1.6929, "step": 5216 }, { "epoch": 0.290786466752132, "grad_norm": 0.5279143452644348, "learning_rate": 8.203201272992419e-05, "loss": 1.4884, "step": 5217 }, { "epoch": 0.2908422050052951, "grad_norm": 0.5360000729560852, "learning_rate": 8.202517518279193e-05, "loss": 1.6383, "step": 5218 }, { "epoch": 0.2908979432584583, "grad_norm": 0.5178120732307434, "learning_rate": 8.201833662000781e-05, "loss": 1.3916, "step": 5219 }, { "epoch": 0.29095368151162143, "grad_norm": 0.5441476702690125, "learning_rate": 8.201149704178875e-05, "loss": 1.8316, "step": 5220 }, { "epoch": 0.29100941976478456, "grad_norm": 0.5272539854049683, "learning_rate": 8.200465644835165e-05, "loss": 1.479, "step": 5221 }, { "epoch": 0.29106515801794774, "grad_norm": 0.5858429074287415, "learning_rate": 8.199781483991345e-05, "loss": 1.8735, "step": 5222 }, { "epoch": 0.2911208962711109, "grad_norm": 0.5939355492591858, "learning_rate": 8.19909722166911e-05, "loss": 1.8911, "step": 5223 }, { "epoch": 0.291176634524274, "grad_norm": 0.6942164301872253, "learning_rate": 8.198412857890166e-05, "loss": 1.5865, "step": 5224 }, { "epoch": 0.29123237277743713, "grad_norm": 0.5283763408660889, "learning_rate": 8.197728392676211e-05, "loss": 1.518, "step": 5225 }, { "epoch": 0.2912881110306003, "grad_norm": 0.5898897051811218, "learning_rate": 8.197043826048957e-05, "loss": 1.4729, "step": 5226 }, { "epoch": 0.29134384928376345, "grad_norm": 0.6161963939666748, "learning_rate": 8.196359158030113e-05, "loss": 1.7724, "step": 5227 }, { "epoch": 0.2913995875369266, "grad_norm": 0.5693463683128357, "learning_rate": 8.195674388641393e-05, "loss": 1.7379, "step": 5228 }, { "epoch": 0.29145532579008976, "grad_norm": 0.5397728681564331, "learning_rate": 8.194989517904513e-05, "loss": 1.694, "step": 5229 }, { "epoch": 0.2915110640432529, "grad_norm": 0.5856531858444214, "learning_rate": 8.194304545841193e-05, "loss": 1.7607, "step": 5230 }, { "epoch": 0.291566802296416, "grad_norm": 0.5777943730354309, "learning_rate": 8.19361947247316e-05, "loss": 1.7321, "step": 5231 }, { "epoch": 0.2916225405495792, "grad_norm": 0.5896830558776855, "learning_rate": 8.192934297822133e-05, "loss": 1.7183, "step": 5232 }, { "epoch": 0.29167827880274233, "grad_norm": 0.6119521260261536, "learning_rate": 8.192249021909847e-05, "loss": 1.9229, "step": 5233 }, { "epoch": 0.29173401705590546, "grad_norm": 0.5776544213294983, "learning_rate": 8.191563644758037e-05, "loss": 1.8151, "step": 5234 }, { "epoch": 0.2917897553090686, "grad_norm": 0.510097086429596, "learning_rate": 8.190878166388435e-05, "loss": 1.6619, "step": 5235 }, { "epoch": 0.2918454935622318, "grad_norm": 0.5378518104553223, "learning_rate": 8.19019258682278e-05, "loss": 1.8347, "step": 5236 }, { "epoch": 0.2919012318153949, "grad_norm": 0.5934120416641235, "learning_rate": 8.189506906082818e-05, "loss": 1.7583, "step": 5237 }, { "epoch": 0.29195697006855803, "grad_norm": 0.49861982464790344, "learning_rate": 8.188821124190293e-05, "loss": 1.4644, "step": 5238 }, { "epoch": 0.2920127083217212, "grad_norm": 0.5318624377250671, "learning_rate": 8.188135241166953e-05, "loss": 1.6562, "step": 5239 }, { "epoch": 0.29206844657488434, "grad_norm": 0.5517171621322632, "learning_rate": 8.187449257034552e-05, "loss": 1.6493, "step": 5240 }, { "epoch": 0.2921241848280475, "grad_norm": 0.5400835275650024, "learning_rate": 8.186763171814845e-05, "loss": 1.5672, "step": 5241 }, { "epoch": 0.29217992308121066, "grad_norm": 0.5250990986824036, "learning_rate": 8.186076985529589e-05, "loss": 1.6091, "step": 5242 }, { "epoch": 0.2922356613343738, "grad_norm": 0.5855765342712402, "learning_rate": 8.18539069820055e-05, "loss": 1.8457, "step": 5243 }, { "epoch": 0.2922913995875369, "grad_norm": 0.6245700716972351, "learning_rate": 8.184704309849487e-05, "loss": 1.5562, "step": 5244 }, { "epoch": 0.2923471378407001, "grad_norm": 0.583342432975769, "learning_rate": 8.184017820498173e-05, "loss": 1.8421, "step": 5245 }, { "epoch": 0.29240287609386323, "grad_norm": 0.576387345790863, "learning_rate": 8.183331230168377e-05, "loss": 1.7761, "step": 5246 }, { "epoch": 0.29245861434702636, "grad_norm": 0.5464752316474915, "learning_rate": 8.182644538881873e-05, "loss": 1.6677, "step": 5247 }, { "epoch": 0.2925143526001895, "grad_norm": 0.602606475353241, "learning_rate": 8.181957746660445e-05, "loss": 2.0468, "step": 5248 }, { "epoch": 0.29257009085335267, "grad_norm": 0.535839855670929, "learning_rate": 8.181270853525866e-05, "loss": 1.5903, "step": 5249 }, { "epoch": 0.2926258291065158, "grad_norm": 0.5617656707763672, "learning_rate": 8.180583859499923e-05, "loss": 1.6818, "step": 5250 }, { "epoch": 0.29268156735967893, "grad_norm": 0.5979596972465515, "learning_rate": 8.179896764604407e-05, "loss": 1.7915, "step": 5251 }, { "epoch": 0.2927373056128421, "grad_norm": 0.5312914848327637, "learning_rate": 8.179209568861104e-05, "loss": 1.4523, "step": 5252 }, { "epoch": 0.29279304386600524, "grad_norm": 0.5243698358535767, "learning_rate": 8.178522272291809e-05, "loss": 1.5611, "step": 5253 }, { "epoch": 0.29284878211916837, "grad_norm": 0.5564961433410645, "learning_rate": 8.17783487491832e-05, "loss": 1.7228, "step": 5254 }, { "epoch": 0.29290452037233156, "grad_norm": 0.5704841613769531, "learning_rate": 8.177147376762437e-05, "loss": 1.8324, "step": 5255 }, { "epoch": 0.2929602586254947, "grad_norm": 0.5011201500892639, "learning_rate": 8.176459777845964e-05, "loss": 1.6782, "step": 5256 }, { "epoch": 0.2930159968786578, "grad_norm": 0.4964855909347534, "learning_rate": 8.175772078190707e-05, "loss": 1.4567, "step": 5257 }, { "epoch": 0.29307173513182094, "grad_norm": 0.547637403011322, "learning_rate": 8.175084277818472e-05, "loss": 1.6129, "step": 5258 }, { "epoch": 0.2931274733849841, "grad_norm": 0.5082324743270874, "learning_rate": 8.174396376751079e-05, "loss": 1.5253, "step": 5259 }, { "epoch": 0.29318321163814726, "grad_norm": 0.535663366317749, "learning_rate": 8.173708375010342e-05, "loss": 1.574, "step": 5260 }, { "epoch": 0.2932389498913104, "grad_norm": 0.5733945965766907, "learning_rate": 8.173020272618078e-05, "loss": 1.8022, "step": 5261 }, { "epoch": 0.29329468814447357, "grad_norm": 0.5937253832817078, "learning_rate": 8.172332069596111e-05, "loss": 1.952, "step": 5262 }, { "epoch": 0.2933504263976367, "grad_norm": 0.5622910261154175, "learning_rate": 8.171643765966266e-05, "loss": 1.6838, "step": 5263 }, { "epoch": 0.29340616465079983, "grad_norm": 0.5633754730224609, "learning_rate": 8.170955361750373e-05, "loss": 1.8205, "step": 5264 }, { "epoch": 0.293461902903963, "grad_norm": 0.5639583468437195, "learning_rate": 8.170266856970264e-05, "loss": 1.6995, "step": 5265 }, { "epoch": 0.29351764115712614, "grad_norm": 0.5767412781715393, "learning_rate": 8.169578251647775e-05, "loss": 1.8193, "step": 5266 }, { "epoch": 0.29357337941028927, "grad_norm": 0.5323848128318787, "learning_rate": 8.168889545804743e-05, "loss": 1.6137, "step": 5267 }, { "epoch": 0.29362911766345245, "grad_norm": 0.5105542540550232, "learning_rate": 8.16820073946301e-05, "loss": 1.3883, "step": 5268 }, { "epoch": 0.2936848559166156, "grad_norm": 0.5348597168922424, "learning_rate": 8.167511832644423e-05, "loss": 1.7465, "step": 5269 }, { "epoch": 0.2937405941697787, "grad_norm": 0.5634239315986633, "learning_rate": 8.166822825370828e-05, "loss": 1.8121, "step": 5270 }, { "epoch": 0.29379633242294184, "grad_norm": 0.5704219937324524, "learning_rate": 8.166133717664075e-05, "loss": 1.8007, "step": 5271 }, { "epoch": 0.293852070676105, "grad_norm": 0.5514686703681946, "learning_rate": 8.165444509546023e-05, "loss": 1.7627, "step": 5272 }, { "epoch": 0.29390780892926816, "grad_norm": 0.5763065218925476, "learning_rate": 8.164755201038525e-05, "loss": 1.8668, "step": 5273 }, { "epoch": 0.2939635471824313, "grad_norm": 0.5290045738220215, "learning_rate": 8.164065792163445e-05, "loss": 1.6992, "step": 5274 }, { "epoch": 0.29401928543559447, "grad_norm": 0.5327118039131165, "learning_rate": 8.163376282942645e-05, "loss": 1.6882, "step": 5275 }, { "epoch": 0.2940750236887576, "grad_norm": 0.5230002403259277, "learning_rate": 8.162686673397995e-05, "loss": 1.6314, "step": 5276 }, { "epoch": 0.2941307619419207, "grad_norm": 0.5596842765808105, "learning_rate": 8.161996963551361e-05, "loss": 1.8543, "step": 5277 }, { "epoch": 0.2941865001950839, "grad_norm": 0.4837280809879303, "learning_rate": 8.16130715342462e-05, "loss": 1.407, "step": 5278 }, { "epoch": 0.29424223844824704, "grad_norm": 0.5188647508621216, "learning_rate": 8.160617243039648e-05, "loss": 1.6469, "step": 5279 }, { "epoch": 0.29429797670141017, "grad_norm": 0.5345882177352905, "learning_rate": 8.159927232418325e-05, "loss": 1.762, "step": 5280 }, { "epoch": 0.2943537149545733, "grad_norm": 0.6385248303413391, "learning_rate": 8.159237121582532e-05, "loss": 1.725, "step": 5281 }, { "epoch": 0.2944094532077365, "grad_norm": 0.532394289970398, "learning_rate": 8.158546910554159e-05, "loss": 1.59, "step": 5282 }, { "epoch": 0.2944651914608996, "grad_norm": 0.5918634533882141, "learning_rate": 8.157856599355093e-05, "loss": 1.8722, "step": 5283 }, { "epoch": 0.29452092971406274, "grad_norm": 0.5643036365509033, "learning_rate": 8.157166188007228e-05, "loss": 1.6608, "step": 5284 }, { "epoch": 0.2945766679672259, "grad_norm": 0.5480226874351501, "learning_rate": 8.156475676532458e-05, "loss": 1.6745, "step": 5285 }, { "epoch": 0.29463240622038905, "grad_norm": 0.5562642216682434, "learning_rate": 8.155785064952683e-05, "loss": 1.9036, "step": 5286 }, { "epoch": 0.2946881444735522, "grad_norm": 0.5737085938453674, "learning_rate": 8.155094353289807e-05, "loss": 1.6749, "step": 5287 }, { "epoch": 0.29474388272671537, "grad_norm": 0.537407398223877, "learning_rate": 8.154403541565732e-05, "loss": 1.5855, "step": 5288 }, { "epoch": 0.2947996209798785, "grad_norm": 0.5637186169624329, "learning_rate": 8.153712629802369e-05, "loss": 1.6667, "step": 5289 }, { "epoch": 0.2948553592330416, "grad_norm": 0.587086021900177, "learning_rate": 8.153021618021628e-05, "loss": 1.709, "step": 5290 }, { "epoch": 0.2949110974862048, "grad_norm": 0.5255305767059326, "learning_rate": 8.152330506245425e-05, "loss": 1.4982, "step": 5291 }, { "epoch": 0.29496683573936794, "grad_norm": 0.5582296848297119, "learning_rate": 8.151639294495678e-05, "loss": 1.6915, "step": 5292 }, { "epoch": 0.29502257399253107, "grad_norm": 0.5476033687591553, "learning_rate": 8.150947982794307e-05, "loss": 1.4827, "step": 5293 }, { "epoch": 0.2950783122456942, "grad_norm": 0.548763632774353, "learning_rate": 8.150256571163238e-05, "loss": 1.805, "step": 5294 }, { "epoch": 0.2951340504988574, "grad_norm": 0.58586585521698, "learning_rate": 8.149565059624398e-05, "loss": 1.7433, "step": 5295 }, { "epoch": 0.2951897887520205, "grad_norm": 0.5618621110916138, "learning_rate": 8.148873448199717e-05, "loss": 1.7681, "step": 5296 }, { "epoch": 0.29524552700518364, "grad_norm": 0.5388831496238708, "learning_rate": 8.148181736911129e-05, "loss": 1.582, "step": 5297 }, { "epoch": 0.2953012652583468, "grad_norm": 0.5742696523666382, "learning_rate": 8.147489925780572e-05, "loss": 1.8182, "step": 5298 }, { "epoch": 0.29535700351150995, "grad_norm": 0.5271889567375183, "learning_rate": 8.146798014829986e-05, "loss": 1.4823, "step": 5299 }, { "epoch": 0.2954127417646731, "grad_norm": 0.5565046072006226, "learning_rate": 8.146106004081315e-05, "loss": 1.6328, "step": 5300 }, { "epoch": 0.29546848001783627, "grad_norm": 0.5434616804122925, "learning_rate": 8.145413893556503e-05, "loss": 1.5871, "step": 5301 }, { "epoch": 0.2955242182709994, "grad_norm": 0.5343239903450012, "learning_rate": 8.144721683277504e-05, "loss": 1.6328, "step": 5302 }, { "epoch": 0.2955799565241625, "grad_norm": 0.5372942686080933, "learning_rate": 8.144029373266264e-05, "loss": 1.6885, "step": 5303 }, { "epoch": 0.29563569477732565, "grad_norm": 0.5881915092468262, "learning_rate": 8.143336963544746e-05, "loss": 1.8579, "step": 5304 }, { "epoch": 0.29569143303048884, "grad_norm": 0.5892425179481506, "learning_rate": 8.142644454134905e-05, "loss": 1.8771, "step": 5305 }, { "epoch": 0.29574717128365197, "grad_norm": 0.5286465287208557, "learning_rate": 8.141951845058707e-05, "loss": 1.6766, "step": 5306 }, { "epoch": 0.2958029095368151, "grad_norm": 0.5843679904937744, "learning_rate": 8.141259136338113e-05, "loss": 1.7359, "step": 5307 }, { "epoch": 0.2958586477899783, "grad_norm": 0.6178736090660095, "learning_rate": 8.140566327995094e-05, "loss": 1.9672, "step": 5308 }, { "epoch": 0.2959143860431414, "grad_norm": 0.5524381399154663, "learning_rate": 8.139873420051623e-05, "loss": 1.5947, "step": 5309 }, { "epoch": 0.29597012429630454, "grad_norm": 0.5591756105422974, "learning_rate": 8.139180412529674e-05, "loss": 1.7245, "step": 5310 }, { "epoch": 0.2960258625494677, "grad_norm": 0.5642113089561462, "learning_rate": 8.138487305451224e-05, "loss": 1.7156, "step": 5311 }, { "epoch": 0.29608160080263085, "grad_norm": 0.5767959356307983, "learning_rate": 8.137794098838257e-05, "loss": 1.78, "step": 5312 }, { "epoch": 0.296137339055794, "grad_norm": 0.5422171950340271, "learning_rate": 8.137100792712755e-05, "loss": 1.9258, "step": 5313 }, { "epoch": 0.29619307730895716, "grad_norm": 0.5860824584960938, "learning_rate": 8.136407387096704e-05, "loss": 1.7132, "step": 5314 }, { "epoch": 0.2962488155621203, "grad_norm": 0.6460077166557312, "learning_rate": 8.135713882012102e-05, "loss": 1.8024, "step": 5315 }, { "epoch": 0.2963045538152834, "grad_norm": 0.5744182467460632, "learning_rate": 8.135020277480934e-05, "loss": 1.7025, "step": 5316 }, { "epoch": 0.29636029206844655, "grad_norm": 0.560867965221405, "learning_rate": 8.134326573525202e-05, "loss": 1.7402, "step": 5317 }, { "epoch": 0.29641603032160974, "grad_norm": 0.5005339980125427, "learning_rate": 8.133632770166907e-05, "loss": 1.585, "step": 5318 }, { "epoch": 0.29647176857477286, "grad_norm": 0.5216720700263977, "learning_rate": 8.13293886742805e-05, "loss": 1.7313, "step": 5319 }, { "epoch": 0.296527506827936, "grad_norm": 0.5353510975837708, "learning_rate": 8.132244865330638e-05, "loss": 1.7854, "step": 5320 }, { "epoch": 0.2965832450810992, "grad_norm": 0.5222895741462708, "learning_rate": 8.131550763896682e-05, "loss": 1.6821, "step": 5321 }, { "epoch": 0.2966389833342623, "grad_norm": 0.5571734309196472, "learning_rate": 8.130856563148193e-05, "loss": 1.6151, "step": 5322 }, { "epoch": 0.29669472158742544, "grad_norm": 0.5494416952133179, "learning_rate": 8.130162263107189e-05, "loss": 1.7497, "step": 5323 }, { "epoch": 0.2967504598405886, "grad_norm": 0.5263827443122864, "learning_rate": 8.129467863795688e-05, "loss": 1.7157, "step": 5324 }, { "epoch": 0.29680619809375175, "grad_norm": 0.5756681561470032, "learning_rate": 8.128773365235711e-05, "loss": 1.6488, "step": 5325 }, { "epoch": 0.2968619363469149, "grad_norm": 0.5204091668128967, "learning_rate": 8.128078767449287e-05, "loss": 1.6868, "step": 5326 }, { "epoch": 0.296917674600078, "grad_norm": 0.5748211145401001, "learning_rate": 8.127384070458442e-05, "loss": 1.9352, "step": 5327 }, { "epoch": 0.2969734128532412, "grad_norm": 0.5648884773254395, "learning_rate": 8.126689274285207e-05, "loss": 1.9085, "step": 5328 }, { "epoch": 0.2970291511064043, "grad_norm": 0.5396182537078857, "learning_rate": 8.125994378951619e-05, "loss": 1.715, "step": 5329 }, { "epoch": 0.29708488935956745, "grad_norm": 0.5755982398986816, "learning_rate": 8.125299384479714e-05, "loss": 1.7472, "step": 5330 }, { "epoch": 0.29714062761273063, "grad_norm": 0.5721607804298401, "learning_rate": 8.124604290891535e-05, "loss": 1.8646, "step": 5331 }, { "epoch": 0.29719636586589376, "grad_norm": 0.5612310171127319, "learning_rate": 8.123909098209126e-05, "loss": 1.6506, "step": 5332 }, { "epoch": 0.2972521041190569, "grad_norm": 0.5630115866661072, "learning_rate": 8.123213806454535e-05, "loss": 1.805, "step": 5333 }, { "epoch": 0.2973078423722201, "grad_norm": 0.5319987535476685, "learning_rate": 8.122518415649808e-05, "loss": 1.6501, "step": 5334 }, { "epoch": 0.2973635806253832, "grad_norm": 0.5346727967262268, "learning_rate": 8.121822925817006e-05, "loss": 1.7944, "step": 5335 }, { "epoch": 0.29741931887854633, "grad_norm": 0.5356037616729736, "learning_rate": 8.121127336978183e-05, "loss": 1.5578, "step": 5336 }, { "epoch": 0.2974750571317095, "grad_norm": 0.5593723058700562, "learning_rate": 8.120431649155396e-05, "loss": 1.7118, "step": 5337 }, { "epoch": 0.29753079538487265, "grad_norm": 0.5361452102661133, "learning_rate": 8.11973586237071e-05, "loss": 1.7363, "step": 5338 }, { "epoch": 0.2975865336380358, "grad_norm": 0.5503700971603394, "learning_rate": 8.119039976646192e-05, "loss": 1.74, "step": 5339 }, { "epoch": 0.2976422718911989, "grad_norm": 0.5040326714515686, "learning_rate": 8.118343992003913e-05, "loss": 1.5712, "step": 5340 }, { "epoch": 0.2976980101443621, "grad_norm": 0.5251342058181763, "learning_rate": 8.117647908465942e-05, "loss": 1.5346, "step": 5341 }, { "epoch": 0.2977537483975252, "grad_norm": 0.5664347410202026, "learning_rate": 8.116951726054358e-05, "loss": 2.0871, "step": 5342 }, { "epoch": 0.29780948665068835, "grad_norm": 0.5798686742782593, "learning_rate": 8.116255444791237e-05, "loss": 1.5362, "step": 5343 }, { "epoch": 0.29786522490385153, "grad_norm": 0.5248550772666931, "learning_rate": 8.115559064698662e-05, "loss": 1.5788, "step": 5344 }, { "epoch": 0.29792096315701466, "grad_norm": 0.6149808764457703, "learning_rate": 8.11486258579872e-05, "loss": 1.7055, "step": 5345 }, { "epoch": 0.2979767014101778, "grad_norm": 0.6035127639770508, "learning_rate": 8.114166008113498e-05, "loss": 1.8135, "step": 5346 }, { "epoch": 0.298032439663341, "grad_norm": 0.5967592000961304, "learning_rate": 8.113469331665085e-05, "loss": 1.655, "step": 5347 }, { "epoch": 0.2980881779165041, "grad_norm": 0.5948666334152222, "learning_rate": 8.112772556475579e-05, "loss": 2.0929, "step": 5348 }, { "epoch": 0.29814391616966723, "grad_norm": 0.5955588221549988, "learning_rate": 8.112075682567075e-05, "loss": 1.6594, "step": 5349 }, { "epoch": 0.29819965442283036, "grad_norm": 0.5304718017578125, "learning_rate": 8.111378709961676e-05, "loss": 1.7254, "step": 5350 }, { "epoch": 0.29825539267599355, "grad_norm": 0.5426492691040039, "learning_rate": 8.110681638681485e-05, "loss": 1.7559, "step": 5351 }, { "epoch": 0.2983111309291567, "grad_norm": 0.6616886258125305, "learning_rate": 8.109984468748608e-05, "loss": 1.6271, "step": 5352 }, { "epoch": 0.2983668691823198, "grad_norm": 0.537685751914978, "learning_rate": 8.109287200185157e-05, "loss": 1.6231, "step": 5353 }, { "epoch": 0.298422607435483, "grad_norm": 0.5190281867980957, "learning_rate": 8.108589833013245e-05, "loss": 1.5838, "step": 5354 }, { "epoch": 0.2984783456886461, "grad_norm": 0.5232527852058411, "learning_rate": 8.107892367254986e-05, "loss": 1.5132, "step": 5355 }, { "epoch": 0.29853408394180925, "grad_norm": 0.5797703266143799, "learning_rate": 8.107194802932503e-05, "loss": 1.811, "step": 5356 }, { "epoch": 0.29858982219497243, "grad_norm": 0.5324226021766663, "learning_rate": 8.106497140067916e-05, "loss": 1.8477, "step": 5357 }, { "epoch": 0.29864556044813556, "grad_norm": 0.5274566411972046, "learning_rate": 8.105799378683353e-05, "loss": 1.5521, "step": 5358 }, { "epoch": 0.2987012987012987, "grad_norm": 0.5862823128700256, "learning_rate": 8.10510151880094e-05, "loss": 1.6123, "step": 5359 }, { "epoch": 0.2987570369544619, "grad_norm": 0.5503446459770203, "learning_rate": 8.104403560442813e-05, "loss": 1.6369, "step": 5360 }, { "epoch": 0.298812775207625, "grad_norm": 0.5560075044631958, "learning_rate": 8.103705503631104e-05, "loss": 1.762, "step": 5361 }, { "epoch": 0.29886851346078813, "grad_norm": 0.5699611306190491, "learning_rate": 8.103007348387952e-05, "loss": 1.9896, "step": 5362 }, { "epoch": 0.29892425171395126, "grad_norm": 0.5774125456809998, "learning_rate": 8.102309094735498e-05, "loss": 1.7463, "step": 5363 }, { "epoch": 0.29897998996711445, "grad_norm": 0.5046089887619019, "learning_rate": 8.101610742695889e-05, "loss": 1.4381, "step": 5364 }, { "epoch": 0.2990357282202776, "grad_norm": 0.5611773133277893, "learning_rate": 8.100912292291269e-05, "loss": 1.8118, "step": 5365 }, { "epoch": 0.2990914664734407, "grad_norm": 0.5826941132545471, "learning_rate": 8.100213743543793e-05, "loss": 1.7309, "step": 5366 }, { "epoch": 0.2991472047266039, "grad_norm": 0.5598444938659668, "learning_rate": 8.099515096475611e-05, "loss": 1.7422, "step": 5367 }, { "epoch": 0.299202942979767, "grad_norm": 0.5191280841827393, "learning_rate": 8.098816351108881e-05, "loss": 1.5088, "step": 5368 }, { "epoch": 0.29925868123293015, "grad_norm": 0.589454174041748, "learning_rate": 8.098117507465765e-05, "loss": 1.4643, "step": 5369 }, { "epoch": 0.29931441948609333, "grad_norm": 0.5066042542457581, "learning_rate": 8.097418565568424e-05, "loss": 1.3811, "step": 5370 }, { "epoch": 0.29937015773925646, "grad_norm": 0.5717688798904419, "learning_rate": 8.096719525439026e-05, "loss": 1.5929, "step": 5371 }, { "epoch": 0.2994258959924196, "grad_norm": 0.5810229778289795, "learning_rate": 8.096020387099739e-05, "loss": 1.5428, "step": 5372 }, { "epoch": 0.2994816342455827, "grad_norm": 0.5295297503471375, "learning_rate": 8.095321150572738e-05, "loss": 1.5148, "step": 5373 }, { "epoch": 0.2995373724987459, "grad_norm": 0.6027771234512329, "learning_rate": 8.094621815880197e-05, "loss": 1.898, "step": 5374 }, { "epoch": 0.29959311075190903, "grad_norm": 0.5107868909835815, "learning_rate": 8.093922383044293e-05, "loss": 1.4073, "step": 5375 }, { "epoch": 0.29964884900507216, "grad_norm": 0.5989086031913757, "learning_rate": 8.09322285208721e-05, "loss": 1.7551, "step": 5376 }, { "epoch": 0.29970458725823534, "grad_norm": 0.5706072449684143, "learning_rate": 8.092523223031134e-05, "loss": 1.8272, "step": 5377 }, { "epoch": 0.2997603255113985, "grad_norm": 0.5593813061714172, "learning_rate": 8.091823495898251e-05, "loss": 1.6346, "step": 5378 }, { "epoch": 0.2998160637645616, "grad_norm": 0.5510803461074829, "learning_rate": 8.091123670710754e-05, "loss": 1.7025, "step": 5379 }, { "epoch": 0.2998718020177248, "grad_norm": 0.5860506892204285, "learning_rate": 8.090423747490836e-05, "loss": 1.6895, "step": 5380 }, { "epoch": 0.2999275402708879, "grad_norm": 0.5655683875083923, "learning_rate": 8.089723726260696e-05, "loss": 1.8338, "step": 5381 }, { "epoch": 0.29998327852405104, "grad_norm": 0.5369336605072021, "learning_rate": 8.089023607042534e-05, "loss": 1.65, "step": 5382 }, { "epoch": 0.30003901677721423, "grad_norm": 0.5484170317649841, "learning_rate": 8.088323389858552e-05, "loss": 1.433, "step": 5383 }, { "epoch": 0.30009475503037736, "grad_norm": 0.5139251947402954, "learning_rate": 8.08762307473096e-05, "loss": 1.3703, "step": 5384 }, { "epoch": 0.3001504932835405, "grad_norm": 0.6160516142845154, "learning_rate": 8.086922661681966e-05, "loss": 2.1215, "step": 5385 }, { "epoch": 0.3002062315367036, "grad_norm": 0.5299053192138672, "learning_rate": 8.086222150733782e-05, "loss": 1.5703, "step": 5386 }, { "epoch": 0.3002619697898668, "grad_norm": 0.5320441722869873, "learning_rate": 8.085521541908627e-05, "loss": 1.5785, "step": 5387 }, { "epoch": 0.30031770804302993, "grad_norm": 0.5633600354194641, "learning_rate": 8.084820835228717e-05, "loss": 1.799, "step": 5388 }, { "epoch": 0.30037344629619306, "grad_norm": 0.5468734502792358, "learning_rate": 8.084120030716275e-05, "loss": 1.6782, "step": 5389 }, { "epoch": 0.30042918454935624, "grad_norm": 0.5711122751235962, "learning_rate": 8.083419128393528e-05, "loss": 1.6544, "step": 5390 }, { "epoch": 0.30048492280251937, "grad_norm": 0.5407732129096985, "learning_rate": 8.082718128282705e-05, "loss": 1.7962, "step": 5391 }, { "epoch": 0.3005406610556825, "grad_norm": 0.5521290898323059, "learning_rate": 8.082017030406037e-05, "loss": 1.7551, "step": 5392 }, { "epoch": 0.3005963993088457, "grad_norm": 0.5816917419433594, "learning_rate": 8.081315834785756e-05, "loss": 1.8789, "step": 5393 }, { "epoch": 0.3006521375620088, "grad_norm": 0.5271922945976257, "learning_rate": 8.080614541444103e-05, "loss": 1.7545, "step": 5394 }, { "epoch": 0.30070787581517194, "grad_norm": 0.543911337852478, "learning_rate": 8.079913150403318e-05, "loss": 1.6059, "step": 5395 }, { "epoch": 0.3007636140683351, "grad_norm": 0.547044038772583, "learning_rate": 8.079211661685644e-05, "loss": 2.0125, "step": 5396 }, { "epoch": 0.30081935232149826, "grad_norm": 0.6385172605514526, "learning_rate": 8.07851007531333e-05, "loss": 1.8713, "step": 5397 }, { "epoch": 0.3008750905746614, "grad_norm": 0.5882077813148499, "learning_rate": 8.077808391308626e-05, "loss": 1.6547, "step": 5398 }, { "epoch": 0.3009308288278245, "grad_norm": 0.5390593409538269, "learning_rate": 8.077106609693784e-05, "loss": 1.5186, "step": 5399 }, { "epoch": 0.3009865670809877, "grad_norm": 0.5759447813034058, "learning_rate": 8.076404730491061e-05, "loss": 1.8402, "step": 5400 }, { "epoch": 0.30104230533415083, "grad_norm": 0.5196195244789124, "learning_rate": 8.075702753722718e-05, "loss": 1.656, "step": 5401 }, { "epoch": 0.30109804358731396, "grad_norm": 0.5357980728149414, "learning_rate": 8.075000679411014e-05, "loss": 1.6743, "step": 5402 }, { "epoch": 0.30115378184047714, "grad_norm": 0.5370086431503296, "learning_rate": 8.074298507578218e-05, "loss": 1.7567, "step": 5403 }, { "epoch": 0.30120952009364027, "grad_norm": 0.5173280835151672, "learning_rate": 8.073596238246599e-05, "loss": 1.5783, "step": 5404 }, { "epoch": 0.3012652583468034, "grad_norm": 0.5284645557403564, "learning_rate": 8.072893871438428e-05, "loss": 1.7135, "step": 5405 }, { "epoch": 0.3013209965999666, "grad_norm": 0.5838817954063416, "learning_rate": 8.072191407175976e-05, "loss": 1.8845, "step": 5406 }, { "epoch": 0.3013767348531297, "grad_norm": 0.5520975589752197, "learning_rate": 8.071488845481528e-05, "loss": 1.6139, "step": 5407 }, { "epoch": 0.30143247310629284, "grad_norm": 0.5155717730522156, "learning_rate": 8.07078618637736e-05, "loss": 1.4973, "step": 5408 }, { "epoch": 0.30148821135945597, "grad_norm": 0.5581832528114319, "learning_rate": 8.070083429885758e-05, "loss": 1.7224, "step": 5409 }, { "epoch": 0.30154394961261916, "grad_norm": 0.5734993815422058, "learning_rate": 8.069380576029011e-05, "loss": 1.508, "step": 5410 }, { "epoch": 0.3015996878657823, "grad_norm": 0.5819764733314514, "learning_rate": 8.068677624829406e-05, "loss": 2.0365, "step": 5411 }, { "epoch": 0.3016554261189454, "grad_norm": 0.538995623588562, "learning_rate": 8.067974576309241e-05, "loss": 1.8489, "step": 5412 }, { "epoch": 0.3017111643721086, "grad_norm": 0.5447677373886108, "learning_rate": 8.067271430490809e-05, "loss": 1.7361, "step": 5413 }, { "epoch": 0.3017669026252717, "grad_norm": 0.5370633602142334, "learning_rate": 8.066568187396409e-05, "loss": 1.5648, "step": 5414 }, { "epoch": 0.30182264087843486, "grad_norm": 0.5709346532821655, "learning_rate": 8.065864847048346e-05, "loss": 1.7308, "step": 5415 }, { "epoch": 0.30187837913159804, "grad_norm": 0.5642514824867249, "learning_rate": 8.065161409468925e-05, "loss": 1.9456, "step": 5416 }, { "epoch": 0.30193411738476117, "grad_norm": 0.5522916316986084, "learning_rate": 8.064457874680457e-05, "loss": 1.8213, "step": 5417 }, { "epoch": 0.3019898556379243, "grad_norm": 0.5913909077644348, "learning_rate": 8.06375424270525e-05, "loss": 1.8837, "step": 5418 }, { "epoch": 0.3020455938910874, "grad_norm": 0.596079409122467, "learning_rate": 8.063050513565624e-05, "loss": 1.9783, "step": 5419 }, { "epoch": 0.3021013321442506, "grad_norm": 0.5493654012680054, "learning_rate": 8.062346687283892e-05, "loss": 1.8092, "step": 5420 }, { "epoch": 0.30215707039741374, "grad_norm": 0.5493000745773315, "learning_rate": 8.06164276388238e-05, "loss": 1.6994, "step": 5421 }, { "epoch": 0.30221280865057687, "grad_norm": 0.4986167550086975, "learning_rate": 8.060938743383408e-05, "loss": 1.5504, "step": 5422 }, { "epoch": 0.30226854690374005, "grad_norm": 0.5836266875267029, "learning_rate": 8.060234625809306e-05, "loss": 1.8898, "step": 5423 }, { "epoch": 0.3023242851569032, "grad_norm": 0.5557297468185425, "learning_rate": 8.059530411182406e-05, "loss": 1.7518, "step": 5424 }, { "epoch": 0.3023800234100663, "grad_norm": 0.5643293261528015, "learning_rate": 8.058826099525039e-05, "loss": 1.92, "step": 5425 }, { "epoch": 0.3024357616632295, "grad_norm": 0.5600275993347168, "learning_rate": 8.058121690859541e-05, "loss": 1.7421, "step": 5426 }, { "epoch": 0.3024914999163926, "grad_norm": 0.5405864119529724, "learning_rate": 8.057417185208254e-05, "loss": 1.7487, "step": 5427 }, { "epoch": 0.30254723816955575, "grad_norm": 0.5578258633613586, "learning_rate": 8.056712582593519e-05, "loss": 1.7268, "step": 5428 }, { "epoch": 0.30260297642271894, "grad_norm": 0.5377827286720276, "learning_rate": 8.056007883037682e-05, "loss": 1.8249, "step": 5429 }, { "epoch": 0.30265871467588207, "grad_norm": 0.5574936270713806, "learning_rate": 8.055303086563095e-05, "loss": 1.8337, "step": 5430 }, { "epoch": 0.3027144529290452, "grad_norm": 0.594794511795044, "learning_rate": 8.054598193192106e-05, "loss": 2.0531, "step": 5431 }, { "epoch": 0.3027701911822083, "grad_norm": 0.509722888469696, "learning_rate": 8.053893202947074e-05, "loss": 1.6712, "step": 5432 }, { "epoch": 0.3028259294353715, "grad_norm": 0.5056367516517639, "learning_rate": 8.053188115850354e-05, "loss": 1.5738, "step": 5433 }, { "epoch": 0.30288166768853464, "grad_norm": 0.5353802442550659, "learning_rate": 8.052482931924308e-05, "loss": 1.8257, "step": 5434 }, { "epoch": 0.30293740594169777, "grad_norm": 0.535033106803894, "learning_rate": 8.051777651191299e-05, "loss": 1.7261, "step": 5435 }, { "epoch": 0.30299314419486095, "grad_norm": 0.5537331700325012, "learning_rate": 8.051072273673698e-05, "loss": 1.7634, "step": 5436 }, { "epoch": 0.3030488824480241, "grad_norm": 0.538147509098053, "learning_rate": 8.050366799393874e-05, "loss": 1.5592, "step": 5437 }, { "epoch": 0.3031046207011872, "grad_norm": 0.5110997557640076, "learning_rate": 8.049661228374199e-05, "loss": 1.7104, "step": 5438 }, { "epoch": 0.3031603589543504, "grad_norm": 0.5138676166534424, "learning_rate": 8.04895556063705e-05, "loss": 1.7344, "step": 5439 }, { "epoch": 0.3032160972075135, "grad_norm": 0.5240350961685181, "learning_rate": 8.048249796204808e-05, "loss": 1.6345, "step": 5440 }, { "epoch": 0.30327183546067665, "grad_norm": 0.5258268713951111, "learning_rate": 8.047543935099855e-05, "loss": 1.542, "step": 5441 }, { "epoch": 0.3033275737138398, "grad_norm": 0.5549874901771545, "learning_rate": 8.046837977344577e-05, "loss": 1.8106, "step": 5442 }, { "epoch": 0.30338331196700297, "grad_norm": 0.5787036418914795, "learning_rate": 8.046131922961362e-05, "loss": 1.8995, "step": 5443 }, { "epoch": 0.3034390502201661, "grad_norm": 0.5319430828094482, "learning_rate": 8.045425771972603e-05, "loss": 1.471, "step": 5444 }, { "epoch": 0.3034947884733292, "grad_norm": 0.5467014312744141, "learning_rate": 8.044719524400694e-05, "loss": 1.6613, "step": 5445 }, { "epoch": 0.3035505267264924, "grad_norm": 0.5461364388465881, "learning_rate": 8.044013180268034e-05, "loss": 1.7442, "step": 5446 }, { "epoch": 0.30360626497965554, "grad_norm": 0.5711673498153687, "learning_rate": 8.043306739597024e-05, "loss": 1.7848, "step": 5447 }, { "epoch": 0.30366200323281867, "grad_norm": 0.5382382273674011, "learning_rate": 8.042600202410066e-05, "loss": 1.5744, "step": 5448 }, { "epoch": 0.30371774148598185, "grad_norm": 0.5482212901115417, "learning_rate": 8.041893568729573e-05, "loss": 1.6689, "step": 5449 }, { "epoch": 0.303773479739145, "grad_norm": 0.5345839262008667, "learning_rate": 8.041186838577949e-05, "loss": 1.6285, "step": 5450 }, { "epoch": 0.3038292179923081, "grad_norm": 0.5510614514350891, "learning_rate": 8.04048001197761e-05, "loss": 1.5176, "step": 5451 }, { "epoch": 0.3038849562454713, "grad_norm": 0.5475590825080872, "learning_rate": 8.039773088950973e-05, "loss": 1.6778, "step": 5452 }, { "epoch": 0.3039406944986344, "grad_norm": 0.5662024021148682, "learning_rate": 8.039066069520455e-05, "loss": 1.9253, "step": 5453 }, { "epoch": 0.30399643275179755, "grad_norm": 0.6412192583084106, "learning_rate": 8.038358953708482e-05, "loss": 1.8921, "step": 5454 }, { "epoch": 0.3040521710049607, "grad_norm": 0.5427385568618774, "learning_rate": 8.037651741537478e-05, "loss": 1.6157, "step": 5455 }, { "epoch": 0.30410790925812387, "grad_norm": 0.5492942333221436, "learning_rate": 8.03694443302987e-05, "loss": 1.6204, "step": 5456 }, { "epoch": 0.304163647511287, "grad_norm": 0.5571532249450684, "learning_rate": 8.036237028208092e-05, "loss": 1.6984, "step": 5457 }, { "epoch": 0.3042193857644501, "grad_norm": 0.5320706963539124, "learning_rate": 8.035529527094578e-05, "loss": 1.5733, "step": 5458 }, { "epoch": 0.3042751240176133, "grad_norm": 0.5525981187820435, "learning_rate": 8.034821929711767e-05, "loss": 1.6158, "step": 5459 }, { "epoch": 0.30433086227077644, "grad_norm": 0.5780904293060303, "learning_rate": 8.034114236082098e-05, "loss": 1.8269, "step": 5460 }, { "epoch": 0.30438660052393957, "grad_norm": 0.5405531525611877, "learning_rate": 8.033406446228014e-05, "loss": 1.8742, "step": 5461 }, { "epoch": 0.30444233877710275, "grad_norm": 0.5742613077163696, "learning_rate": 8.032698560171964e-05, "loss": 1.9496, "step": 5462 }, { "epoch": 0.3044980770302659, "grad_norm": 0.49316903948783875, "learning_rate": 8.031990577936398e-05, "loss": 1.5899, "step": 5463 }, { "epoch": 0.304553815283429, "grad_norm": 0.5170844197273254, "learning_rate": 8.031282499543769e-05, "loss": 1.6575, "step": 5464 }, { "epoch": 0.30460955353659214, "grad_norm": 0.5051673650741577, "learning_rate": 8.030574325016532e-05, "loss": 1.5878, "step": 5465 }, { "epoch": 0.3046652917897553, "grad_norm": 0.493794709444046, "learning_rate": 8.029866054377148e-05, "loss": 1.5681, "step": 5466 }, { "epoch": 0.30472103004291845, "grad_norm": 0.5372213125228882, "learning_rate": 8.029157687648077e-05, "loss": 1.6819, "step": 5467 }, { "epoch": 0.3047767682960816, "grad_norm": 0.559104323387146, "learning_rate": 8.028449224851785e-05, "loss": 1.8688, "step": 5468 }, { "epoch": 0.30483250654924476, "grad_norm": 0.558225691318512, "learning_rate": 8.027740666010741e-05, "loss": 1.7629, "step": 5469 }, { "epoch": 0.3048882448024079, "grad_norm": 0.511577844619751, "learning_rate": 8.027032011147417e-05, "loss": 1.594, "step": 5470 }, { "epoch": 0.304943983055571, "grad_norm": 0.5308223962783813, "learning_rate": 8.026323260284286e-05, "loss": 1.6677, "step": 5471 }, { "epoch": 0.3049997213087342, "grad_norm": 0.5670995712280273, "learning_rate": 8.025614413443824e-05, "loss": 1.5382, "step": 5472 }, { "epoch": 0.30505545956189734, "grad_norm": 0.553377091884613, "learning_rate": 8.024905470648516e-05, "loss": 1.59, "step": 5473 }, { "epoch": 0.30511119781506046, "grad_norm": 0.5147939324378967, "learning_rate": 8.024196431920841e-05, "loss": 1.6797, "step": 5474 }, { "epoch": 0.30516693606822365, "grad_norm": 0.5732524394989014, "learning_rate": 8.023487297283289e-05, "loss": 1.7703, "step": 5475 }, { "epoch": 0.3052226743213868, "grad_norm": 0.5088878870010376, "learning_rate": 8.022778066758348e-05, "loss": 1.5239, "step": 5476 }, { "epoch": 0.3052784125745499, "grad_norm": 0.5896703600883484, "learning_rate": 8.02206874036851e-05, "loss": 1.8356, "step": 5477 }, { "epoch": 0.30533415082771304, "grad_norm": 0.5752948522567749, "learning_rate": 8.021359318136273e-05, "loss": 1.8527, "step": 5478 }, { "epoch": 0.3053898890808762, "grad_norm": 0.5507591366767883, "learning_rate": 8.020649800084133e-05, "loss": 1.7682, "step": 5479 }, { "epoch": 0.30544562733403935, "grad_norm": 0.5891523957252502, "learning_rate": 8.019940186234591e-05, "loss": 1.7112, "step": 5480 }, { "epoch": 0.3055013655872025, "grad_norm": 0.5745503306388855, "learning_rate": 8.019230476610155e-05, "loss": 1.7824, "step": 5481 }, { "epoch": 0.30555710384036566, "grad_norm": 0.6154142022132874, "learning_rate": 8.018520671233333e-05, "loss": 1.8217, "step": 5482 }, { "epoch": 0.3056128420935288, "grad_norm": 0.5336470603942871, "learning_rate": 8.017810770126633e-05, "loss": 1.572, "step": 5483 }, { "epoch": 0.3056685803466919, "grad_norm": 0.6083388328552246, "learning_rate": 8.017100773312572e-05, "loss": 1.8889, "step": 5484 }, { "epoch": 0.3057243185998551, "grad_norm": 0.5398688912391663, "learning_rate": 8.016390680813664e-05, "loss": 1.8318, "step": 5485 }, { "epoch": 0.30578005685301823, "grad_norm": 0.5180187225341797, "learning_rate": 8.015680492652432e-05, "loss": 1.4898, "step": 5486 }, { "epoch": 0.30583579510618136, "grad_norm": 0.5112860798835754, "learning_rate": 8.014970208851395e-05, "loss": 1.622, "step": 5487 }, { "epoch": 0.3058915333593445, "grad_norm": 0.5450818538665771, "learning_rate": 8.014259829433082e-05, "loss": 1.5932, "step": 5488 }, { "epoch": 0.3059472716125077, "grad_norm": 0.5598384737968445, "learning_rate": 8.013549354420022e-05, "loss": 1.7663, "step": 5489 }, { "epoch": 0.3060030098656708, "grad_norm": 0.574329137802124, "learning_rate": 8.012838783834749e-05, "loss": 1.7812, "step": 5490 }, { "epoch": 0.30605874811883393, "grad_norm": 0.5636276006698608, "learning_rate": 8.012128117699793e-05, "loss": 1.8031, "step": 5491 }, { "epoch": 0.3061144863719971, "grad_norm": 0.5229976177215576, "learning_rate": 8.011417356037697e-05, "loss": 1.7483, "step": 5492 }, { "epoch": 0.30617022462516025, "grad_norm": 0.5263829231262207, "learning_rate": 8.010706498870997e-05, "loss": 1.6449, "step": 5493 }, { "epoch": 0.3062259628783234, "grad_norm": 0.5461215376853943, "learning_rate": 8.009995546222242e-05, "loss": 1.5837, "step": 5494 }, { "epoch": 0.30628170113148656, "grad_norm": 0.541483998298645, "learning_rate": 8.009284498113979e-05, "loss": 1.7239, "step": 5495 }, { "epoch": 0.3063374393846497, "grad_norm": 0.540389358997345, "learning_rate": 8.008573354568756e-05, "loss": 1.6928, "step": 5496 }, { "epoch": 0.3063931776378128, "grad_norm": 0.550672709941864, "learning_rate": 8.007862115609129e-05, "loss": 1.7299, "step": 5497 }, { "epoch": 0.306448915890976, "grad_norm": 0.532590389251709, "learning_rate": 8.007150781257651e-05, "loss": 1.6299, "step": 5498 }, { "epoch": 0.30650465414413913, "grad_norm": 0.5489155650138855, "learning_rate": 8.006439351536883e-05, "loss": 1.6814, "step": 5499 }, { "epoch": 0.30656039239730226, "grad_norm": 0.5809459090232849, "learning_rate": 8.005727826469389e-05, "loss": 1.7617, "step": 5500 }, { "epoch": 0.3066161306504654, "grad_norm": 0.5688945055007935, "learning_rate": 8.005016206077731e-05, "loss": 1.913, "step": 5501 }, { "epoch": 0.3066718689036286, "grad_norm": 0.5430113673210144, "learning_rate": 8.004304490384482e-05, "loss": 1.6782, "step": 5502 }, { "epoch": 0.3067276071567917, "grad_norm": 0.5550969243049622, "learning_rate": 8.003592679412208e-05, "loss": 1.4965, "step": 5503 }, { "epoch": 0.30678334540995483, "grad_norm": 0.5173535943031311, "learning_rate": 8.00288077318349e-05, "loss": 1.4724, "step": 5504 }, { "epoch": 0.306839083663118, "grad_norm": 0.5464041233062744, "learning_rate": 8.0021687717209e-05, "loss": 1.6722, "step": 5505 }, { "epoch": 0.30689482191628115, "grad_norm": 0.5555015206336975, "learning_rate": 8.001456675047019e-05, "loss": 1.8088, "step": 5506 }, { "epoch": 0.3069505601694443, "grad_norm": 0.5883082747459412, "learning_rate": 8.000744483184433e-05, "loss": 1.5916, "step": 5507 }, { "epoch": 0.30700629842260746, "grad_norm": 0.5937238931655884, "learning_rate": 8.000032196155726e-05, "loss": 1.8253, "step": 5508 }, { "epoch": 0.3070620366757706, "grad_norm": 0.5752248764038086, "learning_rate": 7.999319813983492e-05, "loss": 1.7183, "step": 5509 }, { "epoch": 0.3071177749289337, "grad_norm": 0.5927345156669617, "learning_rate": 7.99860733669032e-05, "loss": 1.8415, "step": 5510 }, { "epoch": 0.30717351318209685, "grad_norm": 0.597845196723938, "learning_rate": 7.997894764298806e-05, "loss": 1.8575, "step": 5511 }, { "epoch": 0.30722925143526003, "grad_norm": 0.5484491586685181, "learning_rate": 7.997182096831548e-05, "loss": 1.6398, "step": 5512 }, { "epoch": 0.30728498968842316, "grad_norm": 0.5977261662483215, "learning_rate": 7.99646933431115e-05, "loss": 2.0446, "step": 5513 }, { "epoch": 0.3073407279415863, "grad_norm": 0.5897913575172424, "learning_rate": 7.995756476760214e-05, "loss": 1.7335, "step": 5514 }, { "epoch": 0.3073964661947495, "grad_norm": 0.5303786396980286, "learning_rate": 7.995043524201351e-05, "loss": 1.6374, "step": 5515 }, { "epoch": 0.3074522044479126, "grad_norm": 0.6054732799530029, "learning_rate": 7.994330476657168e-05, "loss": 1.8542, "step": 5516 }, { "epoch": 0.30750794270107573, "grad_norm": 0.5825492739677429, "learning_rate": 7.993617334150282e-05, "loss": 1.74, "step": 5517 }, { "epoch": 0.3075636809542389, "grad_norm": 0.5496809482574463, "learning_rate": 7.992904096703307e-05, "loss": 1.6844, "step": 5518 }, { "epoch": 0.30761941920740205, "grad_norm": 0.5574871301651001, "learning_rate": 7.992190764338864e-05, "loss": 1.7397, "step": 5519 }, { "epoch": 0.3076751574605652, "grad_norm": 0.5654902458190918, "learning_rate": 7.991477337079576e-05, "loss": 1.7361, "step": 5520 }, { "epoch": 0.30773089571372836, "grad_norm": 0.5748382806777954, "learning_rate": 7.990763814948068e-05, "loss": 1.8819, "step": 5521 }, { "epoch": 0.3077866339668915, "grad_norm": 0.5120726823806763, "learning_rate": 7.99005019796697e-05, "loss": 1.5405, "step": 5522 }, { "epoch": 0.3078423722200546, "grad_norm": 0.5529910326004028, "learning_rate": 7.989336486158912e-05, "loss": 1.6712, "step": 5523 }, { "epoch": 0.30789811047321775, "grad_norm": 0.5775067210197449, "learning_rate": 7.988622679546529e-05, "loss": 2.0319, "step": 5524 }, { "epoch": 0.30795384872638093, "grad_norm": 0.5432143211364746, "learning_rate": 7.987908778152462e-05, "loss": 1.5891, "step": 5525 }, { "epoch": 0.30800958697954406, "grad_norm": 0.5764423608779907, "learning_rate": 7.987194781999345e-05, "loss": 1.865, "step": 5526 }, { "epoch": 0.3080653252327072, "grad_norm": 0.5256220698356628, "learning_rate": 7.98648069110983e-05, "loss": 1.5777, "step": 5527 }, { "epoch": 0.3081210634858704, "grad_norm": 0.5597642064094543, "learning_rate": 7.985766505506559e-05, "loss": 1.8957, "step": 5528 }, { "epoch": 0.3081768017390335, "grad_norm": 0.5411173701286316, "learning_rate": 7.985052225212181e-05, "loss": 1.7575, "step": 5529 }, { "epoch": 0.30823253999219663, "grad_norm": 0.5252230763435364, "learning_rate": 7.984337850249352e-05, "loss": 1.7377, "step": 5530 }, { "epoch": 0.3082882782453598, "grad_norm": 0.5985997915267944, "learning_rate": 7.983623380640729e-05, "loss": 1.7941, "step": 5531 }, { "epoch": 0.30834401649852294, "grad_norm": 0.5696808099746704, "learning_rate": 7.982908816408963e-05, "loss": 1.8425, "step": 5532 }, { "epoch": 0.3083997547516861, "grad_norm": 0.5184767246246338, "learning_rate": 7.982194157576723e-05, "loss": 1.6765, "step": 5533 }, { "epoch": 0.3084554930048492, "grad_norm": 0.5509563088417053, "learning_rate": 7.981479404166672e-05, "loss": 1.8554, "step": 5534 }, { "epoch": 0.3085112312580124, "grad_norm": 0.5477381944656372, "learning_rate": 7.980764556201478e-05, "loss": 1.6513, "step": 5535 }, { "epoch": 0.3085669695111755, "grad_norm": 0.5575202107429504, "learning_rate": 7.980049613703811e-05, "loss": 1.7565, "step": 5536 }, { "epoch": 0.30862270776433864, "grad_norm": 0.578071117401123, "learning_rate": 7.979334576696344e-05, "loss": 1.6711, "step": 5537 }, { "epoch": 0.30867844601750183, "grad_norm": 0.5293973684310913, "learning_rate": 7.978619445201756e-05, "loss": 1.8865, "step": 5538 }, { "epoch": 0.30873418427066496, "grad_norm": 0.5793629288673401, "learning_rate": 7.977904219242724e-05, "loss": 1.9338, "step": 5539 }, { "epoch": 0.3087899225238281, "grad_norm": 0.5701123476028442, "learning_rate": 7.977188898841936e-05, "loss": 1.778, "step": 5540 }, { "epoch": 0.30884566077699127, "grad_norm": 0.5166484117507935, "learning_rate": 7.976473484022071e-05, "loss": 1.6528, "step": 5541 }, { "epoch": 0.3089013990301544, "grad_norm": 0.5501734018325806, "learning_rate": 7.975757974805824e-05, "loss": 1.6939, "step": 5542 }, { "epoch": 0.30895713728331753, "grad_norm": 0.5325387716293335, "learning_rate": 7.975042371215881e-05, "loss": 1.5085, "step": 5543 }, { "epoch": 0.3090128755364807, "grad_norm": 0.5717397928237915, "learning_rate": 7.974326673274943e-05, "loss": 1.7745, "step": 5544 }, { "epoch": 0.30906861378964384, "grad_norm": 0.5344177484512329, "learning_rate": 7.973610881005702e-05, "loss": 1.6344, "step": 5545 }, { "epoch": 0.30912435204280697, "grad_norm": 0.5647115707397461, "learning_rate": 7.972894994430862e-05, "loss": 1.8173, "step": 5546 }, { "epoch": 0.3091800902959701, "grad_norm": 0.5356699824333191, "learning_rate": 7.972179013573125e-05, "loss": 1.6173, "step": 5547 }, { "epoch": 0.3092358285491333, "grad_norm": 0.5651494860649109, "learning_rate": 7.971462938455199e-05, "loss": 1.5781, "step": 5548 }, { "epoch": 0.3092915668022964, "grad_norm": 0.5726121664047241, "learning_rate": 7.970746769099795e-05, "loss": 1.5528, "step": 5549 }, { "epoch": 0.30934730505545954, "grad_norm": 0.6116449236869812, "learning_rate": 7.970030505529624e-05, "loss": 1.9145, "step": 5550 }, { "epoch": 0.3094030433086227, "grad_norm": 0.5738492012023926, "learning_rate": 7.969314147767399e-05, "loss": 1.7875, "step": 5551 }, { "epoch": 0.30945878156178586, "grad_norm": 0.5894981026649475, "learning_rate": 7.968597695835844e-05, "loss": 1.5879, "step": 5552 }, { "epoch": 0.309514519814949, "grad_norm": 0.5126131772994995, "learning_rate": 7.967881149757678e-05, "loss": 1.6178, "step": 5553 }, { "epoch": 0.30957025806811217, "grad_norm": 0.5616469979286194, "learning_rate": 7.967164509555624e-05, "loss": 1.7701, "step": 5554 }, { "epoch": 0.3096259963212753, "grad_norm": 0.5041468739509583, "learning_rate": 7.966447775252415e-05, "loss": 1.5632, "step": 5555 }, { "epoch": 0.3096817345744384, "grad_norm": 0.5093483328819275, "learning_rate": 7.965730946870775e-05, "loss": 1.7161, "step": 5556 }, { "epoch": 0.30973747282760156, "grad_norm": 0.6104699373245239, "learning_rate": 7.965014024433443e-05, "loss": 1.7959, "step": 5557 }, { "epoch": 0.30979321108076474, "grad_norm": 0.5576456189155579, "learning_rate": 7.964297007963151e-05, "loss": 1.8631, "step": 5558 }, { "epoch": 0.30984894933392787, "grad_norm": 0.5558076500892639, "learning_rate": 7.963579897482642e-05, "loss": 1.7503, "step": 5559 }, { "epoch": 0.309904687587091, "grad_norm": 0.5433835983276367, "learning_rate": 7.96286269301466e-05, "loss": 1.6935, "step": 5560 }, { "epoch": 0.3099604258402542, "grad_norm": 0.5542037487030029, "learning_rate": 7.962145394581944e-05, "loss": 1.7342, "step": 5561 }, { "epoch": 0.3100161640934173, "grad_norm": 0.5680848360061646, "learning_rate": 7.961428002207249e-05, "loss": 1.6875, "step": 5562 }, { "epoch": 0.31007190234658044, "grad_norm": 0.5349116921424866, "learning_rate": 7.960710515913323e-05, "loss": 1.6991, "step": 5563 }, { "epoch": 0.3101276405997436, "grad_norm": 0.5729091167449951, "learning_rate": 7.959992935722924e-05, "loss": 1.8622, "step": 5564 }, { "epoch": 0.31018337885290675, "grad_norm": 0.558594286441803, "learning_rate": 7.959275261658804e-05, "loss": 1.8244, "step": 5565 }, { "epoch": 0.3102391171060699, "grad_norm": 0.5720626711845398, "learning_rate": 7.958557493743728e-05, "loss": 1.796, "step": 5566 }, { "epoch": 0.31029485535923307, "grad_norm": 0.7089996933937073, "learning_rate": 7.957839632000457e-05, "loss": 2.2928, "step": 5567 }, { "epoch": 0.3103505936123962, "grad_norm": 0.51308274269104, "learning_rate": 7.957121676451759e-05, "loss": 1.5466, "step": 5568 }, { "epoch": 0.3104063318655593, "grad_norm": 0.5389419794082642, "learning_rate": 7.956403627120403e-05, "loss": 1.7847, "step": 5569 }, { "epoch": 0.31046207011872246, "grad_norm": 0.5362538695335388, "learning_rate": 7.95568548402916e-05, "loss": 1.752, "step": 5570 }, { "epoch": 0.31051780837188564, "grad_norm": 0.5565882921218872, "learning_rate": 7.954967247200806e-05, "loss": 1.7436, "step": 5571 }, { "epoch": 0.31057354662504877, "grad_norm": 0.5700491070747375, "learning_rate": 7.95424891665812e-05, "loss": 1.3893, "step": 5572 }, { "epoch": 0.3106292848782119, "grad_norm": 0.5634492635726929, "learning_rate": 7.953530492423884e-05, "loss": 1.5228, "step": 5573 }, { "epoch": 0.3106850231313751, "grad_norm": 0.5454849004745483, "learning_rate": 7.95281197452088e-05, "loss": 1.7454, "step": 5574 }, { "epoch": 0.3107407613845382, "grad_norm": 0.5382822751998901, "learning_rate": 7.952093362971897e-05, "loss": 1.6264, "step": 5575 }, { "epoch": 0.31079649963770134, "grad_norm": 0.5650563836097717, "learning_rate": 7.951374657799724e-05, "loss": 1.4175, "step": 5576 }, { "epoch": 0.3108522378908645, "grad_norm": 0.570775032043457, "learning_rate": 7.950655859027154e-05, "loss": 1.6686, "step": 5577 }, { "epoch": 0.31090797614402765, "grad_norm": 0.5498449206352234, "learning_rate": 7.949936966676984e-05, "loss": 1.7351, "step": 5578 }, { "epoch": 0.3109637143971908, "grad_norm": 0.6256487369537354, "learning_rate": 7.949217980772012e-05, "loss": 1.9914, "step": 5579 }, { "epoch": 0.3110194526503539, "grad_norm": 0.6062150001525879, "learning_rate": 7.948498901335042e-05, "loss": 1.9362, "step": 5580 }, { "epoch": 0.3110751909035171, "grad_norm": 0.5351932048797607, "learning_rate": 7.947779728388878e-05, "loss": 1.6922, "step": 5581 }, { "epoch": 0.3111309291566802, "grad_norm": 0.6049745678901672, "learning_rate": 7.947060461956329e-05, "loss": 2.146, "step": 5582 }, { "epoch": 0.31118666740984335, "grad_norm": 0.5465789437294006, "learning_rate": 7.946341102060202e-05, "loss": 1.7858, "step": 5583 }, { "epoch": 0.31124240566300654, "grad_norm": 0.5127213597297668, "learning_rate": 7.945621648723313e-05, "loss": 1.6921, "step": 5584 }, { "epoch": 0.31129814391616967, "grad_norm": 0.5576222538948059, "learning_rate": 7.944902101968482e-05, "loss": 1.7601, "step": 5585 }, { "epoch": 0.3113538821693328, "grad_norm": 0.5145538449287415, "learning_rate": 7.944182461818525e-05, "loss": 1.6861, "step": 5586 }, { "epoch": 0.311409620422496, "grad_norm": 0.5060127973556519, "learning_rate": 7.943462728296266e-05, "loss": 1.4954, "step": 5587 }, { "epoch": 0.3114653586756591, "grad_norm": 0.5226243138313293, "learning_rate": 7.942742901424531e-05, "loss": 1.7086, "step": 5588 }, { "epoch": 0.31152109692882224, "grad_norm": 0.5711196064949036, "learning_rate": 7.942022981226149e-05, "loss": 1.7788, "step": 5589 }, { "epoch": 0.3115768351819854, "grad_norm": 0.511813759803772, "learning_rate": 7.941302967723951e-05, "loss": 1.3316, "step": 5590 }, { "epoch": 0.31163257343514855, "grad_norm": 0.5399052500724792, "learning_rate": 7.940582860940771e-05, "loss": 1.6683, "step": 5591 }, { "epoch": 0.3116883116883117, "grad_norm": 0.5305676460266113, "learning_rate": 7.939862660899448e-05, "loss": 1.7344, "step": 5592 }, { "epoch": 0.3117440499414748, "grad_norm": 0.5254833698272705, "learning_rate": 7.939142367622823e-05, "loss": 1.5524, "step": 5593 }, { "epoch": 0.311799788194638, "grad_norm": 0.5858429074287415, "learning_rate": 7.938421981133738e-05, "loss": 1.7415, "step": 5594 }, { "epoch": 0.3118555264478011, "grad_norm": 0.6082313656806946, "learning_rate": 7.937701501455039e-05, "loss": 1.5333, "step": 5595 }, { "epoch": 0.31191126470096425, "grad_norm": 0.5757048726081848, "learning_rate": 7.936980928609577e-05, "loss": 1.8723, "step": 5596 }, { "epoch": 0.31196700295412744, "grad_norm": 0.6089504957199097, "learning_rate": 7.936260262620205e-05, "loss": 1.8915, "step": 5597 }, { "epoch": 0.31202274120729057, "grad_norm": 0.588326096534729, "learning_rate": 7.935539503509775e-05, "loss": 1.8353, "step": 5598 }, { "epoch": 0.3120784794604537, "grad_norm": 0.5930234789848328, "learning_rate": 7.934818651301148e-05, "loss": 1.832, "step": 5599 }, { "epoch": 0.3121342177136169, "grad_norm": 0.5394973158836365, "learning_rate": 7.934097706017185e-05, "loss": 1.7301, "step": 5600 }, { "epoch": 0.31218995596678, "grad_norm": 0.5147609114646912, "learning_rate": 7.93337666768075e-05, "loss": 1.7095, "step": 5601 }, { "epoch": 0.31224569421994314, "grad_norm": 0.5531661510467529, "learning_rate": 7.932655536314708e-05, "loss": 1.6071, "step": 5602 }, { "epoch": 0.31230143247310627, "grad_norm": 0.5388891696929932, "learning_rate": 7.931934311941933e-05, "loss": 1.5759, "step": 5603 }, { "epoch": 0.31235717072626945, "grad_norm": 0.5236558318138123, "learning_rate": 7.931212994585294e-05, "loss": 1.5492, "step": 5604 }, { "epoch": 0.3124129089794326, "grad_norm": 0.6088682413101196, "learning_rate": 7.93049158426767e-05, "loss": 1.7768, "step": 5605 }, { "epoch": 0.3124686472325957, "grad_norm": 0.5254512429237366, "learning_rate": 7.92977008101194e-05, "loss": 1.6003, "step": 5606 }, { "epoch": 0.3125243854857589, "grad_norm": 0.5747987031936646, "learning_rate": 7.929048484840984e-05, "loss": 1.7666, "step": 5607 }, { "epoch": 0.312580123738922, "grad_norm": 0.5682463645935059, "learning_rate": 7.928326795777688e-05, "loss": 1.7861, "step": 5608 }, { "epoch": 0.31263586199208515, "grad_norm": 0.5339683890342712, "learning_rate": 7.927605013844939e-05, "loss": 1.614, "step": 5609 }, { "epoch": 0.31269160024524834, "grad_norm": 0.5913909673690796, "learning_rate": 7.926883139065627e-05, "loss": 1.7949, "step": 5610 }, { "epoch": 0.31274733849841146, "grad_norm": 0.5656397342681885, "learning_rate": 7.926161171462648e-05, "loss": 1.8147, "step": 5611 }, { "epoch": 0.3128030767515746, "grad_norm": 0.5707045197486877, "learning_rate": 7.925439111058897e-05, "loss": 1.7117, "step": 5612 }, { "epoch": 0.3128588150047378, "grad_norm": 0.5682026743888855, "learning_rate": 7.924716957877275e-05, "loss": 1.6873, "step": 5613 }, { "epoch": 0.3129145532579009, "grad_norm": 0.6239393353462219, "learning_rate": 7.92399471194068e-05, "loss": 2.136, "step": 5614 }, { "epoch": 0.31297029151106404, "grad_norm": 0.5405849814414978, "learning_rate": 7.923272373272024e-05, "loss": 1.7105, "step": 5615 }, { "epoch": 0.31302602976422716, "grad_norm": 0.5093609094619751, "learning_rate": 7.922549941894212e-05, "loss": 1.7117, "step": 5616 }, { "epoch": 0.31308176801739035, "grad_norm": 0.5615028738975525, "learning_rate": 7.921827417830155e-05, "loss": 1.7621, "step": 5617 }, { "epoch": 0.3131375062705535, "grad_norm": 0.5841954946517944, "learning_rate": 7.921104801102766e-05, "loss": 1.7155, "step": 5618 }, { "epoch": 0.3131932445237166, "grad_norm": 0.5684096217155457, "learning_rate": 7.920382091734966e-05, "loss": 1.5615, "step": 5619 }, { "epoch": 0.3132489827768798, "grad_norm": 0.5647116303443909, "learning_rate": 7.919659289749673e-05, "loss": 1.6964, "step": 5620 }, { "epoch": 0.3133047210300429, "grad_norm": 0.5479496121406555, "learning_rate": 7.918936395169809e-05, "loss": 1.6701, "step": 5621 }, { "epoch": 0.31336045928320605, "grad_norm": 0.5465035438537598, "learning_rate": 7.918213408018302e-05, "loss": 1.8372, "step": 5622 }, { "epoch": 0.31341619753636923, "grad_norm": 0.5440232157707214, "learning_rate": 7.91749032831808e-05, "loss": 1.6181, "step": 5623 }, { "epoch": 0.31347193578953236, "grad_norm": 0.5956066846847534, "learning_rate": 7.916767156092073e-05, "loss": 1.8816, "step": 5624 }, { "epoch": 0.3135276740426955, "grad_norm": 0.4970141053199768, "learning_rate": 7.916043891363221e-05, "loss": 1.331, "step": 5625 }, { "epoch": 0.3135834122958586, "grad_norm": 0.5314142107963562, "learning_rate": 7.915320534154457e-05, "loss": 1.7526, "step": 5626 }, { "epoch": 0.3136391505490218, "grad_norm": 0.5765748620033264, "learning_rate": 7.914597084488723e-05, "loss": 1.7204, "step": 5627 }, { "epoch": 0.31369488880218493, "grad_norm": 0.5975958704948425, "learning_rate": 7.913873542388963e-05, "loss": 1.8833, "step": 5628 }, { "epoch": 0.31375062705534806, "grad_norm": 0.5788082480430603, "learning_rate": 7.913149907878123e-05, "loss": 1.9049, "step": 5629 }, { "epoch": 0.31380636530851125, "grad_norm": 0.6019555330276489, "learning_rate": 7.912426180979152e-05, "loss": 2.005, "step": 5630 }, { "epoch": 0.3138621035616744, "grad_norm": 0.5763736963272095, "learning_rate": 7.911702361715006e-05, "loss": 1.7476, "step": 5631 }, { "epoch": 0.3139178418148375, "grad_norm": 0.5758547782897949, "learning_rate": 7.910978450108634e-05, "loss": 1.69, "step": 5632 }, { "epoch": 0.3139735800680007, "grad_norm": 0.5762767791748047, "learning_rate": 7.910254446183e-05, "loss": 1.7354, "step": 5633 }, { "epoch": 0.3140293183211638, "grad_norm": 0.5475091338157654, "learning_rate": 7.909530349961062e-05, "loss": 1.803, "step": 5634 }, { "epoch": 0.31408505657432695, "grad_norm": 0.5797522664070129, "learning_rate": 7.908806161465785e-05, "loss": 1.8425, "step": 5635 }, { "epoch": 0.31414079482749013, "grad_norm": 0.5494913458824158, "learning_rate": 7.908081880720137e-05, "loss": 1.7041, "step": 5636 }, { "epoch": 0.31419653308065326, "grad_norm": 0.5253703594207764, "learning_rate": 7.907357507747087e-05, "loss": 1.5982, "step": 5637 }, { "epoch": 0.3142522713338164, "grad_norm": 0.5663535594940186, "learning_rate": 7.906633042569607e-05, "loss": 1.6506, "step": 5638 }, { "epoch": 0.3143080095869795, "grad_norm": 0.5768305659294128, "learning_rate": 7.905908485210674e-05, "loss": 1.675, "step": 5639 }, { "epoch": 0.3143637478401427, "grad_norm": 0.5730108022689819, "learning_rate": 7.905183835693266e-05, "loss": 1.6702, "step": 5640 }, { "epoch": 0.31441948609330583, "grad_norm": 0.5377948880195618, "learning_rate": 7.904459094040366e-05, "loss": 1.8156, "step": 5641 }, { "epoch": 0.31447522434646896, "grad_norm": 0.5925690531730652, "learning_rate": 7.903734260274958e-05, "loss": 1.8198, "step": 5642 }, { "epoch": 0.31453096259963215, "grad_norm": 0.5221425294876099, "learning_rate": 7.903009334420027e-05, "loss": 1.6291, "step": 5643 }, { "epoch": 0.3145867008527953, "grad_norm": 0.5379535555839539, "learning_rate": 7.902284316498567e-05, "loss": 1.6026, "step": 5644 }, { "epoch": 0.3146424391059584, "grad_norm": 0.5477253198623657, "learning_rate": 7.901559206533571e-05, "loss": 1.9096, "step": 5645 }, { "epoch": 0.3146981773591216, "grad_norm": 0.6306549310684204, "learning_rate": 7.900834004548034e-05, "loss": 1.9637, "step": 5646 }, { "epoch": 0.3147539156122847, "grad_norm": 0.5738115906715393, "learning_rate": 7.900108710564954e-05, "loss": 1.8217, "step": 5647 }, { "epoch": 0.31480965386544785, "grad_norm": 0.5737825036048889, "learning_rate": 7.899383324607336e-05, "loss": 1.7018, "step": 5648 }, { "epoch": 0.314865392118611, "grad_norm": 0.5575332641601562, "learning_rate": 7.898657846698183e-05, "loss": 1.823, "step": 5649 }, { "epoch": 0.31492113037177416, "grad_norm": 0.5665508508682251, "learning_rate": 7.897932276860502e-05, "loss": 1.8531, "step": 5650 }, { "epoch": 0.3149768686249373, "grad_norm": 0.6147223711013794, "learning_rate": 7.897206615117307e-05, "loss": 1.8, "step": 5651 }, { "epoch": 0.3150326068781004, "grad_norm": 0.5605811476707458, "learning_rate": 7.89648086149161e-05, "loss": 1.8554, "step": 5652 }, { "epoch": 0.3150883451312636, "grad_norm": 0.5749962329864502, "learning_rate": 7.895755016006427e-05, "loss": 1.9814, "step": 5653 }, { "epoch": 0.31514408338442673, "grad_norm": 0.6655054688453674, "learning_rate": 7.895029078684779e-05, "loss": 1.6895, "step": 5654 }, { "epoch": 0.31519982163758986, "grad_norm": 0.5131604671478271, "learning_rate": 7.894303049549687e-05, "loss": 1.4731, "step": 5655 }, { "epoch": 0.31525555989075305, "grad_norm": 0.5364745855331421, "learning_rate": 7.893576928624178e-05, "loss": 1.819, "step": 5656 }, { "epoch": 0.3153112981439162, "grad_norm": 0.563586413860321, "learning_rate": 7.89285071593128e-05, "loss": 1.6023, "step": 5657 }, { "epoch": 0.3153670363970793, "grad_norm": 0.5618447065353394, "learning_rate": 7.892124411494022e-05, "loss": 1.5903, "step": 5658 }, { "epoch": 0.3154227746502425, "grad_norm": 0.5073031783103943, "learning_rate": 7.891398015335442e-05, "loss": 1.646, "step": 5659 }, { "epoch": 0.3154785129034056, "grad_norm": 0.5081502795219421, "learning_rate": 7.890671527478574e-05, "loss": 1.3751, "step": 5660 }, { "epoch": 0.31553425115656875, "grad_norm": 0.524069607257843, "learning_rate": 7.88994494794646e-05, "loss": 1.6491, "step": 5661 }, { "epoch": 0.3155899894097319, "grad_norm": 0.5874504446983337, "learning_rate": 7.88921827676214e-05, "loss": 1.5753, "step": 5662 }, { "epoch": 0.31564572766289506, "grad_norm": 0.5709517002105713, "learning_rate": 7.888491513948661e-05, "loss": 1.8023, "step": 5663 }, { "epoch": 0.3157014659160582, "grad_norm": 0.5294995903968811, "learning_rate": 7.887764659529073e-05, "loss": 1.6754, "step": 5664 }, { "epoch": 0.3157572041692213, "grad_norm": 0.5117160677909851, "learning_rate": 7.887037713526428e-05, "loss": 1.6262, "step": 5665 }, { "epoch": 0.3158129424223845, "grad_norm": 0.49994394183158875, "learning_rate": 7.88631067596378e-05, "loss": 1.5649, "step": 5666 }, { "epoch": 0.31586868067554763, "grad_norm": 0.486306756734848, "learning_rate": 7.885583546864184e-05, "loss": 1.4968, "step": 5667 }, { "epoch": 0.31592441892871076, "grad_norm": 0.5242376327514648, "learning_rate": 7.884856326250703e-05, "loss": 1.5559, "step": 5668 }, { "epoch": 0.31598015718187394, "grad_norm": 0.5692494511604309, "learning_rate": 7.884129014146397e-05, "loss": 1.8384, "step": 5669 }, { "epoch": 0.3160358954350371, "grad_norm": 0.5784143209457397, "learning_rate": 7.883401610574336e-05, "loss": 1.9506, "step": 5670 }, { "epoch": 0.3160916336882002, "grad_norm": 0.5659399032592773, "learning_rate": 7.882674115557587e-05, "loss": 1.6864, "step": 5671 }, { "epoch": 0.31614737194136333, "grad_norm": 0.6336827278137207, "learning_rate": 7.881946529119223e-05, "loss": 1.9635, "step": 5672 }, { "epoch": 0.3162031101945265, "grad_norm": 0.5327314734458923, "learning_rate": 7.881218851282317e-05, "loss": 1.5806, "step": 5673 }, { "epoch": 0.31625884844768964, "grad_norm": 0.5700320601463318, "learning_rate": 7.880491082069949e-05, "loss": 1.7419, "step": 5674 }, { "epoch": 0.3163145867008528, "grad_norm": 0.569348156452179, "learning_rate": 7.879763221505197e-05, "loss": 1.7392, "step": 5675 }, { "epoch": 0.31637032495401596, "grad_norm": 0.5255264639854431, "learning_rate": 7.879035269611146e-05, "loss": 1.6862, "step": 5676 }, { "epoch": 0.3164260632071791, "grad_norm": 0.5734140872955322, "learning_rate": 7.878307226410882e-05, "loss": 1.8253, "step": 5677 }, { "epoch": 0.3164818014603422, "grad_norm": 0.5915566086769104, "learning_rate": 7.877579091927496e-05, "loss": 1.7754, "step": 5678 }, { "epoch": 0.3165375397135054, "grad_norm": 0.5272923707962036, "learning_rate": 7.876850866184077e-05, "loss": 1.7315, "step": 5679 }, { "epoch": 0.31659327796666853, "grad_norm": 0.5072640180587769, "learning_rate": 7.876122549203723e-05, "loss": 1.5367, "step": 5680 }, { "epoch": 0.31664901621983166, "grad_norm": 0.5453153848648071, "learning_rate": 7.87539414100953e-05, "loss": 1.7551, "step": 5681 }, { "epoch": 0.31670475447299484, "grad_norm": 0.5492895245552063, "learning_rate": 7.874665641624599e-05, "loss": 1.7739, "step": 5682 }, { "epoch": 0.31676049272615797, "grad_norm": 0.5405164957046509, "learning_rate": 7.873937051072035e-05, "loss": 1.747, "step": 5683 }, { "epoch": 0.3168162309793211, "grad_norm": 0.5549308061599731, "learning_rate": 7.873208369374943e-05, "loss": 1.8224, "step": 5684 }, { "epoch": 0.31687196923248423, "grad_norm": 0.5366522669792175, "learning_rate": 7.872479596556435e-05, "loss": 1.6589, "step": 5685 }, { "epoch": 0.3169277074856474, "grad_norm": 0.527472734451294, "learning_rate": 7.871750732639621e-05, "loss": 1.6122, "step": 5686 }, { "epoch": 0.31698344573881054, "grad_norm": 0.5421255826950073, "learning_rate": 7.871021777647618e-05, "loss": 1.766, "step": 5687 }, { "epoch": 0.31703918399197367, "grad_norm": 0.5596272945404053, "learning_rate": 7.870292731603544e-05, "loss": 1.765, "step": 5688 }, { "epoch": 0.31709492224513686, "grad_norm": 0.5629613995552063, "learning_rate": 7.869563594530517e-05, "loss": 1.6374, "step": 5689 }, { "epoch": 0.3171506604983, "grad_norm": 0.5471567511558533, "learning_rate": 7.868834366451665e-05, "loss": 1.8048, "step": 5690 }, { "epoch": 0.3172063987514631, "grad_norm": 0.6505834460258484, "learning_rate": 7.868105047390113e-05, "loss": 2.1298, "step": 5691 }, { "epoch": 0.3172621370046263, "grad_norm": 0.5665611624717712, "learning_rate": 7.867375637368993e-05, "loss": 1.6, "step": 5692 }, { "epoch": 0.31731787525778943, "grad_norm": 0.5327755212783813, "learning_rate": 7.866646136411433e-05, "loss": 1.7876, "step": 5693 }, { "epoch": 0.31737361351095256, "grad_norm": 0.5993742942810059, "learning_rate": 7.865916544540573e-05, "loss": 1.7237, "step": 5694 }, { "epoch": 0.3174293517641157, "grad_norm": 0.5317041873931885, "learning_rate": 7.865186861779548e-05, "loss": 1.5221, "step": 5695 }, { "epoch": 0.31748509001727887, "grad_norm": 0.5825653076171875, "learning_rate": 7.864457088151502e-05, "loss": 1.7575, "step": 5696 }, { "epoch": 0.317540828270442, "grad_norm": 0.5435444116592407, "learning_rate": 7.863727223679578e-05, "loss": 1.789, "step": 5697 }, { "epoch": 0.31759656652360513, "grad_norm": 0.5559577941894531, "learning_rate": 7.862997268386924e-05, "loss": 1.802, "step": 5698 }, { "epoch": 0.3176523047767683, "grad_norm": 0.6636247634887695, "learning_rate": 7.862267222296687e-05, "loss": 2.0765, "step": 5699 }, { "epoch": 0.31770804302993144, "grad_norm": 0.49671420454978943, "learning_rate": 7.861537085432025e-05, "loss": 1.5644, "step": 5700 }, { "epoch": 0.31776378128309457, "grad_norm": 0.5270445942878723, "learning_rate": 7.860806857816088e-05, "loss": 1.7291, "step": 5701 }, { "epoch": 0.31781951953625776, "grad_norm": 0.6097070574760437, "learning_rate": 7.860076539472037e-05, "loss": 1.9244, "step": 5702 }, { "epoch": 0.3178752577894209, "grad_norm": 0.537875235080719, "learning_rate": 7.859346130423035e-05, "loss": 1.7579, "step": 5703 }, { "epoch": 0.317930996042584, "grad_norm": 0.5384728908538818, "learning_rate": 7.858615630692244e-05, "loss": 1.5755, "step": 5704 }, { "epoch": 0.3179867342957472, "grad_norm": 0.5751199722290039, "learning_rate": 7.857885040302833e-05, "loss": 1.6979, "step": 5705 }, { "epoch": 0.3180424725489103, "grad_norm": 0.5749076008796692, "learning_rate": 7.857154359277972e-05, "loss": 1.6744, "step": 5706 }, { "epoch": 0.31809821080207346, "grad_norm": 0.5693714022636414, "learning_rate": 7.85642358764083e-05, "loss": 1.8986, "step": 5707 }, { "epoch": 0.3181539490552366, "grad_norm": 0.504147469997406, "learning_rate": 7.855692725414587e-05, "loss": 1.5641, "step": 5708 }, { "epoch": 0.31820968730839977, "grad_norm": 0.5494616031646729, "learning_rate": 7.854961772622423e-05, "loss": 1.6743, "step": 5709 }, { "epoch": 0.3182654255615629, "grad_norm": 0.49635690450668335, "learning_rate": 7.854230729287515e-05, "loss": 1.5466, "step": 5710 }, { "epoch": 0.318321163814726, "grad_norm": 0.569781482219696, "learning_rate": 7.853499595433049e-05, "loss": 1.7647, "step": 5711 }, { "epoch": 0.3183769020678892, "grad_norm": 0.540679931640625, "learning_rate": 7.852768371082215e-05, "loss": 1.6237, "step": 5712 }, { "epoch": 0.31843264032105234, "grad_norm": 0.5818458795547485, "learning_rate": 7.852037056258199e-05, "loss": 1.9955, "step": 5713 }, { "epoch": 0.31848837857421547, "grad_norm": 0.5366159081459045, "learning_rate": 7.851305650984197e-05, "loss": 1.5985, "step": 5714 }, { "epoch": 0.31854411682737865, "grad_norm": 0.7078673839569092, "learning_rate": 7.850574155283404e-05, "loss": 1.6371, "step": 5715 }, { "epoch": 0.3185998550805418, "grad_norm": 0.6395692825317383, "learning_rate": 7.849842569179017e-05, "loss": 2.0647, "step": 5716 }, { "epoch": 0.3186555933337049, "grad_norm": 0.5583460927009583, "learning_rate": 7.849110892694242e-05, "loss": 1.8005, "step": 5717 }, { "epoch": 0.31871133158686804, "grad_norm": 0.6016951203346252, "learning_rate": 7.848379125852282e-05, "loss": 1.9861, "step": 5718 }, { "epoch": 0.3187670698400312, "grad_norm": 0.5291598439216614, "learning_rate": 7.847647268676341e-05, "loss": 1.6806, "step": 5719 }, { "epoch": 0.31882280809319435, "grad_norm": 0.5864149332046509, "learning_rate": 7.846915321189632e-05, "loss": 1.7323, "step": 5720 }, { "epoch": 0.3188785463463575, "grad_norm": 0.5477664470672607, "learning_rate": 7.846183283415367e-05, "loss": 1.7307, "step": 5721 }, { "epoch": 0.31893428459952067, "grad_norm": 0.5449158549308777, "learning_rate": 7.845451155376764e-05, "loss": 1.679, "step": 5722 }, { "epoch": 0.3189900228526838, "grad_norm": 0.5383809804916382, "learning_rate": 7.844718937097039e-05, "loss": 1.6991, "step": 5723 }, { "epoch": 0.3190457611058469, "grad_norm": 0.4735757112503052, "learning_rate": 7.843986628599416e-05, "loss": 1.4701, "step": 5724 }, { "epoch": 0.3191014993590101, "grad_norm": 0.5248317122459412, "learning_rate": 7.843254229907119e-05, "loss": 1.7293, "step": 5725 }, { "epoch": 0.31915723761217324, "grad_norm": 0.5262721180915833, "learning_rate": 7.842521741043375e-05, "loss": 1.6067, "step": 5726 }, { "epoch": 0.31921297586533637, "grad_norm": 0.5584807991981506, "learning_rate": 7.841789162031415e-05, "loss": 1.8573, "step": 5727 }, { "epoch": 0.31926871411849955, "grad_norm": 0.5617311596870422, "learning_rate": 7.84105649289447e-05, "loss": 1.7482, "step": 5728 }, { "epoch": 0.3193244523716627, "grad_norm": 0.5431827902793884, "learning_rate": 7.840323733655778e-05, "loss": 1.8564, "step": 5729 }, { "epoch": 0.3193801906248258, "grad_norm": 0.5269571542739868, "learning_rate": 7.839590884338579e-05, "loss": 1.4677, "step": 5730 }, { "epoch": 0.31943592887798894, "grad_norm": 0.5726506114006042, "learning_rate": 7.838857944966113e-05, "loss": 1.7656, "step": 5731 }, { "epoch": 0.3194916671311521, "grad_norm": 0.5350455641746521, "learning_rate": 7.838124915561623e-05, "loss": 1.525, "step": 5732 }, { "epoch": 0.31954740538431525, "grad_norm": 0.6093659996986389, "learning_rate": 7.837391796148359e-05, "loss": 1.9737, "step": 5733 }, { "epoch": 0.3196031436374784, "grad_norm": 0.5513406991958618, "learning_rate": 7.83665858674957e-05, "loss": 1.6783, "step": 5734 }, { "epoch": 0.31965888189064157, "grad_norm": 0.5465078949928284, "learning_rate": 7.835925287388511e-05, "loss": 1.5786, "step": 5735 }, { "epoch": 0.3197146201438047, "grad_norm": 0.5756266713142395, "learning_rate": 7.835191898088435e-05, "loss": 1.7969, "step": 5736 }, { "epoch": 0.3197703583969678, "grad_norm": 0.5218703150749207, "learning_rate": 7.8344584188726e-05, "loss": 1.619, "step": 5737 }, { "epoch": 0.319826096650131, "grad_norm": 0.5465853810310364, "learning_rate": 7.833724849764273e-05, "loss": 1.6193, "step": 5738 }, { "epoch": 0.31988183490329414, "grad_norm": 0.596364438533783, "learning_rate": 7.832991190786716e-05, "loss": 1.7853, "step": 5739 }, { "epoch": 0.31993757315645727, "grad_norm": 0.544185221195221, "learning_rate": 7.832257441963195e-05, "loss": 1.8835, "step": 5740 }, { "epoch": 0.3199933114096204, "grad_norm": 0.6070075631141663, "learning_rate": 7.83152360331698e-05, "loss": 2.1082, "step": 5741 }, { "epoch": 0.3200490496627836, "grad_norm": 0.5382431745529175, "learning_rate": 7.830789674871346e-05, "loss": 1.7184, "step": 5742 }, { "epoch": 0.3201047879159467, "grad_norm": 0.5074361562728882, "learning_rate": 7.830055656649568e-05, "loss": 1.5133, "step": 5743 }, { "epoch": 0.32016052616910984, "grad_norm": 0.5396546125411987, "learning_rate": 7.829321548674926e-05, "loss": 1.6203, "step": 5744 }, { "epoch": 0.320216264422273, "grad_norm": 0.5758295059204102, "learning_rate": 7.8285873509707e-05, "loss": 1.8658, "step": 5745 }, { "epoch": 0.32027200267543615, "grad_norm": 0.506420910358429, "learning_rate": 7.827853063560175e-05, "loss": 1.509, "step": 5746 }, { "epoch": 0.3203277409285993, "grad_norm": 0.5390977263450623, "learning_rate": 7.82711868646664e-05, "loss": 1.8333, "step": 5747 }, { "epoch": 0.32038347918176246, "grad_norm": 0.5680609345436096, "learning_rate": 7.82638421971338e-05, "loss": 1.6984, "step": 5748 }, { "epoch": 0.3204392174349256, "grad_norm": 0.5344312191009521, "learning_rate": 7.825649663323693e-05, "loss": 1.6667, "step": 5749 }, { "epoch": 0.3204949556880887, "grad_norm": 0.610658586025238, "learning_rate": 7.824915017320874e-05, "loss": 1.7763, "step": 5750 }, { "epoch": 0.3205506939412519, "grad_norm": 0.5463300943374634, "learning_rate": 7.824180281728222e-05, "loss": 1.5632, "step": 5751 }, { "epoch": 0.32060643219441504, "grad_norm": 0.5856190919876099, "learning_rate": 7.823445456569036e-05, "loss": 1.8129, "step": 5752 }, { "epoch": 0.32066217044757817, "grad_norm": 0.7068459987640381, "learning_rate": 7.822710541866622e-05, "loss": 1.8126, "step": 5753 }, { "epoch": 0.3207179087007413, "grad_norm": 0.6159639954566956, "learning_rate": 7.821975537644286e-05, "loss": 1.7802, "step": 5754 }, { "epoch": 0.3207736469539045, "grad_norm": 0.583821177482605, "learning_rate": 7.821240443925341e-05, "loss": 1.9406, "step": 5755 }, { "epoch": 0.3208293852070676, "grad_norm": 0.49633607268333435, "learning_rate": 7.820505260733098e-05, "loss": 1.4748, "step": 5756 }, { "epoch": 0.32088512346023074, "grad_norm": 0.5159478187561035, "learning_rate": 7.819769988090873e-05, "loss": 1.716, "step": 5757 }, { "epoch": 0.3209408617133939, "grad_norm": 0.5665544867515564, "learning_rate": 7.819034626021983e-05, "loss": 1.8005, "step": 5758 }, { "epoch": 0.32099659996655705, "grad_norm": 0.567043125629425, "learning_rate": 7.818299174549752e-05, "loss": 1.675, "step": 5759 }, { "epoch": 0.3210523382197202, "grad_norm": 0.5980729460716248, "learning_rate": 7.817563633697503e-05, "loss": 1.9635, "step": 5760 }, { "epoch": 0.32110807647288336, "grad_norm": 0.5714271068572998, "learning_rate": 7.816828003488563e-05, "loss": 1.7265, "step": 5761 }, { "epoch": 0.3211638147260465, "grad_norm": 0.5386238694190979, "learning_rate": 7.816092283946261e-05, "loss": 1.6653, "step": 5762 }, { "epoch": 0.3212195529792096, "grad_norm": 0.5798346400260925, "learning_rate": 7.815356475093931e-05, "loss": 1.6578, "step": 5763 }, { "epoch": 0.32127529123237275, "grad_norm": 0.5155278444290161, "learning_rate": 7.81462057695491e-05, "loss": 1.787, "step": 5764 }, { "epoch": 0.32133102948553594, "grad_norm": 0.49146315455436707, "learning_rate": 7.813884589552534e-05, "loss": 1.5927, "step": 5765 }, { "epoch": 0.32138676773869906, "grad_norm": 0.553433895111084, "learning_rate": 7.813148512910144e-05, "loss": 1.7973, "step": 5766 }, { "epoch": 0.3214425059918622, "grad_norm": 0.5665645003318787, "learning_rate": 7.812412347051083e-05, "loss": 1.7949, "step": 5767 }, { "epoch": 0.3214982442450254, "grad_norm": 0.5180385708808899, "learning_rate": 7.811676091998704e-05, "loss": 1.7011, "step": 5768 }, { "epoch": 0.3215539824981885, "grad_norm": 0.581295371055603, "learning_rate": 7.81093974777635e-05, "loss": 1.7513, "step": 5769 }, { "epoch": 0.32160972075135164, "grad_norm": 0.5677274465560913, "learning_rate": 7.810203314407377e-05, "loss": 1.9528, "step": 5770 }, { "epoch": 0.3216654590045148, "grad_norm": 0.5377728939056396, "learning_rate": 7.80946679191514e-05, "loss": 1.6544, "step": 5771 }, { "epoch": 0.32172119725767795, "grad_norm": 0.533319354057312, "learning_rate": 7.808730180322996e-05, "loss": 1.6561, "step": 5772 }, { "epoch": 0.3217769355108411, "grad_norm": 0.5324406623840332, "learning_rate": 7.807993479654307e-05, "loss": 1.6776, "step": 5773 }, { "epoch": 0.32183267376400426, "grad_norm": 0.5995755195617676, "learning_rate": 7.807256689932435e-05, "loss": 1.6976, "step": 5774 }, { "epoch": 0.3218884120171674, "grad_norm": 0.5474086999893188, "learning_rate": 7.806519811180751e-05, "loss": 1.4983, "step": 5775 }, { "epoch": 0.3219441502703305, "grad_norm": 0.5364895462989807, "learning_rate": 7.805782843422618e-05, "loss": 1.7632, "step": 5776 }, { "epoch": 0.32199988852349365, "grad_norm": 0.5104418396949768, "learning_rate": 7.805045786681415e-05, "loss": 1.6873, "step": 5777 }, { "epoch": 0.32205562677665683, "grad_norm": 0.5162766575813293, "learning_rate": 7.804308640980513e-05, "loss": 1.6692, "step": 5778 }, { "epoch": 0.32211136502981996, "grad_norm": 0.5526577234268188, "learning_rate": 7.803571406343293e-05, "loss": 1.631, "step": 5779 }, { "epoch": 0.3221671032829831, "grad_norm": 0.4954930245876312, "learning_rate": 7.802834082793131e-05, "loss": 1.4774, "step": 5780 }, { "epoch": 0.3222228415361463, "grad_norm": 0.5704354643821716, "learning_rate": 7.802096670353416e-05, "loss": 1.9247, "step": 5781 }, { "epoch": 0.3222785797893094, "grad_norm": 0.5746217966079712, "learning_rate": 7.80135916904753e-05, "loss": 1.9075, "step": 5782 }, { "epoch": 0.32233431804247253, "grad_norm": 0.5538354516029358, "learning_rate": 7.800621578898867e-05, "loss": 1.6338, "step": 5783 }, { "epoch": 0.3223900562956357, "grad_norm": 0.5441854596138, "learning_rate": 7.799883899930815e-05, "loss": 1.6214, "step": 5784 }, { "epoch": 0.32244579454879885, "grad_norm": 0.5677271485328674, "learning_rate": 7.79914613216677e-05, "loss": 1.7258, "step": 5785 }, { "epoch": 0.322501532801962, "grad_norm": 0.5610553026199341, "learning_rate": 7.798408275630129e-05, "loss": 1.6471, "step": 5786 }, { "epoch": 0.3225572710551251, "grad_norm": 0.5126567482948303, "learning_rate": 7.797670330344294e-05, "loss": 1.7154, "step": 5787 }, { "epoch": 0.3226130093082883, "grad_norm": 0.565370500087738, "learning_rate": 7.796932296332667e-05, "loss": 1.7534, "step": 5788 }, { "epoch": 0.3226687475614514, "grad_norm": 0.5113086104393005, "learning_rate": 7.796194173618654e-05, "loss": 1.5581, "step": 5789 }, { "epoch": 0.32272448581461455, "grad_norm": 0.543984591960907, "learning_rate": 7.795455962225669e-05, "loss": 1.7255, "step": 5790 }, { "epoch": 0.32278022406777773, "grad_norm": 0.5158193707466125, "learning_rate": 7.794717662177115e-05, "loss": 1.6029, "step": 5791 }, { "epoch": 0.32283596232094086, "grad_norm": 0.5405291318893433, "learning_rate": 7.793979273496414e-05, "loss": 1.6035, "step": 5792 }, { "epoch": 0.322891700574104, "grad_norm": 0.617701530456543, "learning_rate": 7.793240796206979e-05, "loss": 1.8577, "step": 5793 }, { "epoch": 0.3229474388272672, "grad_norm": 0.4910410940647125, "learning_rate": 7.79250223033223e-05, "loss": 1.4227, "step": 5794 }, { "epoch": 0.3230031770804303, "grad_norm": 0.5436237454414368, "learning_rate": 7.791763575895594e-05, "loss": 1.5865, "step": 5795 }, { "epoch": 0.32305891533359343, "grad_norm": 0.5777418613433838, "learning_rate": 7.791024832920496e-05, "loss": 1.8056, "step": 5796 }, { "epoch": 0.3231146535867566, "grad_norm": 0.5960043668746948, "learning_rate": 7.79028600143036e-05, "loss": 1.8124, "step": 5797 }, { "epoch": 0.32317039183991975, "grad_norm": 0.5568564534187317, "learning_rate": 7.789547081448622e-05, "loss": 1.614, "step": 5798 }, { "epoch": 0.3232261300930829, "grad_norm": 0.5896525979042053, "learning_rate": 7.788808072998715e-05, "loss": 1.784, "step": 5799 }, { "epoch": 0.323281868346246, "grad_norm": 0.5450705885887146, "learning_rate": 7.788068976104074e-05, "loss": 1.462, "step": 5800 }, { "epoch": 0.3233376065994092, "grad_norm": 0.4870886206626892, "learning_rate": 7.787329790788142e-05, "loss": 1.5523, "step": 5801 }, { "epoch": 0.3233933448525723, "grad_norm": 0.5481093525886536, "learning_rate": 7.78659051707436e-05, "loss": 1.6292, "step": 5802 }, { "epoch": 0.32344908310573545, "grad_norm": 0.5144929885864258, "learning_rate": 7.785851154986174e-05, "loss": 1.4811, "step": 5803 }, { "epoch": 0.32350482135889863, "grad_norm": 0.5884720683097839, "learning_rate": 7.785111704547032e-05, "loss": 1.8426, "step": 5804 }, { "epoch": 0.32356055961206176, "grad_norm": 0.5478202104568481, "learning_rate": 7.784372165780386e-05, "loss": 1.4918, "step": 5805 }, { "epoch": 0.3236162978652249, "grad_norm": 0.5706868767738342, "learning_rate": 7.783632538709688e-05, "loss": 1.6687, "step": 5806 }, { "epoch": 0.3236720361183881, "grad_norm": 0.569288432598114, "learning_rate": 7.782892823358394e-05, "loss": 1.7208, "step": 5807 }, { "epoch": 0.3237277743715512, "grad_norm": 0.6056145429611206, "learning_rate": 7.782153019749967e-05, "loss": 1.9566, "step": 5808 }, { "epoch": 0.32378351262471433, "grad_norm": 0.5828245878219604, "learning_rate": 7.781413127907868e-05, "loss": 1.7169, "step": 5809 }, { "epoch": 0.32383925087787746, "grad_norm": 0.5503557920455933, "learning_rate": 7.780673147855559e-05, "loss": 1.7084, "step": 5810 }, { "epoch": 0.32389498913104064, "grad_norm": 0.5861828327178955, "learning_rate": 7.779933079616512e-05, "loss": 1.6815, "step": 5811 }, { "epoch": 0.3239507273842038, "grad_norm": 0.5410308837890625, "learning_rate": 7.779192923214196e-05, "loss": 1.6899, "step": 5812 }, { "epoch": 0.3240064656373669, "grad_norm": 0.6349414587020874, "learning_rate": 7.778452678672084e-05, "loss": 2.0061, "step": 5813 }, { "epoch": 0.3240622038905301, "grad_norm": 0.6143296360969543, "learning_rate": 7.777712346013651e-05, "loss": 1.6939, "step": 5814 }, { "epoch": 0.3241179421436932, "grad_norm": 0.5646039247512817, "learning_rate": 7.776971925262379e-05, "loss": 1.4296, "step": 5815 }, { "epoch": 0.32417368039685635, "grad_norm": 0.570025622844696, "learning_rate": 7.776231416441748e-05, "loss": 1.8693, "step": 5816 }, { "epoch": 0.32422941865001953, "grad_norm": 0.4873752295970917, "learning_rate": 7.775490819575242e-05, "loss": 1.5215, "step": 5817 }, { "epoch": 0.32428515690318266, "grad_norm": 0.5546776652336121, "learning_rate": 7.774750134686352e-05, "loss": 1.6002, "step": 5818 }, { "epoch": 0.3243408951563458, "grad_norm": 0.5605872273445129, "learning_rate": 7.774009361798565e-05, "loss": 1.42, "step": 5819 }, { "epoch": 0.32439663340950897, "grad_norm": 0.5118110179901123, "learning_rate": 7.773268500935372e-05, "loss": 1.6076, "step": 5820 }, { "epoch": 0.3244523716626721, "grad_norm": 0.5516108274459839, "learning_rate": 7.772527552120273e-05, "loss": 1.6444, "step": 5821 }, { "epoch": 0.32450810991583523, "grad_norm": 0.5176465511322021, "learning_rate": 7.771786515376765e-05, "loss": 1.3809, "step": 5822 }, { "epoch": 0.32456384816899836, "grad_norm": 0.5901971459388733, "learning_rate": 7.77104539072835e-05, "loss": 1.8976, "step": 5823 }, { "epoch": 0.32461958642216154, "grad_norm": 0.5981687903404236, "learning_rate": 7.770304178198531e-05, "loss": 1.7352, "step": 5824 }, { "epoch": 0.3246753246753247, "grad_norm": 0.48600277304649353, "learning_rate": 7.769562877810816e-05, "loss": 1.5827, "step": 5825 }, { "epoch": 0.3247310629284878, "grad_norm": 0.47773730754852295, "learning_rate": 7.768821489588713e-05, "loss": 1.44, "step": 5826 }, { "epoch": 0.324786801181651, "grad_norm": 0.5615780353546143, "learning_rate": 7.768080013555737e-05, "loss": 1.6719, "step": 5827 }, { "epoch": 0.3248425394348141, "grad_norm": 0.5451145172119141, "learning_rate": 7.767338449735401e-05, "loss": 1.355, "step": 5828 }, { "epoch": 0.32489827768797724, "grad_norm": 0.5609704852104187, "learning_rate": 7.766596798151224e-05, "loss": 1.6764, "step": 5829 }, { "epoch": 0.32495401594114043, "grad_norm": 0.5926015973091125, "learning_rate": 7.765855058826727e-05, "loss": 1.8243, "step": 5830 }, { "epoch": 0.32500975419430356, "grad_norm": 0.5234283804893494, "learning_rate": 7.765113231785435e-05, "loss": 1.7313, "step": 5831 }, { "epoch": 0.3250654924474667, "grad_norm": 0.5433173179626465, "learning_rate": 7.764371317050873e-05, "loss": 1.7546, "step": 5832 }, { "epoch": 0.3251212307006298, "grad_norm": 0.6074669361114502, "learning_rate": 7.763629314646568e-05, "loss": 1.7879, "step": 5833 }, { "epoch": 0.325176968953793, "grad_norm": 0.6136168241500854, "learning_rate": 7.762887224596055e-05, "loss": 1.8066, "step": 5834 }, { "epoch": 0.32523270720695613, "grad_norm": 0.5498754978179932, "learning_rate": 7.76214504692287e-05, "loss": 1.6913, "step": 5835 }, { "epoch": 0.32528844546011926, "grad_norm": 0.5876418352127075, "learning_rate": 7.761402781650547e-05, "loss": 1.7581, "step": 5836 }, { "epoch": 0.32534418371328244, "grad_norm": 0.5235028862953186, "learning_rate": 7.760660428802628e-05, "loss": 1.5955, "step": 5837 }, { "epoch": 0.32539992196644557, "grad_norm": 0.54973304271698, "learning_rate": 7.759917988402657e-05, "loss": 1.6833, "step": 5838 }, { "epoch": 0.3254556602196087, "grad_norm": 0.6082160472869873, "learning_rate": 7.759175460474177e-05, "loss": 1.8303, "step": 5839 }, { "epoch": 0.3255113984727719, "grad_norm": 0.5204039812088013, "learning_rate": 7.758432845040737e-05, "loss": 1.7216, "step": 5840 }, { "epoch": 0.325567136725935, "grad_norm": 0.5268458724021912, "learning_rate": 7.757690142125893e-05, "loss": 1.6099, "step": 5841 }, { "epoch": 0.32562287497909814, "grad_norm": 0.5118129253387451, "learning_rate": 7.756947351753196e-05, "loss": 1.5388, "step": 5842 }, { "epoch": 0.3256786132322613, "grad_norm": 0.5349292159080505, "learning_rate": 7.756204473946203e-05, "loss": 1.6813, "step": 5843 }, { "epoch": 0.32573435148542446, "grad_norm": 0.5555446743965149, "learning_rate": 7.755461508728472e-05, "loss": 1.5549, "step": 5844 }, { "epoch": 0.3257900897385876, "grad_norm": 0.5379804372787476, "learning_rate": 7.75471845612357e-05, "loss": 1.5658, "step": 5845 }, { "epoch": 0.3258458279917507, "grad_norm": 0.618511974811554, "learning_rate": 7.753975316155057e-05, "loss": 1.8505, "step": 5846 }, { "epoch": 0.3259015662449139, "grad_norm": 0.6143367290496826, "learning_rate": 7.753232088846505e-05, "loss": 1.953, "step": 5847 }, { "epoch": 0.325957304498077, "grad_norm": 0.543201208114624, "learning_rate": 7.752488774221485e-05, "loss": 1.9068, "step": 5848 }, { "epoch": 0.32601304275124016, "grad_norm": 0.5580254197120667, "learning_rate": 7.751745372303567e-05, "loss": 1.6766, "step": 5849 }, { "epoch": 0.32606878100440334, "grad_norm": 0.5846728086471558, "learning_rate": 7.751001883116331e-05, "loss": 1.874, "step": 5850 }, { "epoch": 0.32612451925756647, "grad_norm": 0.5597751140594482, "learning_rate": 7.750258306683353e-05, "loss": 1.7491, "step": 5851 }, { "epoch": 0.3261802575107296, "grad_norm": 0.49921393394470215, "learning_rate": 7.749514643028218e-05, "loss": 1.3701, "step": 5852 }, { "epoch": 0.3262359957638928, "grad_norm": 0.5255808234214783, "learning_rate": 7.748770892174509e-05, "loss": 1.4772, "step": 5853 }, { "epoch": 0.3262917340170559, "grad_norm": 0.5470353960990906, "learning_rate": 7.748027054145814e-05, "loss": 1.7885, "step": 5854 }, { "epoch": 0.32634747227021904, "grad_norm": 0.575181782245636, "learning_rate": 7.747283128965723e-05, "loss": 1.8875, "step": 5855 }, { "epoch": 0.32640321052338217, "grad_norm": 0.6346047520637512, "learning_rate": 7.74653911665783e-05, "loss": 2.0948, "step": 5856 }, { "epoch": 0.32645894877654535, "grad_norm": 0.5814865231513977, "learning_rate": 7.745795017245729e-05, "loss": 1.572, "step": 5857 }, { "epoch": 0.3265146870297085, "grad_norm": 0.5990648865699768, "learning_rate": 7.745050830753018e-05, "loss": 1.7464, "step": 5858 }, { "epoch": 0.3265704252828716, "grad_norm": 0.5689359903335571, "learning_rate": 7.744306557203299e-05, "loss": 1.9168, "step": 5859 }, { "epoch": 0.3266261635360348, "grad_norm": 0.5398204326629639, "learning_rate": 7.743562196620177e-05, "loss": 1.6884, "step": 5860 }, { "epoch": 0.3266819017891979, "grad_norm": 0.5738016366958618, "learning_rate": 7.74281774902726e-05, "loss": 1.815, "step": 5861 }, { "epoch": 0.32673764004236105, "grad_norm": 0.5424049496650696, "learning_rate": 7.742073214448153e-05, "loss": 1.832, "step": 5862 }, { "epoch": 0.32679337829552424, "grad_norm": 0.5409512519836426, "learning_rate": 7.741328592906474e-05, "loss": 1.7179, "step": 5863 }, { "epoch": 0.32684911654868737, "grad_norm": 0.5621674656867981, "learning_rate": 7.740583884425833e-05, "loss": 1.8319, "step": 5864 }, { "epoch": 0.3269048548018505, "grad_norm": 0.5400972962379456, "learning_rate": 7.73983908902985e-05, "loss": 1.6868, "step": 5865 }, { "epoch": 0.3269605930550137, "grad_norm": 0.5927982926368713, "learning_rate": 7.739094206742146e-05, "loss": 1.6426, "step": 5866 }, { "epoch": 0.3270163313081768, "grad_norm": 0.510775089263916, "learning_rate": 7.738349237586343e-05, "loss": 1.6661, "step": 5867 }, { "epoch": 0.32707206956133994, "grad_norm": 0.5710152387619019, "learning_rate": 7.737604181586068e-05, "loss": 1.7263, "step": 5868 }, { "epoch": 0.32712780781450307, "grad_norm": 0.5645250082015991, "learning_rate": 7.736859038764952e-05, "loss": 1.7197, "step": 5869 }, { "epoch": 0.32718354606766625, "grad_norm": 0.5439823865890503, "learning_rate": 7.73611380914662e-05, "loss": 1.7229, "step": 5870 }, { "epoch": 0.3272392843208294, "grad_norm": 0.5163010358810425, "learning_rate": 7.735368492754715e-05, "loss": 1.5273, "step": 5871 }, { "epoch": 0.3272950225739925, "grad_norm": 0.5735363960266113, "learning_rate": 7.734623089612867e-05, "loss": 1.7926, "step": 5872 }, { "epoch": 0.3273507608271557, "grad_norm": 0.5508522391319275, "learning_rate": 7.73387759974472e-05, "loss": 1.492, "step": 5873 }, { "epoch": 0.3274064990803188, "grad_norm": 0.6105926632881165, "learning_rate": 7.733132023173915e-05, "loss": 1.6155, "step": 5874 }, { "epoch": 0.32746223733348195, "grad_norm": 0.5956704020500183, "learning_rate": 7.732386359924097e-05, "loss": 1.7757, "step": 5875 }, { "epoch": 0.32751797558664514, "grad_norm": 0.6001446843147278, "learning_rate": 7.731640610018914e-05, "loss": 1.6669, "step": 5876 }, { "epoch": 0.32757371383980827, "grad_norm": 0.6132667660713196, "learning_rate": 7.730894773482019e-05, "loss": 1.944, "step": 5877 }, { "epoch": 0.3276294520929714, "grad_norm": 0.5684986710548401, "learning_rate": 7.730148850337062e-05, "loss": 1.7491, "step": 5878 }, { "epoch": 0.3276851903461345, "grad_norm": 0.537605881690979, "learning_rate": 7.729402840607702e-05, "loss": 1.7473, "step": 5879 }, { "epoch": 0.3277409285992977, "grad_norm": 0.5186078548431396, "learning_rate": 7.728656744317598e-05, "loss": 1.7703, "step": 5880 }, { "epoch": 0.32779666685246084, "grad_norm": 0.5188151001930237, "learning_rate": 7.727910561490411e-05, "loss": 1.6632, "step": 5881 }, { "epoch": 0.32785240510562397, "grad_norm": 0.5799871683120728, "learning_rate": 7.727164292149806e-05, "loss": 1.7289, "step": 5882 }, { "epoch": 0.32790814335878715, "grad_norm": 0.5974400639533997, "learning_rate": 7.72641793631945e-05, "loss": 1.9396, "step": 5883 }, { "epoch": 0.3279638816119503, "grad_norm": 0.5383574366569519, "learning_rate": 7.725671494023014e-05, "loss": 1.6176, "step": 5884 }, { "epoch": 0.3280196198651134, "grad_norm": 0.5623538494110107, "learning_rate": 7.724924965284169e-05, "loss": 1.7997, "step": 5885 }, { "epoch": 0.3280753581182766, "grad_norm": 0.5270793437957764, "learning_rate": 7.72417835012659e-05, "loss": 1.762, "step": 5886 }, { "epoch": 0.3281310963714397, "grad_norm": 0.4922736585140228, "learning_rate": 7.72343164857396e-05, "loss": 1.29, "step": 5887 }, { "epoch": 0.32818683462460285, "grad_norm": 0.5568634867668152, "learning_rate": 7.722684860649953e-05, "loss": 1.8285, "step": 5888 }, { "epoch": 0.32824257287776604, "grad_norm": 0.5732812285423279, "learning_rate": 7.721937986378261e-05, "loss": 1.6134, "step": 5889 }, { "epoch": 0.32829831113092917, "grad_norm": 0.5091588497161865, "learning_rate": 7.721191025782563e-05, "loss": 1.5536, "step": 5890 }, { "epoch": 0.3283540493840923, "grad_norm": 0.5646446347236633, "learning_rate": 7.720443978886551e-05, "loss": 1.6102, "step": 5891 }, { "epoch": 0.3284097876372554, "grad_norm": 0.5230876207351685, "learning_rate": 7.71969684571392e-05, "loss": 1.7258, "step": 5892 }, { "epoch": 0.3284655258904186, "grad_norm": 0.5695227980613708, "learning_rate": 7.718949626288359e-05, "loss": 1.7538, "step": 5893 }, { "epoch": 0.32852126414358174, "grad_norm": 0.5724740028381348, "learning_rate": 7.718202320633572e-05, "loss": 1.5929, "step": 5894 }, { "epoch": 0.32857700239674487, "grad_norm": 0.5088779926300049, "learning_rate": 7.717454928773253e-05, "loss": 1.5781, "step": 5895 }, { "epoch": 0.32863274064990805, "grad_norm": 0.6324506402015686, "learning_rate": 7.716707450731109e-05, "loss": 1.97, "step": 5896 }, { "epoch": 0.3286884789030712, "grad_norm": 0.5300724506378174, "learning_rate": 7.715959886530843e-05, "loss": 1.6759, "step": 5897 }, { "epoch": 0.3287442171562343, "grad_norm": 0.5645179152488708, "learning_rate": 7.715212236196164e-05, "loss": 1.6515, "step": 5898 }, { "epoch": 0.3287999554093975, "grad_norm": 0.575449526309967, "learning_rate": 7.714464499750784e-05, "loss": 1.7267, "step": 5899 }, { "epoch": 0.3288556936625606, "grad_norm": 0.5279715657234192, "learning_rate": 7.713716677218416e-05, "loss": 1.6431, "step": 5900 }, { "epoch": 0.32891143191572375, "grad_norm": 0.5209466814994812, "learning_rate": 7.712968768622779e-05, "loss": 1.5909, "step": 5901 }, { "epoch": 0.3289671701688869, "grad_norm": 0.5469819903373718, "learning_rate": 7.712220773987589e-05, "loss": 1.6273, "step": 5902 }, { "epoch": 0.32902290842205006, "grad_norm": 0.5781688690185547, "learning_rate": 7.71147269333657e-05, "loss": 1.8497, "step": 5903 }, { "epoch": 0.3290786466752132, "grad_norm": 0.5549498200416565, "learning_rate": 7.710724526693445e-05, "loss": 1.6606, "step": 5904 }, { "epoch": 0.3291343849283763, "grad_norm": 0.5616956949234009, "learning_rate": 7.709976274081944e-05, "loss": 1.8094, "step": 5905 }, { "epoch": 0.3291901231815395, "grad_norm": 0.5189547538757324, "learning_rate": 7.709227935525796e-05, "loss": 1.7477, "step": 5906 }, { "epoch": 0.32924586143470264, "grad_norm": 0.5060945749282837, "learning_rate": 7.708479511048732e-05, "loss": 1.4591, "step": 5907 }, { "epoch": 0.32930159968786576, "grad_norm": 0.5463743209838867, "learning_rate": 7.707731000674492e-05, "loss": 1.6762, "step": 5908 }, { "epoch": 0.32935733794102895, "grad_norm": 0.5190552473068237, "learning_rate": 7.70698240442681e-05, "loss": 1.529, "step": 5909 }, { "epoch": 0.3294130761941921, "grad_norm": 0.5391181111335754, "learning_rate": 7.70623372232943e-05, "loss": 1.6953, "step": 5910 }, { "epoch": 0.3294688144473552, "grad_norm": 0.5780003070831299, "learning_rate": 7.705484954406092e-05, "loss": 1.6728, "step": 5911 }, { "epoch": 0.3295245527005184, "grad_norm": 0.554817795753479, "learning_rate": 7.704736100680547e-05, "loss": 1.6731, "step": 5912 }, { "epoch": 0.3295802909536815, "grad_norm": 0.590787410736084, "learning_rate": 7.703987161176545e-05, "loss": 1.9063, "step": 5913 }, { "epoch": 0.32963602920684465, "grad_norm": 0.5418079495429993, "learning_rate": 7.703238135917832e-05, "loss": 1.6984, "step": 5914 }, { "epoch": 0.3296917674600078, "grad_norm": 0.5568365454673767, "learning_rate": 7.702489024928168e-05, "loss": 1.7057, "step": 5915 }, { "epoch": 0.32974750571317096, "grad_norm": 0.5823662281036377, "learning_rate": 7.701739828231309e-05, "loss": 1.8851, "step": 5916 }, { "epoch": 0.3298032439663341, "grad_norm": 0.588046133518219, "learning_rate": 7.700990545851014e-05, "loss": 1.6514, "step": 5917 }, { "epoch": 0.3298589822194972, "grad_norm": 0.5833228826522827, "learning_rate": 7.700241177811048e-05, "loss": 1.7474, "step": 5918 }, { "epoch": 0.3299147204726604, "grad_norm": 0.5376124978065491, "learning_rate": 7.699491724135175e-05, "loss": 1.65, "step": 5919 }, { "epoch": 0.32997045872582353, "grad_norm": 0.579406201839447, "learning_rate": 7.698742184847163e-05, "loss": 1.7039, "step": 5920 }, { "epoch": 0.33002619697898666, "grad_norm": 0.5547471046447754, "learning_rate": 7.697992559970784e-05, "loss": 1.7428, "step": 5921 }, { "epoch": 0.33008193523214985, "grad_norm": 0.5924109816551208, "learning_rate": 7.697242849529812e-05, "loss": 1.7935, "step": 5922 }, { "epoch": 0.330137673485313, "grad_norm": 0.5609079003334045, "learning_rate": 7.69649305354802e-05, "loss": 1.7302, "step": 5923 }, { "epoch": 0.3301934117384761, "grad_norm": 0.5709410309791565, "learning_rate": 7.695743172049192e-05, "loss": 1.6529, "step": 5924 }, { "epoch": 0.33024914999163923, "grad_norm": 0.5341020822525024, "learning_rate": 7.694993205057108e-05, "loss": 1.696, "step": 5925 }, { "epoch": 0.3303048882448024, "grad_norm": 0.5852230787277222, "learning_rate": 7.694243152595552e-05, "loss": 1.6173, "step": 5926 }, { "epoch": 0.33036062649796555, "grad_norm": 0.5338337421417236, "learning_rate": 7.693493014688313e-05, "loss": 1.4818, "step": 5927 }, { "epoch": 0.3304163647511287, "grad_norm": 0.5398749113082886, "learning_rate": 7.69274279135918e-05, "loss": 1.631, "step": 5928 }, { "epoch": 0.33047210300429186, "grad_norm": 0.5520002245903015, "learning_rate": 7.691992482631944e-05, "loss": 1.8426, "step": 5929 }, { "epoch": 0.330527841257455, "grad_norm": 0.5498268008232117, "learning_rate": 7.691242088530401e-05, "loss": 1.8106, "step": 5930 }, { "epoch": 0.3305835795106181, "grad_norm": 0.5437809824943542, "learning_rate": 7.690491609078351e-05, "loss": 1.7523, "step": 5931 }, { "epoch": 0.3306393177637813, "grad_norm": 0.6089059114456177, "learning_rate": 7.689741044299595e-05, "loss": 1.7299, "step": 5932 }, { "epoch": 0.33069505601694443, "grad_norm": 0.5289489030838013, "learning_rate": 7.688990394217933e-05, "loss": 1.691, "step": 5933 }, { "epoch": 0.33075079427010756, "grad_norm": 0.555590033531189, "learning_rate": 7.688239658857174e-05, "loss": 1.45, "step": 5934 }, { "epoch": 0.33080653252327075, "grad_norm": 0.6252313256263733, "learning_rate": 7.687488838241128e-05, "loss": 1.8009, "step": 5935 }, { "epoch": 0.3308622707764339, "grad_norm": 0.5846867561340332, "learning_rate": 7.686737932393605e-05, "loss": 1.7873, "step": 5936 }, { "epoch": 0.330918009029597, "grad_norm": 0.5312223434448242, "learning_rate": 7.685986941338419e-05, "loss": 1.6196, "step": 5937 }, { "epoch": 0.33097374728276013, "grad_norm": 0.5511593222618103, "learning_rate": 7.685235865099387e-05, "loss": 1.7915, "step": 5938 }, { "epoch": 0.3310294855359233, "grad_norm": 0.5287107825279236, "learning_rate": 7.684484703700332e-05, "loss": 1.6648, "step": 5939 }, { "epoch": 0.33108522378908645, "grad_norm": 0.5697956681251526, "learning_rate": 7.683733457165071e-05, "loss": 2.0054, "step": 5940 }, { "epoch": 0.3311409620422496, "grad_norm": 0.5331019759178162, "learning_rate": 7.682982125517433e-05, "loss": 1.7598, "step": 5941 }, { "epoch": 0.33119670029541276, "grad_norm": 0.5488009452819824, "learning_rate": 7.682230708781244e-05, "loss": 1.4258, "step": 5942 }, { "epoch": 0.3312524385485759, "grad_norm": 0.5415595173835754, "learning_rate": 7.681479206980338e-05, "loss": 1.766, "step": 5943 }, { "epoch": 0.331308176801739, "grad_norm": 0.6208872199058533, "learning_rate": 7.680727620138542e-05, "loss": 1.879, "step": 5944 }, { "epoch": 0.3313639150549022, "grad_norm": 0.5650165677070618, "learning_rate": 7.679975948279699e-05, "loss": 1.4933, "step": 5945 }, { "epoch": 0.33141965330806533, "grad_norm": 0.5754852890968323, "learning_rate": 7.679224191427642e-05, "loss": 1.6821, "step": 5946 }, { "epoch": 0.33147539156122846, "grad_norm": 0.5749027132987976, "learning_rate": 7.678472349606215e-05, "loss": 1.8599, "step": 5947 }, { "epoch": 0.3315311298143916, "grad_norm": 0.5200157761573792, "learning_rate": 7.677720422839263e-05, "loss": 1.6659, "step": 5948 }, { "epoch": 0.3315868680675548, "grad_norm": 0.6056989431381226, "learning_rate": 7.676968411150629e-05, "loss": 1.9657, "step": 5949 }, { "epoch": 0.3316426063207179, "grad_norm": 0.5650584697723389, "learning_rate": 7.676216314564166e-05, "loss": 1.9396, "step": 5950 }, { "epoch": 0.33169834457388103, "grad_norm": 0.5425543785095215, "learning_rate": 7.675464133103726e-05, "loss": 1.6447, "step": 5951 }, { "epoch": 0.3317540828270442, "grad_norm": 0.5751011967658997, "learning_rate": 7.674711866793163e-05, "loss": 1.7975, "step": 5952 }, { "epoch": 0.33180982108020735, "grad_norm": 0.521195113658905, "learning_rate": 7.673959515656333e-05, "loss": 1.6343, "step": 5953 }, { "epoch": 0.3318655593333705, "grad_norm": 0.5193372964859009, "learning_rate": 7.673207079717098e-05, "loss": 1.7215, "step": 5954 }, { "epoch": 0.33192129758653366, "grad_norm": 0.4974719285964966, "learning_rate": 7.672454558999318e-05, "loss": 1.5058, "step": 5955 }, { "epoch": 0.3319770358396968, "grad_norm": 0.610576868057251, "learning_rate": 7.671701953526863e-05, "loss": 1.8826, "step": 5956 }, { "epoch": 0.3320327740928599, "grad_norm": 0.5185069441795349, "learning_rate": 7.670949263323599e-05, "loss": 1.3823, "step": 5957 }, { "epoch": 0.3320885123460231, "grad_norm": 0.5048871636390686, "learning_rate": 7.670196488413397e-05, "loss": 1.3208, "step": 5958 }, { "epoch": 0.33214425059918623, "grad_norm": 0.512177586555481, "learning_rate": 7.66944362882013e-05, "loss": 1.4293, "step": 5959 }, { "epoch": 0.33219998885234936, "grad_norm": 0.5636778473854065, "learning_rate": 7.668690684567676e-05, "loss": 1.5585, "step": 5960 }, { "epoch": 0.3322557271055125, "grad_norm": 0.5499832630157471, "learning_rate": 7.667937655679913e-05, "loss": 1.5834, "step": 5961 }, { "epoch": 0.3323114653586757, "grad_norm": 0.6139015555381775, "learning_rate": 7.667184542180723e-05, "loss": 2.0935, "step": 5962 }, { "epoch": 0.3323672036118388, "grad_norm": 0.5284989476203918, "learning_rate": 7.666431344093988e-05, "loss": 1.6838, "step": 5963 }, { "epoch": 0.33242294186500193, "grad_norm": 0.5448603630065918, "learning_rate": 7.665678061443599e-05, "loss": 1.6688, "step": 5964 }, { "epoch": 0.3324786801181651, "grad_norm": 0.5356377959251404, "learning_rate": 7.664924694253443e-05, "loss": 1.6131, "step": 5965 }, { "epoch": 0.33253441837132824, "grad_norm": 0.5786362886428833, "learning_rate": 7.664171242547414e-05, "loss": 1.859, "step": 5966 }, { "epoch": 0.3325901566244914, "grad_norm": 0.5811523199081421, "learning_rate": 7.663417706349407e-05, "loss": 1.6848, "step": 5967 }, { "epoch": 0.33264589487765456, "grad_norm": 0.5504920482635498, "learning_rate": 7.662664085683317e-05, "loss": 1.7, "step": 5968 }, { "epoch": 0.3327016331308177, "grad_norm": 0.6110926866531372, "learning_rate": 7.66191038057305e-05, "loss": 1.87, "step": 5969 }, { "epoch": 0.3327573713839808, "grad_norm": 0.5238990187644958, "learning_rate": 7.661156591042502e-05, "loss": 1.6083, "step": 5970 }, { "epoch": 0.33281310963714394, "grad_norm": 0.5919533371925354, "learning_rate": 7.660402717115584e-05, "loss": 1.6786, "step": 5971 }, { "epoch": 0.33286884789030713, "grad_norm": 0.565631091594696, "learning_rate": 7.659648758816205e-05, "loss": 1.595, "step": 5972 }, { "epoch": 0.33292458614347026, "grad_norm": 0.6189529299736023, "learning_rate": 7.658894716168271e-05, "loss": 2.0188, "step": 5973 }, { "epoch": 0.3329803243966334, "grad_norm": 0.5532551407814026, "learning_rate": 7.658140589195701e-05, "loss": 1.6095, "step": 5974 }, { "epoch": 0.33303606264979657, "grad_norm": 0.4914916157722473, "learning_rate": 7.657386377922409e-05, "loss": 1.6199, "step": 5975 }, { "epoch": 0.3330918009029597, "grad_norm": 0.5677047371864319, "learning_rate": 7.656632082372315e-05, "loss": 1.5635, "step": 5976 }, { "epoch": 0.33314753915612283, "grad_norm": 0.5638590455055237, "learning_rate": 7.65587770256934e-05, "loss": 1.7578, "step": 5977 }, { "epoch": 0.333203277409286, "grad_norm": 0.5115950107574463, "learning_rate": 7.655123238537409e-05, "loss": 1.4157, "step": 5978 }, { "epoch": 0.33325901566244914, "grad_norm": 0.6125264763832092, "learning_rate": 7.65436869030045e-05, "loss": 1.8876, "step": 5979 }, { "epoch": 0.33331475391561227, "grad_norm": 0.5354574918746948, "learning_rate": 7.653614057882393e-05, "loss": 1.7052, "step": 5980 }, { "epoch": 0.33337049216877546, "grad_norm": 0.5426600575447083, "learning_rate": 7.652859341307168e-05, "loss": 1.7011, "step": 5981 }, { "epoch": 0.3334262304219386, "grad_norm": 0.7442419528961182, "learning_rate": 7.652104540598712e-05, "loss": 1.7664, "step": 5982 }, { "epoch": 0.3334819686751017, "grad_norm": 0.5431948900222778, "learning_rate": 7.651349655780965e-05, "loss": 1.5627, "step": 5983 }, { "epoch": 0.33353770692826484, "grad_norm": 0.5939268469810486, "learning_rate": 7.650594686877863e-05, "loss": 1.8128, "step": 5984 }, { "epoch": 0.333593445181428, "grad_norm": 0.540123462677002, "learning_rate": 7.649839633913352e-05, "loss": 1.6395, "step": 5985 }, { "epoch": 0.33364918343459116, "grad_norm": 0.5777207016944885, "learning_rate": 7.649084496911378e-05, "loss": 1.7467, "step": 5986 }, { "epoch": 0.3337049216877543, "grad_norm": 0.5720601081848145, "learning_rate": 7.648329275895889e-05, "loss": 1.8314, "step": 5987 }, { "epoch": 0.33376065994091747, "grad_norm": 0.5010839104652405, "learning_rate": 7.647573970890837e-05, "loss": 1.5876, "step": 5988 }, { "epoch": 0.3338163981940806, "grad_norm": 0.5364264249801636, "learning_rate": 7.646818581920173e-05, "loss": 1.6042, "step": 5989 }, { "epoch": 0.33387213644724373, "grad_norm": 0.5355646014213562, "learning_rate": 7.646063109007858e-05, "loss": 1.5054, "step": 5990 }, { "epoch": 0.3339278747004069, "grad_norm": 0.5173195600509644, "learning_rate": 7.645307552177847e-05, "loss": 1.7355, "step": 5991 }, { "epoch": 0.33398361295357004, "grad_norm": 0.5141093134880066, "learning_rate": 7.644551911454103e-05, "loss": 1.5428, "step": 5992 }, { "epoch": 0.33403935120673317, "grad_norm": 0.5739405751228333, "learning_rate": 7.643796186860595e-05, "loss": 1.8064, "step": 5993 }, { "epoch": 0.3340950894598963, "grad_norm": 0.6502695083618164, "learning_rate": 7.643040378421282e-05, "loss": 1.9495, "step": 5994 }, { "epoch": 0.3341508277130595, "grad_norm": 0.5652748942375183, "learning_rate": 7.64228448616014e-05, "loss": 1.6926, "step": 5995 }, { "epoch": 0.3342065659662226, "grad_norm": 0.5500004291534424, "learning_rate": 7.64152851010114e-05, "loss": 1.6566, "step": 5996 }, { "epoch": 0.33426230421938574, "grad_norm": 0.6248365044593811, "learning_rate": 7.640772450268255e-05, "loss": 1.6196, "step": 5997 }, { "epoch": 0.3343180424725489, "grad_norm": 0.5509215593338013, "learning_rate": 7.640016306685467e-05, "loss": 1.6845, "step": 5998 }, { "epoch": 0.33437378072571206, "grad_norm": 0.6251245141029358, "learning_rate": 7.639260079376753e-05, "loss": 1.9948, "step": 5999 }, { "epoch": 0.3344295189788752, "grad_norm": 0.536384642124176, "learning_rate": 7.638503768366098e-05, "loss": 1.6778, "step": 6000 }, { "epoch": 0.33448525723203837, "grad_norm": 0.5998651385307312, "learning_rate": 7.637747373677486e-05, "loss": 1.6279, "step": 6001 }, { "epoch": 0.3345409954852015, "grad_norm": 0.5673259496688843, "learning_rate": 7.636990895334907e-05, "loss": 1.7001, "step": 6002 }, { "epoch": 0.3345967337383646, "grad_norm": 0.5465088486671448, "learning_rate": 7.63623433336235e-05, "loss": 1.7576, "step": 6003 }, { "epoch": 0.3346524719915278, "grad_norm": 0.5544756054878235, "learning_rate": 7.635477687783814e-05, "loss": 1.844, "step": 6004 }, { "epoch": 0.33470821024469094, "grad_norm": 0.5186877846717834, "learning_rate": 7.634720958623287e-05, "loss": 1.6125, "step": 6005 }, { "epoch": 0.33476394849785407, "grad_norm": 0.5501444935798645, "learning_rate": 7.633964145904777e-05, "loss": 1.7169, "step": 6006 }, { "epoch": 0.3348196867510172, "grad_norm": 0.5606530904769897, "learning_rate": 7.633207249652278e-05, "loss": 1.6944, "step": 6007 }, { "epoch": 0.3348754250041804, "grad_norm": 0.49215444922447205, "learning_rate": 7.6324502698898e-05, "loss": 1.4025, "step": 6008 }, { "epoch": 0.3349311632573435, "grad_norm": 0.555610716342926, "learning_rate": 7.631693206641346e-05, "loss": 1.7292, "step": 6009 }, { "epoch": 0.33498690151050664, "grad_norm": 0.5174264907836914, "learning_rate": 7.630936059930927e-05, "loss": 1.5525, "step": 6010 }, { "epoch": 0.3350426397636698, "grad_norm": 0.5901679992675781, "learning_rate": 7.630178829782558e-05, "loss": 1.7284, "step": 6011 }, { "epoch": 0.33509837801683295, "grad_norm": 0.5459769368171692, "learning_rate": 7.629421516220249e-05, "loss": 1.6727, "step": 6012 }, { "epoch": 0.3351541162699961, "grad_norm": 0.5339307188987732, "learning_rate": 7.628664119268023e-05, "loss": 1.7325, "step": 6013 }, { "epoch": 0.33520985452315927, "grad_norm": 0.533289909362793, "learning_rate": 7.627906638949895e-05, "loss": 1.5102, "step": 6014 }, { "epoch": 0.3352655927763224, "grad_norm": 0.5171735286712646, "learning_rate": 7.62714907528989e-05, "loss": 1.5725, "step": 6015 }, { "epoch": 0.3353213310294855, "grad_norm": 0.585667610168457, "learning_rate": 7.626391428312035e-05, "loss": 1.8119, "step": 6016 }, { "epoch": 0.33537706928264865, "grad_norm": 0.504396378993988, "learning_rate": 7.625633698040357e-05, "loss": 1.4209, "step": 6017 }, { "epoch": 0.33543280753581184, "grad_norm": 0.5608323216438293, "learning_rate": 7.624875884498886e-05, "loss": 1.8436, "step": 6018 }, { "epoch": 0.33548854578897497, "grad_norm": 0.5625400543212891, "learning_rate": 7.624117987711656e-05, "loss": 1.836, "step": 6019 }, { "epoch": 0.3355442840421381, "grad_norm": 0.6377468109130859, "learning_rate": 7.623360007702702e-05, "loss": 1.7539, "step": 6020 }, { "epoch": 0.3356000222953013, "grad_norm": 0.556115984916687, "learning_rate": 7.622601944496064e-05, "loss": 1.6686, "step": 6021 }, { "epoch": 0.3356557605484644, "grad_norm": 0.49739575386047363, "learning_rate": 7.621843798115785e-05, "loss": 1.5361, "step": 6022 }, { "epoch": 0.33571149880162754, "grad_norm": 0.5968783497810364, "learning_rate": 7.621085568585905e-05, "loss": 1.8225, "step": 6023 }, { "epoch": 0.3357672370547907, "grad_norm": 0.575768232345581, "learning_rate": 7.620327255930474e-05, "loss": 1.908, "step": 6024 }, { "epoch": 0.33582297530795385, "grad_norm": 0.5628235340118408, "learning_rate": 7.61956886017354e-05, "loss": 1.6388, "step": 6025 }, { "epoch": 0.335878713561117, "grad_norm": 0.5842387676239014, "learning_rate": 7.618810381339155e-05, "loss": 1.8774, "step": 6026 }, { "epoch": 0.33593445181428017, "grad_norm": 0.5307137370109558, "learning_rate": 7.618051819451373e-05, "loss": 1.6372, "step": 6027 }, { "epoch": 0.3359901900674433, "grad_norm": 0.5524066090583801, "learning_rate": 7.617293174534253e-05, "loss": 1.7415, "step": 6028 }, { "epoch": 0.3360459283206064, "grad_norm": 0.5315592885017395, "learning_rate": 7.616534446611851e-05, "loss": 1.6005, "step": 6029 }, { "epoch": 0.33610166657376955, "grad_norm": 0.5379803776741028, "learning_rate": 7.615775635708234e-05, "loss": 1.6998, "step": 6030 }, { "epoch": 0.33615740482693274, "grad_norm": 0.593471884727478, "learning_rate": 7.615016741847463e-05, "loss": 1.6948, "step": 6031 }, { "epoch": 0.33621314308009587, "grad_norm": 0.5759322643280029, "learning_rate": 7.614257765053609e-05, "loss": 1.5575, "step": 6032 }, { "epoch": 0.336268881333259, "grad_norm": 0.5627144575119019, "learning_rate": 7.61349870535074e-05, "loss": 1.7633, "step": 6033 }, { "epoch": 0.3363246195864222, "grad_norm": 0.5872805714607239, "learning_rate": 7.612739562762929e-05, "loss": 1.8196, "step": 6034 }, { "epoch": 0.3363803578395853, "grad_norm": 0.5651592016220093, "learning_rate": 7.611980337314254e-05, "loss": 1.7916, "step": 6035 }, { "epoch": 0.33643609609274844, "grad_norm": 0.5263227820396423, "learning_rate": 7.61122102902879e-05, "loss": 1.6909, "step": 6036 }, { "epoch": 0.3364918343459116, "grad_norm": 0.5474349856376648, "learning_rate": 7.610461637930621e-05, "loss": 1.7166, "step": 6037 }, { "epoch": 0.33654757259907475, "grad_norm": 0.5443328022956848, "learning_rate": 7.609702164043829e-05, "loss": 1.6479, "step": 6038 }, { "epoch": 0.3366033108522379, "grad_norm": 0.5788392424583435, "learning_rate": 7.6089426073925e-05, "loss": 1.7645, "step": 6039 }, { "epoch": 0.336659049105401, "grad_norm": 0.5407717823982239, "learning_rate": 7.608182968000721e-05, "loss": 1.7543, "step": 6040 }, { "epoch": 0.3367147873585642, "grad_norm": 0.5548073649406433, "learning_rate": 7.607423245892586e-05, "loss": 1.6023, "step": 6041 }, { "epoch": 0.3367705256117273, "grad_norm": 0.5452112555503845, "learning_rate": 7.606663441092188e-05, "loss": 1.7298, "step": 6042 }, { "epoch": 0.33682626386489045, "grad_norm": 0.5845810770988464, "learning_rate": 7.605903553623625e-05, "loss": 1.9093, "step": 6043 }, { "epoch": 0.33688200211805364, "grad_norm": 0.5392171740531921, "learning_rate": 7.605143583510991e-05, "loss": 1.7111, "step": 6044 }, { "epoch": 0.33693774037121677, "grad_norm": 0.51267009973526, "learning_rate": 7.604383530778396e-05, "loss": 1.5154, "step": 6045 }, { "epoch": 0.3369934786243799, "grad_norm": 0.5741301774978638, "learning_rate": 7.603623395449937e-05, "loss": 1.7287, "step": 6046 }, { "epoch": 0.3370492168775431, "grad_norm": 0.5356318354606628, "learning_rate": 7.602863177549724e-05, "loss": 1.7299, "step": 6047 }, { "epoch": 0.3371049551307062, "grad_norm": 0.5820077061653137, "learning_rate": 7.602102877101869e-05, "loss": 1.8304, "step": 6048 }, { "epoch": 0.33716069338386934, "grad_norm": 0.5404535531997681, "learning_rate": 7.60134249413048e-05, "loss": 1.5754, "step": 6049 }, { "epoch": 0.3372164316370325, "grad_norm": 0.5398672819137573, "learning_rate": 7.600582028659675e-05, "loss": 1.7943, "step": 6050 }, { "epoch": 0.33727216989019565, "grad_norm": 0.5376107692718506, "learning_rate": 7.59982148071357e-05, "loss": 1.4528, "step": 6051 }, { "epoch": 0.3373279081433588, "grad_norm": 0.5899469256401062, "learning_rate": 7.599060850316287e-05, "loss": 1.7503, "step": 6052 }, { "epoch": 0.3373836463965219, "grad_norm": 0.5668314695358276, "learning_rate": 7.598300137491946e-05, "loss": 1.7732, "step": 6053 }, { "epoch": 0.3374393846496851, "grad_norm": 0.6154149174690247, "learning_rate": 7.597539342264675e-05, "loss": 1.6534, "step": 6054 }, { "epoch": 0.3374951229028482, "grad_norm": 0.5487502813339233, "learning_rate": 7.596778464658599e-05, "loss": 1.6286, "step": 6055 }, { "epoch": 0.33755086115601135, "grad_norm": 0.5876896977424622, "learning_rate": 7.596017504697851e-05, "loss": 1.7787, "step": 6056 }, { "epoch": 0.33760659940917453, "grad_norm": 0.5587677359580994, "learning_rate": 7.595256462406564e-05, "loss": 1.7862, "step": 6057 }, { "epoch": 0.33766233766233766, "grad_norm": 0.5694131255149841, "learning_rate": 7.594495337808873e-05, "loss": 1.6926, "step": 6058 }, { "epoch": 0.3377180759155008, "grad_norm": 0.5591508150100708, "learning_rate": 7.593734130928918e-05, "loss": 1.6135, "step": 6059 }, { "epoch": 0.337773814168664, "grad_norm": 0.5355261564254761, "learning_rate": 7.592972841790837e-05, "loss": 1.5746, "step": 6060 }, { "epoch": 0.3378295524218271, "grad_norm": 0.5518434047698975, "learning_rate": 7.592211470418777e-05, "loss": 1.6457, "step": 6061 }, { "epoch": 0.33788529067499024, "grad_norm": 0.5891780257225037, "learning_rate": 7.59145001683688e-05, "loss": 1.7026, "step": 6062 }, { "epoch": 0.33794102892815336, "grad_norm": 0.5723276734352112, "learning_rate": 7.590688481069302e-05, "loss": 1.8168, "step": 6063 }, { "epoch": 0.33799676718131655, "grad_norm": 0.5468711853027344, "learning_rate": 7.589926863140187e-05, "loss": 1.607, "step": 6064 }, { "epoch": 0.3380525054344797, "grad_norm": 0.6062466502189636, "learning_rate": 7.589165163073695e-05, "loss": 1.9372, "step": 6065 }, { "epoch": 0.3381082436876428, "grad_norm": 0.5140287280082703, "learning_rate": 7.588403380893979e-05, "loss": 1.6545, "step": 6066 }, { "epoch": 0.338163981940806, "grad_norm": 0.5543786287307739, "learning_rate": 7.587641516625197e-05, "loss": 1.8205, "step": 6067 }, { "epoch": 0.3382197201939691, "grad_norm": 0.5844648480415344, "learning_rate": 7.586879570291514e-05, "loss": 1.8597, "step": 6068 }, { "epoch": 0.33827545844713225, "grad_norm": 0.5109902024269104, "learning_rate": 7.586117541917095e-05, "loss": 1.5266, "step": 6069 }, { "epoch": 0.33833119670029543, "grad_norm": 0.5208814740180969, "learning_rate": 7.585355431526104e-05, "loss": 1.721, "step": 6070 }, { "epoch": 0.33838693495345856, "grad_norm": 0.5144614577293396, "learning_rate": 7.584593239142712e-05, "loss": 1.624, "step": 6071 }, { "epoch": 0.3384426732066217, "grad_norm": 0.5855271220207214, "learning_rate": 7.583830964791094e-05, "loss": 1.8765, "step": 6072 }, { "epoch": 0.3384984114597849, "grad_norm": 0.5410987138748169, "learning_rate": 7.58306860849542e-05, "loss": 1.6027, "step": 6073 }, { "epoch": 0.338554149712948, "grad_norm": 0.6230753064155579, "learning_rate": 7.582306170279872e-05, "loss": 1.8485, "step": 6074 }, { "epoch": 0.33860988796611113, "grad_norm": 0.5517315864562988, "learning_rate": 7.581543650168628e-05, "loss": 1.7822, "step": 6075 }, { "epoch": 0.33866562621927426, "grad_norm": 0.5739060044288635, "learning_rate": 7.580781048185871e-05, "loss": 1.6443, "step": 6076 }, { "epoch": 0.33872136447243745, "grad_norm": 0.5618791580200195, "learning_rate": 7.580018364355785e-05, "loss": 1.5943, "step": 6077 }, { "epoch": 0.3387771027256006, "grad_norm": 0.5723870396614075, "learning_rate": 7.579255598702562e-05, "loss": 1.4501, "step": 6078 }, { "epoch": 0.3388328409787637, "grad_norm": 0.5427421927452087, "learning_rate": 7.578492751250386e-05, "loss": 1.7001, "step": 6079 }, { "epoch": 0.3388885792319269, "grad_norm": 0.5765356421470642, "learning_rate": 7.577729822023455e-05, "loss": 1.6652, "step": 6080 }, { "epoch": 0.33894431748509, "grad_norm": 0.5492302179336548, "learning_rate": 7.576966811045963e-05, "loss": 1.6988, "step": 6081 }, { "epoch": 0.33900005573825315, "grad_norm": 0.5814895033836365, "learning_rate": 7.576203718342108e-05, "loss": 1.9584, "step": 6082 }, { "epoch": 0.33905579399141633, "grad_norm": 0.6068232655525208, "learning_rate": 7.575440543936092e-05, "loss": 2.0357, "step": 6083 }, { "epoch": 0.33911153224457946, "grad_norm": 0.5426899790763855, "learning_rate": 7.574677287852117e-05, "loss": 1.6323, "step": 6084 }, { "epoch": 0.3391672704977426, "grad_norm": 0.5811708569526672, "learning_rate": 7.573913950114391e-05, "loss": 1.538, "step": 6085 }, { "epoch": 0.3392230087509057, "grad_norm": 0.5753393769264221, "learning_rate": 7.573150530747122e-05, "loss": 1.6013, "step": 6086 }, { "epoch": 0.3392787470040689, "grad_norm": 0.5427485108375549, "learning_rate": 7.572387029774519e-05, "loss": 1.6444, "step": 6087 }, { "epoch": 0.33933448525723203, "grad_norm": 0.5431930422782898, "learning_rate": 7.571623447220797e-05, "loss": 1.6733, "step": 6088 }, { "epoch": 0.33939022351039516, "grad_norm": 0.555357813835144, "learning_rate": 7.570859783110176e-05, "loss": 1.7219, "step": 6089 }, { "epoch": 0.33944596176355835, "grad_norm": 0.5578222274780273, "learning_rate": 7.570096037466869e-05, "loss": 1.407, "step": 6090 }, { "epoch": 0.3395017000167215, "grad_norm": 0.5213090777397156, "learning_rate": 7.5693322103151e-05, "loss": 1.4608, "step": 6091 }, { "epoch": 0.3395574382698846, "grad_norm": 0.5651876330375671, "learning_rate": 7.568568301679096e-05, "loss": 1.6756, "step": 6092 }, { "epoch": 0.3396131765230478, "grad_norm": 0.5914562940597534, "learning_rate": 7.56780431158308e-05, "loss": 1.7648, "step": 6093 }, { "epoch": 0.3396689147762109, "grad_norm": 0.5577222108840942, "learning_rate": 7.567040240051281e-05, "loss": 1.6954, "step": 6094 }, { "epoch": 0.33972465302937405, "grad_norm": 0.5938786268234253, "learning_rate": 7.566276087107935e-05, "loss": 1.8131, "step": 6095 }, { "epoch": 0.33978039128253723, "grad_norm": 0.5387003421783447, "learning_rate": 7.565511852777274e-05, "loss": 1.6522, "step": 6096 }, { "epoch": 0.33983612953570036, "grad_norm": 0.5465493202209473, "learning_rate": 7.564747537083534e-05, "loss": 1.6971, "step": 6097 }, { "epoch": 0.3398918677888635, "grad_norm": 0.5273247361183167, "learning_rate": 7.563983140050955e-05, "loss": 1.6759, "step": 6098 }, { "epoch": 0.3399476060420266, "grad_norm": 0.5733767151832581, "learning_rate": 7.563218661703782e-05, "loss": 1.7203, "step": 6099 }, { "epoch": 0.3400033442951898, "grad_norm": 0.6077031493186951, "learning_rate": 7.562454102066255e-05, "loss": 1.9364, "step": 6100 }, { "epoch": 0.34005908254835293, "grad_norm": 0.5688176155090332, "learning_rate": 7.561689461162625e-05, "loss": 1.6623, "step": 6101 }, { "epoch": 0.34011482080151606, "grad_norm": 0.5663187503814697, "learning_rate": 7.56092473901714e-05, "loss": 1.567, "step": 6102 }, { "epoch": 0.34017055905467924, "grad_norm": 0.6150177121162415, "learning_rate": 7.560159935654056e-05, "loss": 1.8714, "step": 6103 }, { "epoch": 0.3402262973078424, "grad_norm": 0.5515531301498413, "learning_rate": 7.559395051097624e-05, "loss": 1.6713, "step": 6104 }, { "epoch": 0.3402820355610055, "grad_norm": 0.687240481376648, "learning_rate": 7.558630085372105e-05, "loss": 1.6552, "step": 6105 }, { "epoch": 0.3403377738141687, "grad_norm": 0.5493181943893433, "learning_rate": 7.557865038501756e-05, "loss": 1.65, "step": 6106 }, { "epoch": 0.3403935120673318, "grad_norm": 0.5683436989784241, "learning_rate": 7.55709991051084e-05, "loss": 1.8507, "step": 6107 }, { "epoch": 0.34044925032049494, "grad_norm": 0.5895001292228699, "learning_rate": 7.556334701423627e-05, "loss": 2.0143, "step": 6108 }, { "epoch": 0.3405049885736581, "grad_norm": 0.5967059135437012, "learning_rate": 7.555569411264378e-05, "loss": 1.9006, "step": 6109 }, { "epoch": 0.34056072682682126, "grad_norm": 0.5140407085418701, "learning_rate": 7.554804040057369e-05, "loss": 1.4028, "step": 6110 }, { "epoch": 0.3406164650799844, "grad_norm": 0.5586955547332764, "learning_rate": 7.554038587826872e-05, "loss": 1.6835, "step": 6111 }, { "epoch": 0.3406722033331475, "grad_norm": 0.4853399395942688, "learning_rate": 7.553273054597163e-05, "loss": 1.5901, "step": 6112 }, { "epoch": 0.3407279415863107, "grad_norm": 0.5674946308135986, "learning_rate": 7.552507440392518e-05, "loss": 1.8776, "step": 6113 }, { "epoch": 0.34078367983947383, "grad_norm": 0.5115534663200378, "learning_rate": 7.551741745237218e-05, "loss": 1.4647, "step": 6114 }, { "epoch": 0.34083941809263696, "grad_norm": 0.6239203214645386, "learning_rate": 7.55097596915555e-05, "loss": 1.8638, "step": 6115 }, { "epoch": 0.34089515634580014, "grad_norm": 0.5367839336395264, "learning_rate": 7.550210112171796e-05, "loss": 1.7598, "step": 6116 }, { "epoch": 0.34095089459896327, "grad_norm": 0.5434908270835876, "learning_rate": 7.549444174310246e-05, "loss": 1.8239, "step": 6117 }, { "epoch": 0.3410066328521264, "grad_norm": 0.5503940582275391, "learning_rate": 7.548678155595192e-05, "loss": 1.7103, "step": 6118 }, { "epoch": 0.3410623711052896, "grad_norm": 0.5601882338523865, "learning_rate": 7.547912056050925e-05, "loss": 1.8269, "step": 6119 }, { "epoch": 0.3411181093584527, "grad_norm": 0.5472147464752197, "learning_rate": 7.547145875701744e-05, "loss": 1.7221, "step": 6120 }, { "epoch": 0.34117384761161584, "grad_norm": 0.5327697396278381, "learning_rate": 7.546379614571947e-05, "loss": 1.6879, "step": 6121 }, { "epoch": 0.341229585864779, "grad_norm": 0.5991697311401367, "learning_rate": 7.545613272685834e-05, "loss": 1.9402, "step": 6122 }, { "epoch": 0.34128532411794216, "grad_norm": 0.5222532749176025, "learning_rate": 7.544846850067711e-05, "loss": 1.6331, "step": 6123 }, { "epoch": 0.3413410623711053, "grad_norm": 0.5213292837142944, "learning_rate": 7.544080346741884e-05, "loss": 1.6547, "step": 6124 }, { "epoch": 0.3413968006242684, "grad_norm": 0.516547441482544, "learning_rate": 7.54331376273266e-05, "loss": 1.5988, "step": 6125 }, { "epoch": 0.3414525388774316, "grad_norm": 0.5505926609039307, "learning_rate": 7.542547098064351e-05, "loss": 1.8314, "step": 6126 }, { "epoch": 0.34150827713059473, "grad_norm": 0.5631290078163147, "learning_rate": 7.541780352761275e-05, "loss": 1.7797, "step": 6127 }, { "epoch": 0.34156401538375786, "grad_norm": 0.5578431487083435, "learning_rate": 7.541013526847745e-05, "loss": 1.7118, "step": 6128 }, { "epoch": 0.34161975363692104, "grad_norm": 0.6077129244804382, "learning_rate": 7.540246620348079e-05, "loss": 1.8582, "step": 6129 }, { "epoch": 0.34167549189008417, "grad_norm": 0.5378260612487793, "learning_rate": 7.539479633286604e-05, "loss": 1.5773, "step": 6130 }, { "epoch": 0.3417312301432473, "grad_norm": 0.5147218108177185, "learning_rate": 7.538712565687637e-05, "loss": 1.6079, "step": 6131 }, { "epoch": 0.34178696839641043, "grad_norm": 0.5637179017066956, "learning_rate": 7.537945417575513e-05, "loss": 1.7772, "step": 6132 }, { "epoch": 0.3418427066495736, "grad_norm": 0.5718836188316345, "learning_rate": 7.537178188974556e-05, "loss": 1.8646, "step": 6133 }, { "epoch": 0.34189844490273674, "grad_norm": 0.5593611001968384, "learning_rate": 7.5364108799091e-05, "loss": 1.7059, "step": 6134 }, { "epoch": 0.34195418315589987, "grad_norm": 0.5491702556610107, "learning_rate": 7.535643490403478e-05, "loss": 1.5904, "step": 6135 }, { "epoch": 0.34200992140906306, "grad_norm": 0.5673286318778992, "learning_rate": 7.534876020482032e-05, "loss": 1.6569, "step": 6136 }, { "epoch": 0.3420656596622262, "grad_norm": 0.555279552936554, "learning_rate": 7.534108470169094e-05, "loss": 1.947, "step": 6137 }, { "epoch": 0.3421213979153893, "grad_norm": 0.5502607226371765, "learning_rate": 7.533340839489011e-05, "loss": 1.6199, "step": 6138 }, { "epoch": 0.3421771361685525, "grad_norm": 0.5711556673049927, "learning_rate": 7.532573128466129e-05, "loss": 1.901, "step": 6139 }, { "epoch": 0.3422328744217156, "grad_norm": 0.5685670375823975, "learning_rate": 7.53180533712479e-05, "loss": 1.7284, "step": 6140 }, { "epoch": 0.34228861267487876, "grad_norm": 0.555075466632843, "learning_rate": 7.53103746548935e-05, "loss": 1.8184, "step": 6141 }, { "epoch": 0.34234435092804194, "grad_norm": 0.5404545664787292, "learning_rate": 7.530269513584158e-05, "loss": 1.6444, "step": 6142 }, { "epoch": 0.34240008918120507, "grad_norm": 0.5739527344703674, "learning_rate": 7.52950148143357e-05, "loss": 1.5748, "step": 6143 }, { "epoch": 0.3424558274343682, "grad_norm": 0.5569913983345032, "learning_rate": 7.528733369061942e-05, "loss": 1.8188, "step": 6144 }, { "epoch": 0.3425115656875313, "grad_norm": 0.5430577397346497, "learning_rate": 7.527965176493636e-05, "loss": 1.5839, "step": 6145 }, { "epoch": 0.3425673039406945, "grad_norm": 0.5321673154830933, "learning_rate": 7.527196903753011e-05, "loss": 1.3862, "step": 6146 }, { "epoch": 0.34262304219385764, "grad_norm": 0.5757884979248047, "learning_rate": 7.526428550864437e-05, "loss": 1.5308, "step": 6147 }, { "epoch": 0.34267878044702077, "grad_norm": 0.556651771068573, "learning_rate": 7.525660117852279e-05, "loss": 1.7377, "step": 6148 }, { "epoch": 0.34273451870018395, "grad_norm": 0.5236818790435791, "learning_rate": 7.524891604740908e-05, "loss": 1.7305, "step": 6149 }, { "epoch": 0.3427902569533471, "grad_norm": 0.5686874985694885, "learning_rate": 7.524123011554697e-05, "loss": 1.5379, "step": 6150 }, { "epoch": 0.3428459952065102, "grad_norm": 0.5817770957946777, "learning_rate": 7.52335433831802e-05, "loss": 1.7069, "step": 6151 }, { "epoch": 0.3429017334596734, "grad_norm": 0.5717275738716125, "learning_rate": 7.522585585055255e-05, "loss": 1.8944, "step": 6152 }, { "epoch": 0.3429574717128365, "grad_norm": 0.5469644665718079, "learning_rate": 7.521816751790783e-05, "loss": 1.622, "step": 6153 }, { "epoch": 0.34301320996599965, "grad_norm": 0.5735164880752563, "learning_rate": 7.521047838548988e-05, "loss": 1.8005, "step": 6154 }, { "epoch": 0.3430689482191628, "grad_norm": 0.5070759057998657, "learning_rate": 7.520278845354254e-05, "loss": 1.4795, "step": 6155 }, { "epoch": 0.34312468647232597, "grad_norm": 0.5179046392440796, "learning_rate": 7.519509772230968e-05, "loss": 1.5029, "step": 6156 }, { "epoch": 0.3431804247254891, "grad_norm": 0.5747403502464294, "learning_rate": 7.518740619203523e-05, "loss": 1.7075, "step": 6157 }, { "epoch": 0.3432361629786522, "grad_norm": 0.6233847141265869, "learning_rate": 7.517971386296309e-05, "loss": 1.9524, "step": 6158 }, { "epoch": 0.3432919012318154, "grad_norm": 0.5195590853691101, "learning_rate": 7.517202073533727e-05, "loss": 1.533, "step": 6159 }, { "epoch": 0.34334763948497854, "grad_norm": 0.6035041213035583, "learning_rate": 7.516432680940168e-05, "loss": 1.7298, "step": 6160 }, { "epoch": 0.34340337773814167, "grad_norm": 0.59979248046875, "learning_rate": 7.515663208540037e-05, "loss": 1.7295, "step": 6161 }, { "epoch": 0.34345911599130485, "grad_norm": 0.5844981074333191, "learning_rate": 7.514893656357738e-05, "loss": 1.756, "step": 6162 }, { "epoch": 0.343514854244468, "grad_norm": 0.5281308889389038, "learning_rate": 7.514124024417674e-05, "loss": 1.7149, "step": 6163 }, { "epoch": 0.3435705924976311, "grad_norm": 0.5352674126625061, "learning_rate": 7.513354312744256e-05, "loss": 1.7262, "step": 6164 }, { "epoch": 0.3436263307507943, "grad_norm": 0.562127411365509, "learning_rate": 7.512584521361891e-05, "loss": 1.6434, "step": 6165 }, { "epoch": 0.3436820690039574, "grad_norm": 0.5535931587219238, "learning_rate": 7.511814650294994e-05, "loss": 1.5353, "step": 6166 }, { "epoch": 0.34373780725712055, "grad_norm": 0.543641209602356, "learning_rate": 7.511044699567981e-05, "loss": 1.8312, "step": 6167 }, { "epoch": 0.3437935455102837, "grad_norm": 0.559559166431427, "learning_rate": 7.510274669205273e-05, "loss": 1.6326, "step": 6168 }, { "epoch": 0.34384928376344687, "grad_norm": 0.5449449419975281, "learning_rate": 7.509504559231287e-05, "loss": 1.7319, "step": 6169 }, { "epoch": 0.34390502201661, "grad_norm": 0.5315961837768555, "learning_rate": 7.508734369670447e-05, "loss": 1.69, "step": 6170 }, { "epoch": 0.3439607602697731, "grad_norm": 0.5506524443626404, "learning_rate": 7.507964100547181e-05, "loss": 1.6961, "step": 6171 }, { "epoch": 0.3440164985229363, "grad_norm": 0.5587935447692871, "learning_rate": 7.507193751885915e-05, "loss": 1.794, "step": 6172 }, { "epoch": 0.34407223677609944, "grad_norm": 0.5281456112861633, "learning_rate": 7.506423323711083e-05, "loss": 1.637, "step": 6173 }, { "epoch": 0.34412797502926257, "grad_norm": 0.5220721960067749, "learning_rate": 7.505652816047115e-05, "loss": 1.4696, "step": 6174 }, { "epoch": 0.34418371328242575, "grad_norm": 0.565938413143158, "learning_rate": 7.504882228918449e-05, "loss": 1.6329, "step": 6175 }, { "epoch": 0.3442394515355889, "grad_norm": 0.532490074634552, "learning_rate": 7.504111562349524e-05, "loss": 1.5929, "step": 6176 }, { "epoch": 0.344295189788752, "grad_norm": 0.5559155941009521, "learning_rate": 7.503340816364779e-05, "loss": 1.6935, "step": 6177 }, { "epoch": 0.3443509280419152, "grad_norm": 0.5494531989097595, "learning_rate": 7.502569990988659e-05, "loss": 1.5508, "step": 6178 }, { "epoch": 0.3444066662950783, "grad_norm": 0.48615095019340515, "learning_rate": 7.50179908624561e-05, "loss": 1.3464, "step": 6179 }, { "epoch": 0.34446240454824145, "grad_norm": 0.543402373790741, "learning_rate": 7.501028102160082e-05, "loss": 1.6306, "step": 6180 }, { "epoch": 0.3445181428014046, "grad_norm": 0.5688214898109436, "learning_rate": 7.500257038756522e-05, "loss": 1.9743, "step": 6181 }, { "epoch": 0.34457388105456777, "grad_norm": 0.5336653590202332, "learning_rate": 7.499485896059389e-05, "loss": 1.7876, "step": 6182 }, { "epoch": 0.3446296193077309, "grad_norm": 0.6009781360626221, "learning_rate": 7.498714674093134e-05, "loss": 1.599, "step": 6183 }, { "epoch": 0.344685357560894, "grad_norm": 0.5108974575996399, "learning_rate": 7.497943372882219e-05, "loss": 1.3671, "step": 6184 }, { "epoch": 0.3447410958140572, "grad_norm": 0.5875006914138794, "learning_rate": 7.497171992451104e-05, "loss": 1.8846, "step": 6185 }, { "epoch": 0.34479683406722034, "grad_norm": 0.5741475820541382, "learning_rate": 7.496400532824252e-05, "loss": 1.8147, "step": 6186 }, { "epoch": 0.34485257232038347, "grad_norm": 0.5426183938980103, "learning_rate": 7.495628994026131e-05, "loss": 1.8584, "step": 6187 }, { "epoch": 0.34490831057354665, "grad_norm": 0.5665351152420044, "learning_rate": 7.49485737608121e-05, "loss": 1.6254, "step": 6188 }, { "epoch": 0.3449640488267098, "grad_norm": 0.6417822241783142, "learning_rate": 7.494085679013959e-05, "loss": 1.5997, "step": 6189 }, { "epoch": 0.3450197870798729, "grad_norm": 0.580936849117279, "learning_rate": 7.49331390284885e-05, "loss": 1.7723, "step": 6190 }, { "epoch": 0.34507552533303604, "grad_norm": 0.5405949354171753, "learning_rate": 7.492542047610362e-05, "loss": 1.7536, "step": 6191 }, { "epoch": 0.3451312635861992, "grad_norm": 0.567459225654602, "learning_rate": 7.491770113322972e-05, "loss": 1.5518, "step": 6192 }, { "epoch": 0.34518700183936235, "grad_norm": 0.5930157899856567, "learning_rate": 7.490998100011164e-05, "loss": 1.8805, "step": 6193 }, { "epoch": 0.3452427400925255, "grad_norm": 0.5590851902961731, "learning_rate": 7.490226007699418e-05, "loss": 1.7369, "step": 6194 }, { "epoch": 0.34529847834568866, "grad_norm": 0.5540249943733215, "learning_rate": 7.489453836412224e-05, "loss": 1.7199, "step": 6195 }, { "epoch": 0.3453542165988518, "grad_norm": 0.6100202798843384, "learning_rate": 7.488681586174066e-05, "loss": 1.8962, "step": 6196 }, { "epoch": 0.3454099548520149, "grad_norm": 0.5453261137008667, "learning_rate": 7.48790925700944e-05, "loss": 1.6779, "step": 6197 }, { "epoch": 0.3454656931051781, "grad_norm": 0.6191526651382446, "learning_rate": 7.487136848942838e-05, "loss": 1.837, "step": 6198 }, { "epoch": 0.34552143135834124, "grad_norm": 0.5043689608573914, "learning_rate": 7.486364361998754e-05, "loss": 1.5438, "step": 6199 }, { "epoch": 0.34557716961150436, "grad_norm": 0.5927308797836304, "learning_rate": 7.485591796201692e-05, "loss": 1.8893, "step": 6200 }, { "epoch": 0.34563290786466755, "grad_norm": 0.5387723445892334, "learning_rate": 7.484819151576147e-05, "loss": 1.7063, "step": 6201 }, { "epoch": 0.3456886461178307, "grad_norm": 0.5273063778877258, "learning_rate": 7.48404642814663e-05, "loss": 1.6052, "step": 6202 }, { "epoch": 0.3457443843709938, "grad_norm": 0.5235535502433777, "learning_rate": 7.48327362593764e-05, "loss": 1.5859, "step": 6203 }, { "epoch": 0.34580012262415694, "grad_norm": 0.5952630043029785, "learning_rate": 7.48250074497369e-05, "loss": 1.9669, "step": 6204 }, { "epoch": 0.3458558608773201, "grad_norm": 0.5512803196907043, "learning_rate": 7.48172778527929e-05, "loss": 1.6103, "step": 6205 }, { "epoch": 0.34591159913048325, "grad_norm": 0.5485497117042542, "learning_rate": 7.480954746878955e-05, "loss": 1.4648, "step": 6206 }, { "epoch": 0.3459673373836464, "grad_norm": 0.5755242109298706, "learning_rate": 7.480181629797201e-05, "loss": 1.7882, "step": 6207 }, { "epoch": 0.34602307563680956, "grad_norm": 0.586279034614563, "learning_rate": 7.479408434058545e-05, "loss": 1.757, "step": 6208 }, { "epoch": 0.3460788138899727, "grad_norm": 0.6023716926574707, "learning_rate": 7.47863515968751e-05, "loss": 1.6573, "step": 6209 }, { "epoch": 0.3461345521431358, "grad_norm": 0.5629722476005554, "learning_rate": 7.477861806708618e-05, "loss": 1.8348, "step": 6210 }, { "epoch": 0.346190290396299, "grad_norm": 0.64363032579422, "learning_rate": 7.477088375146397e-05, "loss": 2.1581, "step": 6211 }, { "epoch": 0.34624602864946213, "grad_norm": 0.5952073335647583, "learning_rate": 7.476314865025376e-05, "loss": 1.7823, "step": 6212 }, { "epoch": 0.34630176690262526, "grad_norm": 0.5444992780685425, "learning_rate": 7.475541276370083e-05, "loss": 1.5717, "step": 6213 }, { "epoch": 0.3463575051557884, "grad_norm": 0.5698938965797424, "learning_rate": 7.474767609205057e-05, "loss": 1.8471, "step": 6214 }, { "epoch": 0.3464132434089516, "grad_norm": 0.521270751953125, "learning_rate": 7.473993863554832e-05, "loss": 1.5991, "step": 6215 }, { "epoch": 0.3464689816621147, "grad_norm": 0.5909140110015869, "learning_rate": 7.473220039443942e-05, "loss": 1.8795, "step": 6216 }, { "epoch": 0.34652471991527783, "grad_norm": 0.5595431923866272, "learning_rate": 7.472446136896935e-05, "loss": 1.5189, "step": 6217 }, { "epoch": 0.346580458168441, "grad_norm": 0.5549118518829346, "learning_rate": 7.471672155938351e-05, "loss": 1.5113, "step": 6218 }, { "epoch": 0.34663619642160415, "grad_norm": 0.5784697532653809, "learning_rate": 7.470898096592738e-05, "loss": 1.62, "step": 6219 }, { "epoch": 0.3466919346747673, "grad_norm": 0.582065224647522, "learning_rate": 7.470123958884643e-05, "loss": 1.7652, "step": 6220 }, { "epoch": 0.34674767292793046, "grad_norm": 0.5781643986701965, "learning_rate": 7.469349742838619e-05, "loss": 1.816, "step": 6221 }, { "epoch": 0.3468034111810936, "grad_norm": 0.5270411968231201, "learning_rate": 7.468575448479217e-05, "loss": 1.4521, "step": 6222 }, { "epoch": 0.3468591494342567, "grad_norm": 0.5568832159042358, "learning_rate": 7.467801075830995e-05, "loss": 1.6393, "step": 6223 }, { "epoch": 0.3469148876874199, "grad_norm": 0.6102818846702576, "learning_rate": 7.467026624918511e-05, "loss": 1.8486, "step": 6224 }, { "epoch": 0.34697062594058303, "grad_norm": 0.6040059328079224, "learning_rate": 7.466252095766326e-05, "loss": 1.9639, "step": 6225 }, { "epoch": 0.34702636419374616, "grad_norm": 0.5577713847160339, "learning_rate": 7.465477488399004e-05, "loss": 1.7672, "step": 6226 }, { "epoch": 0.3470821024469093, "grad_norm": 0.6022251844406128, "learning_rate": 7.464702802841111e-05, "loss": 1.8587, "step": 6227 }, { "epoch": 0.3471378407000725, "grad_norm": 0.6043629050254822, "learning_rate": 7.463928039117216e-05, "loss": 1.6798, "step": 6228 }, { "epoch": 0.3471935789532356, "grad_norm": 0.5550456643104553, "learning_rate": 7.463153197251889e-05, "loss": 1.6258, "step": 6229 }, { "epoch": 0.34724931720639873, "grad_norm": 0.5740575790405273, "learning_rate": 7.462378277269704e-05, "loss": 1.6253, "step": 6230 }, { "epoch": 0.3473050554595619, "grad_norm": 0.5348698496818542, "learning_rate": 7.461603279195235e-05, "loss": 1.7417, "step": 6231 }, { "epoch": 0.34736079371272505, "grad_norm": 0.5703982710838318, "learning_rate": 7.460828203053063e-05, "loss": 1.8448, "step": 6232 }, { "epoch": 0.3474165319658882, "grad_norm": 0.5818899869918823, "learning_rate": 7.460053048867768e-05, "loss": 1.783, "step": 6233 }, { "epoch": 0.34747227021905136, "grad_norm": 0.5640279054641724, "learning_rate": 7.459277816663934e-05, "loss": 1.8757, "step": 6234 }, { "epoch": 0.3475280084722145, "grad_norm": 0.519883394241333, "learning_rate": 7.458502506466147e-05, "loss": 1.622, "step": 6235 }, { "epoch": 0.3475837467253776, "grad_norm": 0.5207779407501221, "learning_rate": 7.457727118298991e-05, "loss": 1.4801, "step": 6236 }, { "epoch": 0.34763948497854075, "grad_norm": 0.5227778553962708, "learning_rate": 7.456951652187063e-05, "loss": 1.6797, "step": 6237 }, { "epoch": 0.34769522323170393, "grad_norm": 0.6305186748504639, "learning_rate": 7.456176108154956e-05, "loss": 2.0804, "step": 6238 }, { "epoch": 0.34775096148486706, "grad_norm": 0.6344568133354187, "learning_rate": 7.45540048622726e-05, "loss": 1.881, "step": 6239 }, { "epoch": 0.3478066997380302, "grad_norm": 0.5849176645278931, "learning_rate": 7.454624786428576e-05, "loss": 1.7058, "step": 6240 }, { "epoch": 0.3478624379911934, "grad_norm": 0.5511870980262756, "learning_rate": 7.453849008783507e-05, "loss": 1.7262, "step": 6241 }, { "epoch": 0.3479181762443565, "grad_norm": 0.590895414352417, "learning_rate": 7.453073153316654e-05, "loss": 1.7584, "step": 6242 }, { "epoch": 0.34797391449751963, "grad_norm": 0.5347367525100708, "learning_rate": 7.452297220052624e-05, "loss": 1.7057, "step": 6243 }, { "epoch": 0.3480296527506828, "grad_norm": 0.5574136972427368, "learning_rate": 7.451521209016021e-05, "loss": 1.8928, "step": 6244 }, { "epoch": 0.34808539100384595, "grad_norm": 0.5794700384140015, "learning_rate": 7.450745120231462e-05, "loss": 1.9479, "step": 6245 }, { "epoch": 0.3481411292570091, "grad_norm": 0.5384243726730347, "learning_rate": 7.449968953723554e-05, "loss": 1.678, "step": 6246 }, { "epoch": 0.34819686751017226, "grad_norm": 0.560627281665802, "learning_rate": 7.449192709516916e-05, "loss": 1.7936, "step": 6247 }, { "epoch": 0.3482526057633354, "grad_norm": 0.6408939957618713, "learning_rate": 7.448416387636166e-05, "loss": 1.8022, "step": 6248 }, { "epoch": 0.3483083440164985, "grad_norm": 0.5532012581825256, "learning_rate": 7.447639988105922e-05, "loss": 1.6318, "step": 6249 }, { "epoch": 0.34836408226966165, "grad_norm": 0.6528187990188599, "learning_rate": 7.44686351095081e-05, "loss": 2.0857, "step": 6250 }, { "epoch": 0.34841982052282483, "grad_norm": 0.5271794199943542, "learning_rate": 7.446086956195452e-05, "loss": 1.6236, "step": 6251 }, { "epoch": 0.34847555877598796, "grad_norm": 0.6053271293640137, "learning_rate": 7.445310323864478e-05, "loss": 1.895, "step": 6252 }, { "epoch": 0.3485312970291511, "grad_norm": 0.5544027090072632, "learning_rate": 7.444533613982519e-05, "loss": 1.6158, "step": 6253 }, { "epoch": 0.3485870352823143, "grad_norm": 0.5839915871620178, "learning_rate": 7.443756826574204e-05, "loss": 1.7887, "step": 6254 }, { "epoch": 0.3486427735354774, "grad_norm": 0.5946133732795715, "learning_rate": 7.442979961664171e-05, "loss": 1.7628, "step": 6255 }, { "epoch": 0.34869851178864053, "grad_norm": 0.5356269478797913, "learning_rate": 7.442203019277059e-05, "loss": 1.6563, "step": 6256 }, { "epoch": 0.3487542500418037, "grad_norm": 0.5791853666305542, "learning_rate": 7.441425999437505e-05, "loss": 1.7944, "step": 6257 }, { "epoch": 0.34880998829496684, "grad_norm": 0.514127254486084, "learning_rate": 7.440648902170153e-05, "loss": 1.6007, "step": 6258 }, { "epoch": 0.34886572654813, "grad_norm": 0.5857915878295898, "learning_rate": 7.439871727499648e-05, "loss": 1.6401, "step": 6259 }, { "epoch": 0.3489214648012931, "grad_norm": 0.5310158729553223, "learning_rate": 7.439094475450638e-05, "loss": 1.6605, "step": 6260 }, { "epoch": 0.3489772030544563, "grad_norm": 0.5631361603736877, "learning_rate": 7.43831714604777e-05, "loss": 1.7541, "step": 6261 }, { "epoch": 0.3490329413076194, "grad_norm": 0.5697758197784424, "learning_rate": 7.4375397393157e-05, "loss": 1.5488, "step": 6262 }, { "epoch": 0.34908867956078254, "grad_norm": 0.5197820663452148, "learning_rate": 7.43676225527908e-05, "loss": 1.7463, "step": 6263 }, { "epoch": 0.34914441781394573, "grad_norm": 0.6369295120239258, "learning_rate": 7.43598469396257e-05, "loss": 2.106, "step": 6264 }, { "epoch": 0.34920015606710886, "grad_norm": 0.5751513242721558, "learning_rate": 7.435207055390828e-05, "loss": 1.8146, "step": 6265 }, { "epoch": 0.349255894320272, "grad_norm": 0.5785645246505737, "learning_rate": 7.434429339588516e-05, "loss": 1.8598, "step": 6266 }, { "epoch": 0.34931163257343517, "grad_norm": 0.5536054968833923, "learning_rate": 7.4336515465803e-05, "loss": 1.7508, "step": 6267 }, { "epoch": 0.3493673708265983, "grad_norm": 0.5529542565345764, "learning_rate": 7.432873676390845e-05, "loss": 1.7749, "step": 6268 }, { "epoch": 0.34942310907976143, "grad_norm": 0.5571187734603882, "learning_rate": 7.432095729044823e-05, "loss": 1.6954, "step": 6269 }, { "epoch": 0.3494788473329246, "grad_norm": 0.5445393323898315, "learning_rate": 7.431317704566902e-05, "loss": 1.5363, "step": 6270 }, { "epoch": 0.34953458558608774, "grad_norm": 0.5723183155059814, "learning_rate": 7.430539602981761e-05, "loss": 1.7007, "step": 6271 }, { "epoch": 0.34959032383925087, "grad_norm": 0.5553802847862244, "learning_rate": 7.429761424314075e-05, "loss": 1.9324, "step": 6272 }, { "epoch": 0.349646062092414, "grad_norm": 0.5308825969696045, "learning_rate": 7.428983168588522e-05, "loss": 1.6236, "step": 6273 }, { "epoch": 0.3497018003455772, "grad_norm": 0.5892744064331055, "learning_rate": 7.428204835829787e-05, "loss": 1.8567, "step": 6274 }, { "epoch": 0.3497575385987403, "grad_norm": 0.5890315175056458, "learning_rate": 7.42742642606255e-05, "loss": 1.7612, "step": 6275 }, { "epoch": 0.34981327685190344, "grad_norm": 0.5714004635810852, "learning_rate": 7.426647939311499e-05, "loss": 1.8783, "step": 6276 }, { "epoch": 0.3498690151050666, "grad_norm": 0.5221744775772095, "learning_rate": 7.425869375601324e-05, "loss": 1.533, "step": 6277 }, { "epoch": 0.34992475335822976, "grad_norm": 0.5754460692405701, "learning_rate": 7.425090734956717e-05, "loss": 1.7922, "step": 6278 }, { "epoch": 0.3499804916113929, "grad_norm": 0.5325612425804138, "learning_rate": 7.424312017402371e-05, "loss": 1.5523, "step": 6279 }, { "epoch": 0.35003622986455607, "grad_norm": 0.5452947020530701, "learning_rate": 7.423533222962984e-05, "loss": 1.7528, "step": 6280 }, { "epoch": 0.3500919681177192, "grad_norm": 0.5132524371147156, "learning_rate": 7.422754351663252e-05, "loss": 1.6118, "step": 6281 }, { "epoch": 0.35014770637088233, "grad_norm": 0.5661509037017822, "learning_rate": 7.421975403527877e-05, "loss": 1.7999, "step": 6282 }, { "epoch": 0.35020344462404546, "grad_norm": 0.5532317161560059, "learning_rate": 7.421196378581563e-05, "loss": 1.8317, "step": 6283 }, { "epoch": 0.35025918287720864, "grad_norm": 0.5239238142967224, "learning_rate": 7.420417276849018e-05, "loss": 1.6949, "step": 6284 }, { "epoch": 0.35031492113037177, "grad_norm": 0.5444215536117554, "learning_rate": 7.419638098354948e-05, "loss": 1.666, "step": 6285 }, { "epoch": 0.3503706593835349, "grad_norm": 0.5257874131202698, "learning_rate": 7.418858843124065e-05, "loss": 1.7663, "step": 6286 }, { "epoch": 0.3504263976366981, "grad_norm": 0.5424786806106567, "learning_rate": 7.418079511181084e-05, "loss": 1.6048, "step": 6287 }, { "epoch": 0.3504821358898612, "grad_norm": 0.5822529196739197, "learning_rate": 7.417300102550718e-05, "loss": 1.7153, "step": 6288 }, { "epoch": 0.35053787414302434, "grad_norm": 0.6322096586227417, "learning_rate": 7.416520617257686e-05, "loss": 2.0466, "step": 6289 }, { "epoch": 0.3505936123961875, "grad_norm": 0.6034446358680725, "learning_rate": 7.41574105532671e-05, "loss": 1.7793, "step": 6290 }, { "epoch": 0.35064935064935066, "grad_norm": 0.5261698365211487, "learning_rate": 7.414961416782512e-05, "loss": 1.6958, "step": 6291 }, { "epoch": 0.3507050889025138, "grad_norm": 0.5508055090904236, "learning_rate": 7.414181701649818e-05, "loss": 1.7336, "step": 6292 }, { "epoch": 0.35076082715567697, "grad_norm": 0.5106075406074524, "learning_rate": 7.413401909953356e-05, "loss": 1.5585, "step": 6293 }, { "epoch": 0.3508165654088401, "grad_norm": 0.5312706232070923, "learning_rate": 7.412622041717858e-05, "loss": 1.5692, "step": 6294 }, { "epoch": 0.3508723036620032, "grad_norm": 0.5598204135894775, "learning_rate": 7.411842096968055e-05, "loss": 1.6424, "step": 6295 }, { "epoch": 0.35092804191516636, "grad_norm": 0.5455132126808167, "learning_rate": 7.411062075728681e-05, "loss": 1.7084, "step": 6296 }, { "epoch": 0.35098378016832954, "grad_norm": 0.5335630774497986, "learning_rate": 7.410281978024478e-05, "loss": 1.6269, "step": 6297 }, { "epoch": 0.35103951842149267, "grad_norm": 0.5936735272407532, "learning_rate": 7.409501803880182e-05, "loss": 1.6821, "step": 6298 }, { "epoch": 0.3510952566746558, "grad_norm": 0.626340389251709, "learning_rate": 7.408721553320536e-05, "loss": 1.8958, "step": 6299 }, { "epoch": 0.351150994927819, "grad_norm": 0.5382502675056458, "learning_rate": 7.407941226370289e-05, "loss": 1.6456, "step": 6300 }, { "epoch": 0.3512067331809821, "grad_norm": 0.5597545504570007, "learning_rate": 7.407160823054182e-05, "loss": 1.7168, "step": 6301 }, { "epoch": 0.35126247143414524, "grad_norm": 0.5945395231246948, "learning_rate": 7.406380343396973e-05, "loss": 2.0034, "step": 6302 }, { "epoch": 0.3513182096873084, "grad_norm": 0.5297150611877441, "learning_rate": 7.405599787423406e-05, "loss": 1.5787, "step": 6303 }, { "epoch": 0.35137394794047155, "grad_norm": 0.5702363848686218, "learning_rate": 7.40481915515824e-05, "loss": 1.8993, "step": 6304 }, { "epoch": 0.3514296861936347, "grad_norm": 0.6293717622756958, "learning_rate": 7.404038446626231e-05, "loss": 1.9086, "step": 6305 }, { "epoch": 0.3514854244467978, "grad_norm": 0.579983651638031, "learning_rate": 7.403257661852142e-05, "loss": 1.74, "step": 6306 }, { "epoch": 0.351541162699961, "grad_norm": 0.558723509311676, "learning_rate": 7.40247680086073e-05, "loss": 1.7519, "step": 6307 }, { "epoch": 0.3515969009531241, "grad_norm": 0.5575239062309265, "learning_rate": 7.401695863676761e-05, "loss": 1.8393, "step": 6308 }, { "epoch": 0.35165263920628725, "grad_norm": 0.5667286515235901, "learning_rate": 7.400914850325001e-05, "loss": 1.7958, "step": 6309 }, { "epoch": 0.35170837745945044, "grad_norm": 0.5829740762710571, "learning_rate": 7.400133760830221e-05, "loss": 1.7113, "step": 6310 }, { "epoch": 0.35176411571261357, "grad_norm": 0.5255504846572876, "learning_rate": 7.399352595217193e-05, "loss": 1.6819, "step": 6311 }, { "epoch": 0.3518198539657767, "grad_norm": 0.5315664410591125, "learning_rate": 7.39857135351069e-05, "loss": 1.5692, "step": 6312 }, { "epoch": 0.3518755922189399, "grad_norm": 0.5694820880889893, "learning_rate": 7.397790035735487e-05, "loss": 1.813, "step": 6313 }, { "epoch": 0.351931330472103, "grad_norm": 0.5584225058555603, "learning_rate": 7.397008641916364e-05, "loss": 1.6653, "step": 6314 }, { "epoch": 0.35198706872526614, "grad_norm": 0.5575059652328491, "learning_rate": 7.396227172078103e-05, "loss": 1.7948, "step": 6315 }, { "epoch": 0.3520428069784293, "grad_norm": 0.5385696887969971, "learning_rate": 7.395445626245486e-05, "loss": 1.6823, "step": 6316 }, { "epoch": 0.35209854523159245, "grad_norm": 0.5181571841239929, "learning_rate": 7.394664004443302e-05, "loss": 1.4832, "step": 6317 }, { "epoch": 0.3521542834847556, "grad_norm": 0.5436875224113464, "learning_rate": 7.393882306696338e-05, "loss": 1.5743, "step": 6318 }, { "epoch": 0.3522100217379187, "grad_norm": 0.5831631422042847, "learning_rate": 7.393100533029383e-05, "loss": 1.7726, "step": 6319 }, { "epoch": 0.3522657599910819, "grad_norm": 0.5740854144096375, "learning_rate": 7.392318683467232e-05, "loss": 1.5639, "step": 6320 }, { "epoch": 0.352321498244245, "grad_norm": 0.5731649994850159, "learning_rate": 7.391536758034682e-05, "loss": 1.9563, "step": 6321 }, { "epoch": 0.35237723649740815, "grad_norm": 0.6104768514633179, "learning_rate": 7.390754756756526e-05, "loss": 1.6392, "step": 6322 }, { "epoch": 0.35243297475057134, "grad_norm": 0.5218120813369751, "learning_rate": 7.389972679657571e-05, "loss": 1.6262, "step": 6323 }, { "epoch": 0.35248871300373447, "grad_norm": 0.5537388324737549, "learning_rate": 7.389190526762618e-05, "loss": 1.7317, "step": 6324 }, { "epoch": 0.3525444512568976, "grad_norm": 0.577392578125, "learning_rate": 7.38840829809647e-05, "loss": 1.7069, "step": 6325 }, { "epoch": 0.3526001895100608, "grad_norm": 0.5511906147003174, "learning_rate": 7.387625993683937e-05, "loss": 1.6009, "step": 6326 }, { "epoch": 0.3526559277632239, "grad_norm": 0.5822625756263733, "learning_rate": 7.386843613549827e-05, "loss": 1.7174, "step": 6327 }, { "epoch": 0.35271166601638704, "grad_norm": 0.5413920879364014, "learning_rate": 7.386061157718955e-05, "loss": 1.5927, "step": 6328 }, { "epoch": 0.35276740426955017, "grad_norm": 0.5867698192596436, "learning_rate": 7.385278626216133e-05, "loss": 1.7494, "step": 6329 }, { "epoch": 0.35282314252271335, "grad_norm": 0.6775004863739014, "learning_rate": 7.384496019066182e-05, "loss": 1.8777, "step": 6330 }, { "epoch": 0.3528788807758765, "grad_norm": 0.6009215116500854, "learning_rate": 7.383713336293919e-05, "loss": 1.7538, "step": 6331 }, { "epoch": 0.3529346190290396, "grad_norm": 0.5513560771942139, "learning_rate": 7.382930577924168e-05, "loss": 1.6307, "step": 6332 }, { "epoch": 0.3529903572822028, "grad_norm": 0.5479623079299927, "learning_rate": 7.382147743981751e-05, "loss": 1.6945, "step": 6333 }, { "epoch": 0.3530460955353659, "grad_norm": 0.603458046913147, "learning_rate": 7.381364834491499e-05, "loss": 1.7531, "step": 6334 }, { "epoch": 0.35310183378852905, "grad_norm": 0.951324999332428, "learning_rate": 7.380581849478236e-05, "loss": 1.8593, "step": 6335 }, { "epoch": 0.35315757204169224, "grad_norm": 0.5293959975242615, "learning_rate": 7.379798788966798e-05, "loss": 1.7638, "step": 6336 }, { "epoch": 0.35321331029485536, "grad_norm": 0.5229690670967102, "learning_rate": 7.379015652982016e-05, "loss": 1.7042, "step": 6337 }, { "epoch": 0.3532690485480185, "grad_norm": 0.5152291059494019, "learning_rate": 7.378232441548729e-05, "loss": 1.607, "step": 6338 }, { "epoch": 0.3533247868011817, "grad_norm": 0.5136567950248718, "learning_rate": 7.377449154691775e-05, "loss": 1.7222, "step": 6339 }, { "epoch": 0.3533805250543448, "grad_norm": 0.5531160235404968, "learning_rate": 7.376665792435996e-05, "loss": 1.6946, "step": 6340 }, { "epoch": 0.35343626330750794, "grad_norm": 0.554097592830658, "learning_rate": 7.375882354806235e-05, "loss": 1.6551, "step": 6341 }, { "epoch": 0.35349200156067107, "grad_norm": 0.5862346887588501, "learning_rate": 7.375098841827337e-05, "loss": 1.7594, "step": 6342 }, { "epoch": 0.35354773981383425, "grad_norm": 0.5202105641365051, "learning_rate": 7.374315253524152e-05, "loss": 1.6205, "step": 6343 }, { "epoch": 0.3536034780669974, "grad_norm": 0.5510536432266235, "learning_rate": 7.373531589921531e-05, "loss": 1.5776, "step": 6344 }, { "epoch": 0.3536592163201605, "grad_norm": 0.5484849214553833, "learning_rate": 7.372747851044326e-05, "loss": 1.5603, "step": 6345 }, { "epoch": 0.3537149545733237, "grad_norm": 0.55774986743927, "learning_rate": 7.371964036917394e-05, "loss": 1.7814, "step": 6346 }, { "epoch": 0.3537706928264868, "grad_norm": 0.5338320732116699, "learning_rate": 7.371180147565592e-05, "loss": 1.5941, "step": 6347 }, { "epoch": 0.35382643107964995, "grad_norm": 0.5263161659240723, "learning_rate": 7.370396183013779e-05, "loss": 1.2328, "step": 6348 }, { "epoch": 0.35388216933281313, "grad_norm": 0.533647894859314, "learning_rate": 7.369612143286822e-05, "loss": 1.7327, "step": 6349 }, { "epoch": 0.35393790758597626, "grad_norm": 0.5682227611541748, "learning_rate": 7.368828028409581e-05, "loss": 1.8406, "step": 6350 }, { "epoch": 0.3539936458391394, "grad_norm": 0.5832127332687378, "learning_rate": 7.368043838406927e-05, "loss": 1.7841, "step": 6351 }, { "epoch": 0.3540493840923025, "grad_norm": 0.5741327404975891, "learning_rate": 7.36725957330373e-05, "loss": 1.787, "step": 6352 }, { "epoch": 0.3541051223454657, "grad_norm": 0.5750821828842163, "learning_rate": 7.366475233124861e-05, "loss": 1.7946, "step": 6353 }, { "epoch": 0.35416086059862883, "grad_norm": 0.5595529079437256, "learning_rate": 7.365690817895195e-05, "loss": 1.6904, "step": 6354 }, { "epoch": 0.35421659885179196, "grad_norm": 0.5768024921417236, "learning_rate": 7.364906327639608e-05, "loss": 1.7634, "step": 6355 }, { "epoch": 0.35427233710495515, "grad_norm": 0.5867105722427368, "learning_rate": 7.364121762382983e-05, "loss": 1.7406, "step": 6356 }, { "epoch": 0.3543280753581183, "grad_norm": 0.5967558026313782, "learning_rate": 7.363337122150197e-05, "loss": 1.5078, "step": 6357 }, { "epoch": 0.3543838136112814, "grad_norm": 0.5712282061576843, "learning_rate": 7.36255240696614e-05, "loss": 1.767, "step": 6358 }, { "epoch": 0.3544395518644446, "grad_norm": 0.5473513603210449, "learning_rate": 7.361767616855692e-05, "loss": 1.6409, "step": 6359 }, { "epoch": 0.3544952901176077, "grad_norm": 0.5412675738334656, "learning_rate": 7.360982751843747e-05, "loss": 1.6319, "step": 6360 }, { "epoch": 0.35455102837077085, "grad_norm": 0.5327848792076111, "learning_rate": 7.360197811955194e-05, "loss": 1.511, "step": 6361 }, { "epoch": 0.35460676662393403, "grad_norm": 0.5604977607727051, "learning_rate": 7.359412797214929e-05, "loss": 1.7604, "step": 6362 }, { "epoch": 0.35466250487709716, "grad_norm": 0.5807721018791199, "learning_rate": 7.358627707647844e-05, "loss": 1.5816, "step": 6363 }, { "epoch": 0.3547182431302603, "grad_norm": 0.5296190977096558, "learning_rate": 7.357842543278841e-05, "loss": 1.2601, "step": 6364 }, { "epoch": 0.3547739813834234, "grad_norm": 0.5498451590538025, "learning_rate": 7.357057304132819e-05, "loss": 1.8474, "step": 6365 }, { "epoch": 0.3548297196365866, "grad_norm": 0.5772817134857178, "learning_rate": 7.356271990234683e-05, "loss": 1.7508, "step": 6366 }, { "epoch": 0.35488545788974973, "grad_norm": 0.520463764667511, "learning_rate": 7.355486601609339e-05, "loss": 1.5589, "step": 6367 }, { "epoch": 0.35494119614291286, "grad_norm": 0.5433523058891296, "learning_rate": 7.354701138281688e-05, "loss": 1.7982, "step": 6368 }, { "epoch": 0.35499693439607605, "grad_norm": 0.587772011756897, "learning_rate": 7.35391560027665e-05, "loss": 1.7944, "step": 6369 }, { "epoch": 0.3550526726492392, "grad_norm": 0.562419056892395, "learning_rate": 7.353129987619133e-05, "loss": 1.8376, "step": 6370 }, { "epoch": 0.3551084109024023, "grad_norm": 0.524745523929596, "learning_rate": 7.352344300334053e-05, "loss": 1.575, "step": 6371 }, { "epoch": 0.3551641491555655, "grad_norm": 0.5049068927764893, "learning_rate": 7.351558538446326e-05, "loss": 1.3716, "step": 6372 }, { "epoch": 0.3552198874087286, "grad_norm": 0.6006641387939453, "learning_rate": 7.350772701980872e-05, "loss": 1.9018, "step": 6373 }, { "epoch": 0.35527562566189175, "grad_norm": 0.5516168475151062, "learning_rate": 7.349986790962613e-05, "loss": 1.6401, "step": 6374 }, { "epoch": 0.3553313639150549, "grad_norm": 0.5250164270401001, "learning_rate": 7.349200805416478e-05, "loss": 1.5694, "step": 6375 }, { "epoch": 0.35538710216821806, "grad_norm": 0.5079348087310791, "learning_rate": 7.348414745367387e-05, "loss": 1.6291, "step": 6376 }, { "epoch": 0.3554428404213812, "grad_norm": 0.5634783506393433, "learning_rate": 7.347628610840274e-05, "loss": 1.6777, "step": 6377 }, { "epoch": 0.3554985786745443, "grad_norm": 0.5921057462692261, "learning_rate": 7.346842401860069e-05, "loss": 1.922, "step": 6378 }, { "epoch": 0.3555543169277075, "grad_norm": 0.5826466679573059, "learning_rate": 7.346056118451705e-05, "loss": 1.7305, "step": 6379 }, { "epoch": 0.35561005518087063, "grad_norm": 0.5478690266609192, "learning_rate": 7.345269760640121e-05, "loss": 1.7387, "step": 6380 }, { "epoch": 0.35566579343403376, "grad_norm": 0.5795879364013672, "learning_rate": 7.344483328450253e-05, "loss": 1.6662, "step": 6381 }, { "epoch": 0.35572153168719695, "grad_norm": 0.5886217355728149, "learning_rate": 7.343696821907042e-05, "loss": 1.8065, "step": 6382 }, { "epoch": 0.3557772699403601, "grad_norm": 0.6385563611984253, "learning_rate": 7.342910241035434e-05, "loss": 1.7933, "step": 6383 }, { "epoch": 0.3558330081935232, "grad_norm": 0.5828480124473572, "learning_rate": 7.342123585860374e-05, "loss": 1.6203, "step": 6384 }, { "epoch": 0.3558887464466864, "grad_norm": 0.5478693842887878, "learning_rate": 7.341336856406808e-05, "loss": 1.6706, "step": 6385 }, { "epoch": 0.3559444846998495, "grad_norm": 0.5751214027404785, "learning_rate": 7.340550052699689e-05, "loss": 1.8427, "step": 6386 }, { "epoch": 0.35600022295301265, "grad_norm": 0.5512586236000061, "learning_rate": 7.339763174763968e-05, "loss": 1.7332, "step": 6387 }, { "epoch": 0.3560559612061758, "grad_norm": 0.5546371340751648, "learning_rate": 7.3389762226246e-05, "loss": 1.5966, "step": 6388 }, { "epoch": 0.35611169945933896, "grad_norm": 0.5267236232757568, "learning_rate": 7.338189196306544e-05, "loss": 1.8137, "step": 6389 }, { "epoch": 0.3561674377125021, "grad_norm": 0.5219095945358276, "learning_rate": 7.33740209583476e-05, "loss": 1.6799, "step": 6390 }, { "epoch": 0.3562231759656652, "grad_norm": 0.5330881476402283, "learning_rate": 7.33661492123421e-05, "loss": 1.6959, "step": 6391 }, { "epoch": 0.3562789142188284, "grad_norm": 0.5660157203674316, "learning_rate": 7.335827672529856e-05, "loss": 1.7565, "step": 6392 }, { "epoch": 0.35633465247199153, "grad_norm": 0.5627869963645935, "learning_rate": 7.335040349746669e-05, "loss": 1.7526, "step": 6393 }, { "epoch": 0.35639039072515466, "grad_norm": 0.588152289390564, "learning_rate": 7.334252952909615e-05, "loss": 1.64, "step": 6394 }, { "epoch": 0.35644612897831784, "grad_norm": 0.5885617733001709, "learning_rate": 7.333465482043667e-05, "loss": 1.7358, "step": 6395 }, { "epoch": 0.356501867231481, "grad_norm": 0.6158447265625, "learning_rate": 7.3326779371738e-05, "loss": 1.854, "step": 6396 }, { "epoch": 0.3565576054846441, "grad_norm": 0.5353176593780518, "learning_rate": 7.33189031832499e-05, "loss": 1.6502, "step": 6397 }, { "epoch": 0.35661334373780723, "grad_norm": 0.5986976027488708, "learning_rate": 7.331102625522212e-05, "loss": 1.6757, "step": 6398 }, { "epoch": 0.3566690819909704, "grad_norm": 0.5034981966018677, "learning_rate": 7.330314858790453e-05, "loss": 1.5362, "step": 6399 }, { "epoch": 0.35672482024413354, "grad_norm": 0.5768936276435852, "learning_rate": 7.32952701815469e-05, "loss": 1.7302, "step": 6400 }, { "epoch": 0.3567805584972967, "grad_norm": 0.5493230819702148, "learning_rate": 7.328739103639916e-05, "loss": 1.7755, "step": 6401 }, { "epoch": 0.35683629675045986, "grad_norm": 0.5121830105781555, "learning_rate": 7.327951115271113e-05, "loss": 1.5803, "step": 6402 }, { "epoch": 0.356892035003623, "grad_norm": 0.546416699886322, "learning_rate": 7.327163053073273e-05, "loss": 1.5991, "step": 6403 }, { "epoch": 0.3569477732567861, "grad_norm": 0.5108504891395569, "learning_rate": 7.32637491707139e-05, "loss": 1.6789, "step": 6404 }, { "epoch": 0.3570035115099493, "grad_norm": 0.5747851729393005, "learning_rate": 7.32558670729046e-05, "loss": 1.8266, "step": 6405 }, { "epoch": 0.35705924976311243, "grad_norm": 0.587032675743103, "learning_rate": 7.324798423755476e-05, "loss": 1.6093, "step": 6406 }, { "epoch": 0.35711498801627556, "grad_norm": 0.5485719442367554, "learning_rate": 7.324010066491442e-05, "loss": 1.6672, "step": 6407 }, { "epoch": 0.35717072626943874, "grad_norm": 0.5325014591217041, "learning_rate": 7.323221635523358e-05, "loss": 1.7776, "step": 6408 }, { "epoch": 0.35722646452260187, "grad_norm": 0.5524224638938904, "learning_rate": 7.32243313087623e-05, "loss": 1.9326, "step": 6409 }, { "epoch": 0.357282202775765, "grad_norm": 0.5688652396202087, "learning_rate": 7.321644552575062e-05, "loss": 1.8942, "step": 6410 }, { "epoch": 0.35733794102892813, "grad_norm": 0.5133098363876343, "learning_rate": 7.320855900644867e-05, "loss": 1.6339, "step": 6411 }, { "epoch": 0.3573936792820913, "grad_norm": 0.5422292947769165, "learning_rate": 7.320067175110653e-05, "loss": 1.681, "step": 6412 }, { "epoch": 0.35744941753525444, "grad_norm": 0.5691182613372803, "learning_rate": 7.319278375997436e-05, "loss": 1.847, "step": 6413 }, { "epoch": 0.3575051557884176, "grad_norm": 0.5584883689880371, "learning_rate": 7.31848950333023e-05, "loss": 1.7616, "step": 6414 }, { "epoch": 0.35756089404158076, "grad_norm": 0.5878840088844299, "learning_rate": 7.317700557134056e-05, "loss": 1.7561, "step": 6415 }, { "epoch": 0.3576166322947439, "grad_norm": 0.5363910794258118, "learning_rate": 7.316911537433933e-05, "loss": 1.6086, "step": 6416 }, { "epoch": 0.357672370547907, "grad_norm": 0.5783511996269226, "learning_rate": 7.316122444254884e-05, "loss": 1.7853, "step": 6417 }, { "epoch": 0.3577281088010702, "grad_norm": 0.5695887804031372, "learning_rate": 7.315333277621935e-05, "loss": 1.5816, "step": 6418 }, { "epoch": 0.35778384705423333, "grad_norm": 0.5631670355796814, "learning_rate": 7.314544037560114e-05, "loss": 1.5703, "step": 6419 }, { "epoch": 0.35783958530739646, "grad_norm": 0.5459564328193665, "learning_rate": 7.313754724094451e-05, "loss": 1.6222, "step": 6420 }, { "epoch": 0.3578953235605596, "grad_norm": 0.5215150117874146, "learning_rate": 7.312965337249979e-05, "loss": 1.7888, "step": 6421 }, { "epoch": 0.35795106181372277, "grad_norm": 0.5654617547988892, "learning_rate": 7.312175877051732e-05, "loss": 1.7508, "step": 6422 }, { "epoch": 0.3580068000668859, "grad_norm": 0.5510186553001404, "learning_rate": 7.311386343524747e-05, "loss": 1.8401, "step": 6423 }, { "epoch": 0.35806253832004903, "grad_norm": 0.521782398223877, "learning_rate": 7.310596736694062e-05, "loss": 1.5428, "step": 6424 }, { "epoch": 0.3581182765732122, "grad_norm": 0.5308924317359924, "learning_rate": 7.309807056584722e-05, "loss": 1.464, "step": 6425 }, { "epoch": 0.35817401482637534, "grad_norm": 0.5567795634269714, "learning_rate": 7.309017303221768e-05, "loss": 1.7063, "step": 6426 }, { "epoch": 0.35822975307953847, "grad_norm": 0.5558245778083801, "learning_rate": 7.308227476630249e-05, "loss": 1.6636, "step": 6427 }, { "epoch": 0.35828549133270166, "grad_norm": 0.5258497595787048, "learning_rate": 7.30743757683521e-05, "loss": 1.5777, "step": 6428 }, { "epoch": 0.3583412295858648, "grad_norm": 0.5101563930511475, "learning_rate": 7.306647603861706e-05, "loss": 1.5602, "step": 6429 }, { "epoch": 0.3583969678390279, "grad_norm": 0.5508061647415161, "learning_rate": 7.305857557734789e-05, "loss": 1.659, "step": 6430 }, { "epoch": 0.3584527060921911, "grad_norm": 0.6159545183181763, "learning_rate": 7.305067438479513e-05, "loss": 1.9413, "step": 6431 }, { "epoch": 0.3585084443453542, "grad_norm": 0.5804408192634583, "learning_rate": 7.30427724612094e-05, "loss": 1.7138, "step": 6432 }, { "epoch": 0.35856418259851736, "grad_norm": 0.5316668748855591, "learning_rate": 7.303486980684125e-05, "loss": 1.7588, "step": 6433 }, { "epoch": 0.3586199208516805, "grad_norm": 0.6093178391456604, "learning_rate": 7.302696642194134e-05, "loss": 1.8426, "step": 6434 }, { "epoch": 0.35867565910484367, "grad_norm": 0.5371636152267456, "learning_rate": 7.30190623067603e-05, "loss": 1.5852, "step": 6435 }, { "epoch": 0.3587313973580068, "grad_norm": 0.5050824284553528, "learning_rate": 7.301115746154884e-05, "loss": 1.5495, "step": 6436 }, { "epoch": 0.3587871356111699, "grad_norm": 0.5830590724945068, "learning_rate": 7.300325188655761e-05, "loss": 1.8611, "step": 6437 }, { "epoch": 0.3588428738643331, "grad_norm": 0.5415953397750854, "learning_rate": 7.299534558203735e-05, "loss": 1.6437, "step": 6438 }, { "epoch": 0.35889861211749624, "grad_norm": 0.5701804757118225, "learning_rate": 7.298743854823882e-05, "loss": 1.8723, "step": 6439 }, { "epoch": 0.35895435037065937, "grad_norm": 0.5361306667327881, "learning_rate": 7.297953078541274e-05, "loss": 1.518, "step": 6440 }, { "epoch": 0.35901008862382255, "grad_norm": 0.5895618796348572, "learning_rate": 7.297162229380994e-05, "loss": 1.8528, "step": 6441 }, { "epoch": 0.3590658268769857, "grad_norm": 0.5555623173713684, "learning_rate": 7.29637130736812e-05, "loss": 1.6619, "step": 6442 }, { "epoch": 0.3591215651301488, "grad_norm": 0.5527105331420898, "learning_rate": 7.295580312527739e-05, "loss": 1.8209, "step": 6443 }, { "epoch": 0.35917730338331194, "grad_norm": 0.5717308521270752, "learning_rate": 7.294789244884932e-05, "loss": 1.6109, "step": 6444 }, { "epoch": 0.3592330416364751, "grad_norm": 0.5484607815742493, "learning_rate": 7.293998104464792e-05, "loss": 1.7449, "step": 6445 }, { "epoch": 0.35928877988963825, "grad_norm": 0.5548183917999268, "learning_rate": 7.293206891292405e-05, "loss": 1.7952, "step": 6446 }, { "epoch": 0.3593445181428014, "grad_norm": 0.5666037201881409, "learning_rate": 7.292415605392867e-05, "loss": 1.8784, "step": 6447 }, { "epoch": 0.35940025639596457, "grad_norm": 0.5922662615776062, "learning_rate": 7.291624246791272e-05, "loss": 1.8764, "step": 6448 }, { "epoch": 0.3594559946491277, "grad_norm": 0.5456053018569946, "learning_rate": 7.290832815512716e-05, "loss": 1.7389, "step": 6449 }, { "epoch": 0.3595117329022908, "grad_norm": 0.5417848229408264, "learning_rate": 7.290041311582301e-05, "loss": 1.591, "step": 6450 }, { "epoch": 0.359567471155454, "grad_norm": 0.5787496566772461, "learning_rate": 7.289249735025127e-05, "loss": 1.765, "step": 6451 }, { "epoch": 0.35962320940861714, "grad_norm": 0.5513389110565186, "learning_rate": 7.288458085866298e-05, "loss": 1.6685, "step": 6452 }, { "epoch": 0.35967894766178027, "grad_norm": 0.5737441182136536, "learning_rate": 7.287666364130921e-05, "loss": 1.6956, "step": 6453 }, { "epoch": 0.35973468591494345, "grad_norm": 0.6044551134109497, "learning_rate": 7.286874569844106e-05, "loss": 1.7829, "step": 6454 }, { "epoch": 0.3597904241681066, "grad_norm": 0.5688374638557434, "learning_rate": 7.286082703030961e-05, "loss": 1.8747, "step": 6455 }, { "epoch": 0.3598461624212697, "grad_norm": 0.5276156067848206, "learning_rate": 7.285290763716604e-05, "loss": 1.5944, "step": 6456 }, { "epoch": 0.35990190067443284, "grad_norm": 0.5913518667221069, "learning_rate": 7.284498751926147e-05, "loss": 1.6307, "step": 6457 }, { "epoch": 0.359957638927596, "grad_norm": 0.5470561981201172, "learning_rate": 7.283706667684709e-05, "loss": 1.6096, "step": 6458 }, { "epoch": 0.36001337718075915, "grad_norm": 0.5165275931358337, "learning_rate": 7.28291451101741e-05, "loss": 1.6963, "step": 6459 }, { "epoch": 0.3600691154339223, "grad_norm": 0.552894651889801, "learning_rate": 7.282122281949374e-05, "loss": 1.7304, "step": 6460 }, { "epoch": 0.36012485368708547, "grad_norm": 0.573884129524231, "learning_rate": 7.281329980505724e-05, "loss": 1.8304, "step": 6461 }, { "epoch": 0.3601805919402486, "grad_norm": 0.5113431811332703, "learning_rate": 7.280537606711589e-05, "loss": 1.509, "step": 6462 }, { "epoch": 0.3602363301934117, "grad_norm": 0.54507976770401, "learning_rate": 7.279745160592097e-05, "loss": 1.765, "step": 6463 }, { "epoch": 0.3602920684465749, "grad_norm": 0.5524507761001587, "learning_rate": 7.278952642172381e-05, "loss": 1.6604, "step": 6464 }, { "epoch": 0.36034780669973804, "grad_norm": 0.5713779926300049, "learning_rate": 7.278160051477574e-05, "loss": 1.6273, "step": 6465 }, { "epoch": 0.36040354495290117, "grad_norm": 0.5713092684745789, "learning_rate": 7.277367388532812e-05, "loss": 1.7693, "step": 6466 }, { "epoch": 0.3604592832060643, "grad_norm": 0.5316145420074463, "learning_rate": 7.276574653363236e-05, "loss": 1.6402, "step": 6467 }, { "epoch": 0.3605150214592275, "grad_norm": 0.5453936457633972, "learning_rate": 7.275781845993983e-05, "loss": 1.9642, "step": 6468 }, { "epoch": 0.3605707597123906, "grad_norm": 0.5773400068283081, "learning_rate": 7.274988966450201e-05, "loss": 1.8417, "step": 6469 }, { "epoch": 0.36062649796555374, "grad_norm": 0.5517837405204773, "learning_rate": 7.274196014757032e-05, "loss": 1.6307, "step": 6470 }, { "epoch": 0.3606822362187169, "grad_norm": 0.5454963445663452, "learning_rate": 7.273402990939626e-05, "loss": 1.7725, "step": 6471 }, { "epoch": 0.36073797447188005, "grad_norm": 0.5993366837501526, "learning_rate": 7.272609895023129e-05, "loss": 1.831, "step": 6472 }, { "epoch": 0.3607937127250432, "grad_norm": 0.5621082186698914, "learning_rate": 7.2718167270327e-05, "loss": 1.4942, "step": 6473 }, { "epoch": 0.36084945097820637, "grad_norm": 0.5455790758132935, "learning_rate": 7.271023486993488e-05, "loss": 1.722, "step": 6474 }, { "epoch": 0.3609051892313695, "grad_norm": 0.5093836784362793, "learning_rate": 7.270230174930653e-05, "loss": 1.5921, "step": 6475 }, { "epoch": 0.3609609274845326, "grad_norm": 0.5746651887893677, "learning_rate": 7.269436790869352e-05, "loss": 1.7303, "step": 6476 }, { "epoch": 0.3610166657376958, "grad_norm": 0.5042871832847595, "learning_rate": 7.268643334834748e-05, "loss": 1.4386, "step": 6477 }, { "epoch": 0.36107240399085894, "grad_norm": 0.6014384627342224, "learning_rate": 7.267849806852005e-05, "loss": 1.7803, "step": 6478 }, { "epoch": 0.36112814224402207, "grad_norm": 0.49684464931488037, "learning_rate": 7.267056206946289e-05, "loss": 1.6513, "step": 6479 }, { "epoch": 0.3611838804971852, "grad_norm": 0.6013120412826538, "learning_rate": 7.266262535142767e-05, "loss": 1.718, "step": 6480 }, { "epoch": 0.3612396187503484, "grad_norm": 0.5482946038246155, "learning_rate": 7.26546879146661e-05, "loss": 1.8295, "step": 6481 }, { "epoch": 0.3612953570035115, "grad_norm": 0.5593370199203491, "learning_rate": 7.264674975942994e-05, "loss": 1.8042, "step": 6482 }, { "epoch": 0.36135109525667464, "grad_norm": 0.5430756211280823, "learning_rate": 7.26388108859709e-05, "loss": 1.6976, "step": 6483 }, { "epoch": 0.3614068335098378, "grad_norm": 0.5408653020858765, "learning_rate": 7.263087129454078e-05, "loss": 1.5425, "step": 6484 }, { "epoch": 0.36146257176300095, "grad_norm": 0.5399406552314758, "learning_rate": 7.262293098539134e-05, "loss": 1.7552, "step": 6485 }, { "epoch": 0.3615183100161641, "grad_norm": 0.5077804923057556, "learning_rate": 7.261498995877447e-05, "loss": 1.5728, "step": 6486 }, { "epoch": 0.36157404826932726, "grad_norm": 0.5409159660339355, "learning_rate": 7.260704821494196e-05, "loss": 1.7926, "step": 6487 }, { "epoch": 0.3616297865224904, "grad_norm": 0.4922293424606323, "learning_rate": 7.259910575414569e-05, "loss": 1.46, "step": 6488 }, { "epoch": 0.3616855247756535, "grad_norm": 0.530104398727417, "learning_rate": 7.259116257663753e-05, "loss": 1.4995, "step": 6489 }, { "epoch": 0.36174126302881665, "grad_norm": 0.5683631896972656, "learning_rate": 7.258321868266943e-05, "loss": 1.6736, "step": 6490 }, { "epoch": 0.36179700128197984, "grad_norm": 0.5562074184417725, "learning_rate": 7.25752740724933e-05, "loss": 1.6224, "step": 6491 }, { "epoch": 0.36185273953514296, "grad_norm": 0.6077651381492615, "learning_rate": 7.256732874636109e-05, "loss": 1.7814, "step": 6492 }, { "epoch": 0.3619084777883061, "grad_norm": 0.5739646553993225, "learning_rate": 7.255938270452479e-05, "loss": 1.7024, "step": 6493 }, { "epoch": 0.3619642160414693, "grad_norm": 0.5540484189987183, "learning_rate": 7.25514359472364e-05, "loss": 1.5576, "step": 6494 }, { "epoch": 0.3620199542946324, "grad_norm": 0.5674034953117371, "learning_rate": 7.254348847474797e-05, "loss": 1.8389, "step": 6495 }, { "epoch": 0.36207569254779554, "grad_norm": 0.5664230585098267, "learning_rate": 7.253554028731148e-05, "loss": 1.7194, "step": 6496 }, { "epoch": 0.3621314308009587, "grad_norm": 0.5525626540184021, "learning_rate": 7.252759138517909e-05, "loss": 1.3394, "step": 6497 }, { "epoch": 0.36218716905412185, "grad_norm": 0.5549319982528687, "learning_rate": 7.251964176860281e-05, "loss": 1.6234, "step": 6498 }, { "epoch": 0.362242907307285, "grad_norm": 0.5454506874084473, "learning_rate": 7.25116914378348e-05, "loss": 1.8937, "step": 6499 }, { "epoch": 0.36229864556044816, "grad_norm": 0.5178475379943848, "learning_rate": 7.25037403931272e-05, "loss": 1.5599, "step": 6500 }, { "epoch": 0.3623543838136113, "grad_norm": 0.5836609601974487, "learning_rate": 7.249578863473216e-05, "loss": 1.8547, "step": 6501 }, { "epoch": 0.3624101220667744, "grad_norm": 0.5162068605422974, "learning_rate": 7.248783616290186e-05, "loss": 1.4538, "step": 6502 }, { "epoch": 0.36246586031993755, "grad_norm": 0.5959255695343018, "learning_rate": 7.24798829778885e-05, "loss": 1.8237, "step": 6503 }, { "epoch": 0.36252159857310073, "grad_norm": 0.5471253395080566, "learning_rate": 7.247192907994433e-05, "loss": 1.5705, "step": 6504 }, { "epoch": 0.36257733682626386, "grad_norm": 0.5264948010444641, "learning_rate": 7.246397446932159e-05, "loss": 1.6597, "step": 6505 }, { "epoch": 0.362633075079427, "grad_norm": 0.5829636454582214, "learning_rate": 7.245601914627255e-05, "loss": 1.9137, "step": 6506 }, { "epoch": 0.3626888133325902, "grad_norm": 0.5371459722518921, "learning_rate": 7.244806311104952e-05, "loss": 1.5883, "step": 6507 }, { "epoch": 0.3627445515857533, "grad_norm": 0.6225298643112183, "learning_rate": 7.24401063639048e-05, "loss": 1.9112, "step": 6508 }, { "epoch": 0.36280028983891643, "grad_norm": 0.5452820062637329, "learning_rate": 7.243214890509073e-05, "loss": 1.6557, "step": 6509 }, { "epoch": 0.3628560280920796, "grad_norm": 0.5052100419998169, "learning_rate": 7.24241907348597e-05, "loss": 1.4815, "step": 6510 }, { "epoch": 0.36291176634524275, "grad_norm": 0.5527931451797485, "learning_rate": 7.241623185346409e-05, "loss": 1.6867, "step": 6511 }, { "epoch": 0.3629675045984059, "grad_norm": 0.5412555932998657, "learning_rate": 7.240827226115629e-05, "loss": 1.5461, "step": 6512 }, { "epoch": 0.363023242851569, "grad_norm": 0.5910593271255493, "learning_rate": 7.240031195818874e-05, "loss": 1.7713, "step": 6513 }, { "epoch": 0.3630789811047322, "grad_norm": 0.5672844052314758, "learning_rate": 7.239235094481391e-05, "loss": 1.3757, "step": 6514 }, { "epoch": 0.3631347193578953, "grad_norm": 0.580847442150116, "learning_rate": 7.238438922128425e-05, "loss": 1.9571, "step": 6515 }, { "epoch": 0.36319045761105845, "grad_norm": 0.642082691192627, "learning_rate": 7.237642678785228e-05, "loss": 1.9311, "step": 6516 }, { "epoch": 0.36324619586422163, "grad_norm": 0.49659648537635803, "learning_rate": 7.236846364477052e-05, "loss": 1.6393, "step": 6517 }, { "epoch": 0.36330193411738476, "grad_norm": 0.5082789063453674, "learning_rate": 7.23604997922915e-05, "loss": 1.5183, "step": 6518 }, { "epoch": 0.3633576723705479, "grad_norm": 0.5978274941444397, "learning_rate": 7.235253523066781e-05, "loss": 1.8529, "step": 6519 }, { "epoch": 0.3634134106237111, "grad_norm": 0.5323169231414795, "learning_rate": 7.234456996015202e-05, "loss": 1.6463, "step": 6520 }, { "epoch": 0.3634691488768742, "grad_norm": 0.5250840187072754, "learning_rate": 7.233660398099675e-05, "loss": 1.4439, "step": 6521 }, { "epoch": 0.36352488713003733, "grad_norm": 0.566667914390564, "learning_rate": 7.232863729345464e-05, "loss": 1.5871, "step": 6522 }, { "epoch": 0.3635806253832005, "grad_norm": 0.5944371223449707, "learning_rate": 7.232066989777833e-05, "loss": 1.978, "step": 6523 }, { "epoch": 0.36363636363636365, "grad_norm": 0.556692361831665, "learning_rate": 7.231270179422051e-05, "loss": 1.5579, "step": 6524 }, { "epoch": 0.3636921018895268, "grad_norm": 0.5578793883323669, "learning_rate": 7.230473298303388e-05, "loss": 1.6899, "step": 6525 }, { "epoch": 0.3637478401426899, "grad_norm": 0.672099232673645, "learning_rate": 7.229676346447117e-05, "loss": 2.1324, "step": 6526 }, { "epoch": 0.3638035783958531, "grad_norm": 0.5312888622283936, "learning_rate": 7.228879323878512e-05, "loss": 1.453, "step": 6527 }, { "epoch": 0.3638593166490162, "grad_norm": 0.5675061345100403, "learning_rate": 7.22808223062285e-05, "loss": 1.8623, "step": 6528 }, { "epoch": 0.36391505490217935, "grad_norm": 0.49803319573402405, "learning_rate": 7.227285066705412e-05, "loss": 1.41, "step": 6529 }, { "epoch": 0.36397079315534253, "grad_norm": 0.5489189028739929, "learning_rate": 7.226487832151476e-05, "loss": 1.6551, "step": 6530 }, { "epoch": 0.36402653140850566, "grad_norm": 0.5769960284233093, "learning_rate": 7.225690526986326e-05, "loss": 1.7853, "step": 6531 }, { "epoch": 0.3640822696616688, "grad_norm": 0.5400393605232239, "learning_rate": 7.224893151235251e-05, "loss": 1.5544, "step": 6532 }, { "epoch": 0.364138007914832, "grad_norm": 0.5720942616462708, "learning_rate": 7.224095704923537e-05, "loss": 1.9824, "step": 6533 }, { "epoch": 0.3641937461679951, "grad_norm": 0.5403176546096802, "learning_rate": 7.223298188076475e-05, "loss": 1.5978, "step": 6534 }, { "epoch": 0.36424948442115823, "grad_norm": 0.5350765585899353, "learning_rate": 7.222500600719356e-05, "loss": 1.5945, "step": 6535 }, { "epoch": 0.36430522267432136, "grad_norm": 0.542413592338562, "learning_rate": 7.221702942877477e-05, "loss": 1.717, "step": 6536 }, { "epoch": 0.36436096092748455, "grad_norm": 0.5472922921180725, "learning_rate": 7.220905214576134e-05, "loss": 1.6535, "step": 6537 }, { "epoch": 0.3644166991806477, "grad_norm": 0.5479559302330017, "learning_rate": 7.220107415840626e-05, "loss": 1.5444, "step": 6538 }, { "epoch": 0.3644724374338108, "grad_norm": 0.5131190419197083, "learning_rate": 7.219309546696255e-05, "loss": 1.3543, "step": 6539 }, { "epoch": 0.364528175686974, "grad_norm": 0.5852962732315063, "learning_rate": 7.218511607168326e-05, "loss": 1.7422, "step": 6540 }, { "epoch": 0.3645839139401371, "grad_norm": 0.5998173356056213, "learning_rate": 7.21771359728214e-05, "loss": 1.9125, "step": 6541 }, { "epoch": 0.36463965219330025, "grad_norm": 0.5412231683731079, "learning_rate": 7.216915517063012e-05, "loss": 1.8743, "step": 6542 }, { "epoch": 0.36469539044646343, "grad_norm": 0.5305824279785156, "learning_rate": 7.216117366536249e-05, "loss": 1.476, "step": 6543 }, { "epoch": 0.36475112869962656, "grad_norm": 0.586646556854248, "learning_rate": 7.215319145727161e-05, "loss": 1.7591, "step": 6544 }, { "epoch": 0.3648068669527897, "grad_norm": 0.5975636839866638, "learning_rate": 7.214520854661071e-05, "loss": 1.9996, "step": 6545 }, { "epoch": 0.3648626052059529, "grad_norm": 0.543255627155304, "learning_rate": 7.213722493363288e-05, "loss": 1.6909, "step": 6546 }, { "epoch": 0.364918343459116, "grad_norm": 0.5423970222473145, "learning_rate": 7.212924061859135e-05, "loss": 1.6429, "step": 6547 }, { "epoch": 0.36497408171227913, "grad_norm": 0.5859336256980896, "learning_rate": 7.212125560173933e-05, "loss": 1.9055, "step": 6548 }, { "epoch": 0.36502981996544226, "grad_norm": 0.5425530672073364, "learning_rate": 7.211326988333006e-05, "loss": 1.7073, "step": 6549 }, { "epoch": 0.36508555821860544, "grad_norm": 0.580575168132782, "learning_rate": 7.210528346361678e-05, "loss": 1.6739, "step": 6550 }, { "epoch": 0.3651412964717686, "grad_norm": 0.599686324596405, "learning_rate": 7.209729634285282e-05, "loss": 1.7179, "step": 6551 }, { "epoch": 0.3651970347249317, "grad_norm": 0.5199704170227051, "learning_rate": 7.208930852129143e-05, "loss": 1.7484, "step": 6552 }, { "epoch": 0.3652527729780949, "grad_norm": 0.5557204484939575, "learning_rate": 7.208131999918599e-05, "loss": 1.6169, "step": 6553 }, { "epoch": 0.365308511231258, "grad_norm": 0.5307885408401489, "learning_rate": 7.20733307767898e-05, "loss": 1.4029, "step": 6554 }, { "epoch": 0.36536424948442114, "grad_norm": 0.5462751388549805, "learning_rate": 7.206534085435625e-05, "loss": 1.6399, "step": 6555 }, { "epoch": 0.36541998773758433, "grad_norm": 0.5815526247024536, "learning_rate": 7.205735023213877e-05, "loss": 1.7943, "step": 6556 }, { "epoch": 0.36547572599074746, "grad_norm": 0.5783229470252991, "learning_rate": 7.204935891039071e-05, "loss": 1.7919, "step": 6557 }, { "epoch": 0.3655314642439106, "grad_norm": 0.5671087503433228, "learning_rate": 7.204136688936556e-05, "loss": 1.8125, "step": 6558 }, { "epoch": 0.3655872024970737, "grad_norm": 0.5661280751228333, "learning_rate": 7.203337416931675e-05, "loss": 1.6377, "step": 6559 }, { "epoch": 0.3656429407502369, "grad_norm": 0.5448043942451477, "learning_rate": 7.202538075049781e-05, "loss": 1.6929, "step": 6560 }, { "epoch": 0.36569867900340003, "grad_norm": 0.5517578125, "learning_rate": 7.201738663316217e-05, "loss": 1.7849, "step": 6561 }, { "epoch": 0.36575441725656316, "grad_norm": 0.5554426312446594, "learning_rate": 7.200939181756341e-05, "loss": 1.3314, "step": 6562 }, { "epoch": 0.36581015550972634, "grad_norm": 0.5693673491477966, "learning_rate": 7.200139630395507e-05, "loss": 1.8517, "step": 6563 }, { "epoch": 0.36586589376288947, "grad_norm": 0.5405746698379517, "learning_rate": 7.199340009259072e-05, "loss": 1.58, "step": 6564 }, { "epoch": 0.3659216320160526, "grad_norm": 0.633660078048706, "learning_rate": 7.198540318372394e-05, "loss": 1.9478, "step": 6565 }, { "epoch": 0.3659773702692158, "grad_norm": 0.5665812492370605, "learning_rate": 7.197740557760834e-05, "loss": 1.6334, "step": 6566 }, { "epoch": 0.3660331085223789, "grad_norm": 0.549199104309082, "learning_rate": 7.196940727449759e-05, "loss": 1.4779, "step": 6567 }, { "epoch": 0.36608884677554204, "grad_norm": 0.49754953384399414, "learning_rate": 7.196140827464533e-05, "loss": 1.5101, "step": 6568 }, { "epoch": 0.3661445850287052, "grad_norm": 0.5829338431358337, "learning_rate": 7.195340857830524e-05, "loss": 1.7219, "step": 6569 }, { "epoch": 0.36620032328186836, "grad_norm": 0.5498637557029724, "learning_rate": 7.194540818573103e-05, "loss": 1.6491, "step": 6570 }, { "epoch": 0.3662560615350315, "grad_norm": 0.5562663674354553, "learning_rate": 7.193740709717643e-05, "loss": 1.7846, "step": 6571 }, { "epoch": 0.3663117997881946, "grad_norm": 0.5268211364746094, "learning_rate": 7.192940531289517e-05, "loss": 1.5521, "step": 6572 }, { "epoch": 0.3663675380413578, "grad_norm": 0.5425642132759094, "learning_rate": 7.192140283314104e-05, "loss": 1.7968, "step": 6573 }, { "epoch": 0.3664232762945209, "grad_norm": 0.5653149485588074, "learning_rate": 7.191339965816781e-05, "loss": 1.6086, "step": 6574 }, { "epoch": 0.36647901454768406, "grad_norm": 0.5728870630264282, "learning_rate": 7.190539578822932e-05, "loss": 1.8264, "step": 6575 }, { "epoch": 0.36653475280084724, "grad_norm": 0.5501007437705994, "learning_rate": 7.189739122357939e-05, "loss": 1.8426, "step": 6576 }, { "epoch": 0.36659049105401037, "grad_norm": 0.5318872332572937, "learning_rate": 7.188938596447188e-05, "loss": 1.7968, "step": 6577 }, { "epoch": 0.3666462293071735, "grad_norm": 0.5750231146812439, "learning_rate": 7.188138001116065e-05, "loss": 1.6745, "step": 6578 }, { "epoch": 0.3667019675603367, "grad_norm": 0.6171157956123352, "learning_rate": 7.187337336389966e-05, "loss": 2.0264, "step": 6579 }, { "epoch": 0.3667577058134998, "grad_norm": 0.5361387133598328, "learning_rate": 7.186536602294278e-05, "loss": 1.5105, "step": 6580 }, { "epoch": 0.36681344406666294, "grad_norm": 0.5726244449615479, "learning_rate": 7.185735798854396e-05, "loss": 1.6055, "step": 6581 }, { "epoch": 0.36686918231982607, "grad_norm": 0.5350404381752014, "learning_rate": 7.184934926095721e-05, "loss": 1.7493, "step": 6582 }, { "epoch": 0.36692492057298925, "grad_norm": 0.5755828022956848, "learning_rate": 7.184133984043646e-05, "loss": 1.6443, "step": 6583 }, { "epoch": 0.3669806588261524, "grad_norm": 0.5558964610099792, "learning_rate": 7.183332972723578e-05, "loss": 1.816, "step": 6584 }, { "epoch": 0.3670363970793155, "grad_norm": 0.5483201146125793, "learning_rate": 7.182531892160917e-05, "loss": 1.6545, "step": 6585 }, { "epoch": 0.3670921353324787, "grad_norm": 0.5599815249443054, "learning_rate": 7.18173074238107e-05, "loss": 1.634, "step": 6586 }, { "epoch": 0.3671478735856418, "grad_norm": 0.5529213547706604, "learning_rate": 7.180929523409443e-05, "loss": 1.7378, "step": 6587 }, { "epoch": 0.36720361183880496, "grad_norm": 0.5131180286407471, "learning_rate": 7.180128235271449e-05, "loss": 1.5528, "step": 6588 }, { "epoch": 0.36725935009196814, "grad_norm": 0.591602623462677, "learning_rate": 7.179326877992497e-05, "loss": 1.7482, "step": 6589 }, { "epoch": 0.36731508834513127, "grad_norm": 0.4902382791042328, "learning_rate": 7.178525451598003e-05, "loss": 1.4865, "step": 6590 }, { "epoch": 0.3673708265982944, "grad_norm": 0.5887609720230103, "learning_rate": 7.177723956113383e-05, "loss": 1.9031, "step": 6591 }, { "epoch": 0.3674265648514576, "grad_norm": 0.5403375625610352, "learning_rate": 7.176922391564056e-05, "loss": 1.6702, "step": 6592 }, { "epoch": 0.3674823031046207, "grad_norm": 0.5793707370758057, "learning_rate": 7.176120757975444e-05, "loss": 1.6571, "step": 6593 }, { "epoch": 0.36753804135778384, "grad_norm": 0.5770851373672485, "learning_rate": 7.175319055372969e-05, "loss": 1.7841, "step": 6594 }, { "epoch": 0.36759377961094697, "grad_norm": 0.5472514629364014, "learning_rate": 7.174517283782058e-05, "loss": 1.6785, "step": 6595 }, { "epoch": 0.36764951786411015, "grad_norm": 0.5961628556251526, "learning_rate": 7.173715443228133e-05, "loss": 1.6604, "step": 6596 }, { "epoch": 0.3677052561172733, "grad_norm": 0.5890954732894897, "learning_rate": 7.172913533736632e-05, "loss": 1.7003, "step": 6597 }, { "epoch": 0.3677609943704364, "grad_norm": 0.6537253260612488, "learning_rate": 7.17211155533298e-05, "loss": 1.9955, "step": 6598 }, { "epoch": 0.3678167326235996, "grad_norm": 0.5514366030693054, "learning_rate": 7.171309508042615e-05, "loss": 1.5601, "step": 6599 }, { "epoch": 0.3678724708767627, "grad_norm": 0.6790293455123901, "learning_rate": 7.170507391890972e-05, "loss": 2.1675, "step": 6600 }, { "epoch": 0.36792820912992585, "grad_norm": 0.5294934511184692, "learning_rate": 7.16970520690349e-05, "loss": 1.6509, "step": 6601 }, { "epoch": 0.36798394738308904, "grad_norm": 0.5617215037345886, "learning_rate": 7.168902953105608e-05, "loss": 1.7301, "step": 6602 }, { "epoch": 0.36803968563625217, "grad_norm": 0.5187042355537415, "learning_rate": 7.16810063052277e-05, "loss": 1.4945, "step": 6603 }, { "epoch": 0.3680954238894153, "grad_norm": 0.5646756291389465, "learning_rate": 7.16729823918042e-05, "loss": 1.8281, "step": 6604 }, { "epoch": 0.3681511621425784, "grad_norm": 0.5496782064437866, "learning_rate": 7.166495779104007e-05, "loss": 1.6996, "step": 6605 }, { "epoch": 0.3682069003957416, "grad_norm": 0.6056029796600342, "learning_rate": 7.16569325031898e-05, "loss": 1.9787, "step": 6606 }, { "epoch": 0.36826263864890474, "grad_norm": 0.5624659061431885, "learning_rate": 7.164890652850789e-05, "loss": 1.7931, "step": 6607 }, { "epoch": 0.36831837690206787, "grad_norm": 0.5342402458190918, "learning_rate": 7.16408798672489e-05, "loss": 1.664, "step": 6608 }, { "epoch": 0.36837411515523105, "grad_norm": 0.5402200818061829, "learning_rate": 7.163285251966736e-05, "loss": 1.6754, "step": 6609 }, { "epoch": 0.3684298534083942, "grad_norm": 0.5262821316719055, "learning_rate": 7.162482448601789e-05, "loss": 1.5501, "step": 6610 }, { "epoch": 0.3684855916615573, "grad_norm": 0.5371507406234741, "learning_rate": 7.161679576655503e-05, "loss": 1.6168, "step": 6611 }, { "epoch": 0.3685413299147205, "grad_norm": 0.5895312428474426, "learning_rate": 7.160876636153349e-05, "loss": 1.8576, "step": 6612 }, { "epoch": 0.3685970681678836, "grad_norm": 0.5309399962425232, "learning_rate": 7.160073627120784e-05, "loss": 1.5803, "step": 6613 }, { "epoch": 0.36865280642104675, "grad_norm": 0.564697265625, "learning_rate": 7.159270549583278e-05, "loss": 1.2999, "step": 6614 }, { "epoch": 0.36870854467420994, "grad_norm": 0.5483527183532715, "learning_rate": 7.158467403566299e-05, "loss": 1.559, "step": 6615 }, { "epoch": 0.36876428292737307, "grad_norm": 0.47662925720214844, "learning_rate": 7.15766418909532e-05, "loss": 1.2871, "step": 6616 }, { "epoch": 0.3688200211805362, "grad_norm": 0.5505543947219849, "learning_rate": 7.156860906195811e-05, "loss": 1.717, "step": 6617 }, { "epoch": 0.3688757594336993, "grad_norm": 0.5837799310684204, "learning_rate": 7.156057554893251e-05, "loss": 1.8828, "step": 6618 }, { "epoch": 0.3689314976868625, "grad_norm": 0.6020135283470154, "learning_rate": 7.155254135213117e-05, "loss": 1.6727, "step": 6619 }, { "epoch": 0.36898723594002564, "grad_norm": 0.5805865526199341, "learning_rate": 7.154450647180886e-05, "loss": 1.7273, "step": 6620 }, { "epoch": 0.36904297419318877, "grad_norm": 0.5338916182518005, "learning_rate": 7.153647090822043e-05, "loss": 1.5732, "step": 6621 }, { "epoch": 0.36909871244635195, "grad_norm": 0.5388802886009216, "learning_rate": 7.152843466162069e-05, "loss": 1.5612, "step": 6622 }, { "epoch": 0.3691544506995151, "grad_norm": 0.5497878789901733, "learning_rate": 7.152039773226456e-05, "loss": 1.6601, "step": 6623 }, { "epoch": 0.3692101889526782, "grad_norm": 0.5147888660430908, "learning_rate": 7.151236012040685e-05, "loss": 1.6467, "step": 6624 }, { "epoch": 0.3692659272058414, "grad_norm": 0.5906471014022827, "learning_rate": 7.150432182630252e-05, "loss": 1.6429, "step": 6625 }, { "epoch": 0.3693216654590045, "grad_norm": 0.5193469524383545, "learning_rate": 7.149628285020648e-05, "loss": 1.7369, "step": 6626 }, { "epoch": 0.36937740371216765, "grad_norm": 0.5903412699699402, "learning_rate": 7.148824319237367e-05, "loss": 1.7329, "step": 6627 }, { "epoch": 0.3694331419653308, "grad_norm": 0.5230131149291992, "learning_rate": 7.148020285305907e-05, "loss": 1.5495, "step": 6628 }, { "epoch": 0.36948888021849396, "grad_norm": 0.5554400086402893, "learning_rate": 7.147216183251768e-05, "loss": 1.7592, "step": 6629 }, { "epoch": 0.3695446184716571, "grad_norm": 0.4992237985134125, "learning_rate": 7.146412013100451e-05, "loss": 1.5094, "step": 6630 }, { "epoch": 0.3696003567248202, "grad_norm": 0.6239908933639526, "learning_rate": 7.14560777487746e-05, "loss": 1.9804, "step": 6631 }, { "epoch": 0.3696560949779834, "grad_norm": 0.49736112356185913, "learning_rate": 7.144803468608298e-05, "loss": 1.4165, "step": 6632 }, { "epoch": 0.36971183323114654, "grad_norm": 0.5291538834571838, "learning_rate": 7.143999094318477e-05, "loss": 1.6362, "step": 6633 }, { "epoch": 0.36976757148430967, "grad_norm": 0.5881434679031372, "learning_rate": 7.143194652033505e-05, "loss": 1.8459, "step": 6634 }, { "epoch": 0.36982330973747285, "grad_norm": 0.5663610100746155, "learning_rate": 7.142390141778895e-05, "loss": 1.655, "step": 6635 }, { "epoch": 0.369879047990636, "grad_norm": 0.6780499219894409, "learning_rate": 7.141585563580158e-05, "loss": 1.8284, "step": 6636 }, { "epoch": 0.3699347862437991, "grad_norm": 0.544389009475708, "learning_rate": 7.140780917462814e-05, "loss": 1.6024, "step": 6637 }, { "epoch": 0.3699905244969623, "grad_norm": 0.5259643197059631, "learning_rate": 7.139976203452383e-05, "loss": 1.6143, "step": 6638 }, { "epoch": 0.3700462627501254, "grad_norm": 0.5904932022094727, "learning_rate": 7.139171421574383e-05, "loss": 1.7714, "step": 6639 }, { "epoch": 0.37010200100328855, "grad_norm": 0.5398536920547485, "learning_rate": 7.138366571854338e-05, "loss": 1.5943, "step": 6640 }, { "epoch": 0.3701577392564517, "grad_norm": 0.5698688626289368, "learning_rate": 7.137561654317772e-05, "loss": 1.7892, "step": 6641 }, { "epoch": 0.37021347750961486, "grad_norm": 0.5498561859130859, "learning_rate": 7.136756668990213e-05, "loss": 1.7051, "step": 6642 }, { "epoch": 0.370269215762778, "grad_norm": 0.5418841242790222, "learning_rate": 7.13595161589719e-05, "loss": 1.6284, "step": 6643 }, { "epoch": 0.3703249540159411, "grad_norm": 0.5735422968864441, "learning_rate": 7.135146495064236e-05, "loss": 1.5837, "step": 6644 }, { "epoch": 0.3703806922691043, "grad_norm": 0.593471348285675, "learning_rate": 7.134341306516885e-05, "loss": 1.891, "step": 6645 }, { "epoch": 0.37043643052226743, "grad_norm": 0.519626796245575, "learning_rate": 7.13353605028067e-05, "loss": 1.676, "step": 6646 }, { "epoch": 0.37049216877543056, "grad_norm": 0.59029620885849, "learning_rate": 7.132730726381134e-05, "loss": 1.8638, "step": 6647 }, { "epoch": 0.37054790702859375, "grad_norm": 0.6374014019966125, "learning_rate": 7.13192533484381e-05, "loss": 2.0887, "step": 6648 }, { "epoch": 0.3706036452817569, "grad_norm": 0.5250412821769714, "learning_rate": 7.131119875694246e-05, "loss": 1.5408, "step": 6649 }, { "epoch": 0.37065938353492, "grad_norm": 0.5467897653579712, "learning_rate": 7.130314348957986e-05, "loss": 1.4246, "step": 6650 }, { "epoch": 0.37071512178808314, "grad_norm": 0.5109268426895142, "learning_rate": 7.129508754660575e-05, "loss": 1.4972, "step": 6651 }, { "epoch": 0.3707708600412463, "grad_norm": 0.5759547352790833, "learning_rate": 7.128703092827562e-05, "loss": 1.9089, "step": 6652 }, { "epoch": 0.37082659829440945, "grad_norm": 0.6243898272514343, "learning_rate": 7.127897363484497e-05, "loss": 1.9196, "step": 6653 }, { "epoch": 0.3708823365475726, "grad_norm": 0.5852481722831726, "learning_rate": 7.127091566656936e-05, "loss": 1.7842, "step": 6654 }, { "epoch": 0.37093807480073576, "grad_norm": 0.5579434037208557, "learning_rate": 7.12628570237043e-05, "loss": 1.6261, "step": 6655 }, { "epoch": 0.3709938130538989, "grad_norm": 0.5315961837768555, "learning_rate": 7.125479770650539e-05, "loss": 1.6085, "step": 6656 }, { "epoch": 0.371049551307062, "grad_norm": 0.5678053498268127, "learning_rate": 7.124673771522824e-05, "loss": 1.905, "step": 6657 }, { "epoch": 0.3711052895602252, "grad_norm": 0.5308210849761963, "learning_rate": 7.123867705012843e-05, "loss": 1.5081, "step": 6658 }, { "epoch": 0.37116102781338833, "grad_norm": 0.5750522017478943, "learning_rate": 7.123061571146161e-05, "loss": 1.6793, "step": 6659 }, { "epoch": 0.37121676606655146, "grad_norm": 0.5785144567489624, "learning_rate": 7.122255369948346e-05, "loss": 1.6402, "step": 6660 }, { "epoch": 0.37127250431971465, "grad_norm": 0.5107117891311646, "learning_rate": 7.121449101444964e-05, "loss": 1.6232, "step": 6661 }, { "epoch": 0.3713282425728778, "grad_norm": 0.5365573763847351, "learning_rate": 7.120642765661584e-05, "loss": 1.7163, "step": 6662 }, { "epoch": 0.3713839808260409, "grad_norm": 0.5924217104911804, "learning_rate": 7.119836362623781e-05, "loss": 1.9706, "step": 6663 }, { "epoch": 0.37143971907920403, "grad_norm": 0.5683318972587585, "learning_rate": 7.119029892357128e-05, "loss": 1.9116, "step": 6664 }, { "epoch": 0.3714954573323672, "grad_norm": 0.524502694606781, "learning_rate": 7.118223354887201e-05, "loss": 1.5862, "step": 6665 }, { "epoch": 0.37155119558553035, "grad_norm": 0.5245027542114258, "learning_rate": 7.11741675023958e-05, "loss": 1.5945, "step": 6666 }, { "epoch": 0.3716069338386935, "grad_norm": 0.5658608675003052, "learning_rate": 7.116610078439845e-05, "loss": 1.5802, "step": 6667 }, { "epoch": 0.37166267209185666, "grad_norm": 0.5938420295715332, "learning_rate": 7.115803339513578e-05, "loss": 2.005, "step": 6668 }, { "epoch": 0.3717184103450198, "grad_norm": 0.5456317663192749, "learning_rate": 7.114996533486366e-05, "loss": 1.5013, "step": 6669 }, { "epoch": 0.3717741485981829, "grad_norm": 0.5922924280166626, "learning_rate": 7.114189660383794e-05, "loss": 2.0418, "step": 6670 }, { "epoch": 0.3718298868513461, "grad_norm": 0.5821951627731323, "learning_rate": 7.113382720231454e-05, "loss": 1.7955, "step": 6671 }, { "epoch": 0.37188562510450923, "grad_norm": 0.5134814381599426, "learning_rate": 7.112575713054936e-05, "loss": 1.4315, "step": 6672 }, { "epoch": 0.37194136335767236, "grad_norm": 0.5751433968544006, "learning_rate": 7.111768638879833e-05, "loss": 1.566, "step": 6673 }, { "epoch": 0.3719971016108355, "grad_norm": 0.5614348649978638, "learning_rate": 7.110961497731742e-05, "loss": 1.8572, "step": 6674 }, { "epoch": 0.3720528398639987, "grad_norm": 0.5680375099182129, "learning_rate": 7.110154289636259e-05, "loss": 2.0372, "step": 6675 }, { "epoch": 0.3721085781171618, "grad_norm": 0.5367892980575562, "learning_rate": 7.109347014618985e-05, "loss": 1.6665, "step": 6676 }, { "epoch": 0.37216431637032493, "grad_norm": 0.563017725944519, "learning_rate": 7.108539672705523e-05, "loss": 1.747, "step": 6677 }, { "epoch": 0.3722200546234881, "grad_norm": 0.5716055631637573, "learning_rate": 7.107732263921475e-05, "loss": 1.4182, "step": 6678 }, { "epoch": 0.37227579287665125, "grad_norm": 0.514310896396637, "learning_rate": 7.106924788292448e-05, "loss": 1.6223, "step": 6679 }, { "epoch": 0.3723315311298144, "grad_norm": 0.5039160251617432, "learning_rate": 7.106117245844054e-05, "loss": 1.5979, "step": 6680 }, { "epoch": 0.37238726938297756, "grad_norm": 0.5815281867980957, "learning_rate": 7.105309636601898e-05, "loss": 1.9983, "step": 6681 }, { "epoch": 0.3724430076361407, "grad_norm": 0.5450384616851807, "learning_rate": 7.104501960591595e-05, "loss": 1.5488, "step": 6682 }, { "epoch": 0.3724987458893038, "grad_norm": 0.5386560559272766, "learning_rate": 7.103694217838761e-05, "loss": 1.6376, "step": 6683 }, { "epoch": 0.372554484142467, "grad_norm": 0.5220578908920288, "learning_rate": 7.102886408369012e-05, "loss": 1.4654, "step": 6684 }, { "epoch": 0.37261022239563013, "grad_norm": 0.5630038976669312, "learning_rate": 7.102078532207966e-05, "loss": 1.7554, "step": 6685 }, { "epoch": 0.37266596064879326, "grad_norm": 0.5405006408691406, "learning_rate": 7.101270589381245e-05, "loss": 1.8247, "step": 6686 }, { "epoch": 0.3727216989019564, "grad_norm": 0.5460960865020752, "learning_rate": 7.100462579914474e-05, "loss": 1.7902, "step": 6687 }, { "epoch": 0.3727774371551196, "grad_norm": 0.5519078969955444, "learning_rate": 7.099654503833273e-05, "loss": 1.7138, "step": 6688 }, { "epoch": 0.3728331754082827, "grad_norm": 0.5574856400489807, "learning_rate": 7.098846361163273e-05, "loss": 1.6607, "step": 6689 }, { "epoch": 0.37288891366144583, "grad_norm": 0.5525651574134827, "learning_rate": 7.098038151930107e-05, "loss": 1.8834, "step": 6690 }, { "epoch": 0.372944651914609, "grad_norm": 0.5278156399726868, "learning_rate": 7.097229876159401e-05, "loss": 1.67, "step": 6691 }, { "epoch": 0.37300039016777214, "grad_norm": 0.5362699627876282, "learning_rate": 7.096421533876792e-05, "loss": 1.6881, "step": 6692 }, { "epoch": 0.3730561284209353, "grad_norm": 0.522748589515686, "learning_rate": 7.095613125107915e-05, "loss": 1.6077, "step": 6693 }, { "epoch": 0.37311186667409846, "grad_norm": 0.5335802435874939, "learning_rate": 7.094804649878407e-05, "loss": 1.6124, "step": 6694 }, { "epoch": 0.3731676049272616, "grad_norm": 0.5322664976119995, "learning_rate": 7.093996108213909e-05, "loss": 1.735, "step": 6695 }, { "epoch": 0.3732233431804247, "grad_norm": 0.5863260626792908, "learning_rate": 7.093187500140064e-05, "loss": 1.9465, "step": 6696 }, { "epoch": 0.37327908143358784, "grad_norm": 0.5546720623970032, "learning_rate": 7.092378825682517e-05, "loss": 1.6817, "step": 6697 }, { "epoch": 0.37333481968675103, "grad_norm": 0.5397077798843384, "learning_rate": 7.091570084866909e-05, "loss": 1.7072, "step": 6698 }, { "epoch": 0.37339055793991416, "grad_norm": 0.5567345023155212, "learning_rate": 7.090761277718897e-05, "loss": 1.7315, "step": 6699 }, { "epoch": 0.3734462961930773, "grad_norm": 0.5560916662216187, "learning_rate": 7.089952404264126e-05, "loss": 1.5599, "step": 6700 }, { "epoch": 0.37350203444624047, "grad_norm": 0.5497678518295288, "learning_rate": 7.089143464528249e-05, "loss": 1.6328, "step": 6701 }, { "epoch": 0.3735577726994036, "grad_norm": 0.5806947946548462, "learning_rate": 7.088334458536921e-05, "loss": 1.8025, "step": 6702 }, { "epoch": 0.37361351095256673, "grad_norm": 0.6178561449050903, "learning_rate": 7.087525386315802e-05, "loss": 1.6715, "step": 6703 }, { "epoch": 0.3736692492057299, "grad_norm": 0.5702304244041443, "learning_rate": 7.086716247890548e-05, "loss": 1.7321, "step": 6704 }, { "epoch": 0.37372498745889304, "grad_norm": 0.5194035172462463, "learning_rate": 7.08590704328682e-05, "loss": 1.5648, "step": 6705 }, { "epoch": 0.37378072571205617, "grad_norm": 0.5901757478713989, "learning_rate": 7.085097772530283e-05, "loss": 1.9348, "step": 6706 }, { "epoch": 0.37383646396521936, "grad_norm": 0.7031030654907227, "learning_rate": 7.084288435646603e-05, "loss": 1.5634, "step": 6707 }, { "epoch": 0.3738922022183825, "grad_norm": 0.5556403398513794, "learning_rate": 7.083479032661445e-05, "loss": 1.6525, "step": 6708 }, { "epoch": 0.3739479404715456, "grad_norm": 0.5691899061203003, "learning_rate": 7.082669563600478e-05, "loss": 1.885, "step": 6709 }, { "epoch": 0.37400367872470874, "grad_norm": 0.5547059774398804, "learning_rate": 7.081860028489377e-05, "loss": 1.8645, "step": 6710 }, { "epoch": 0.37405941697787193, "grad_norm": 0.5635570287704468, "learning_rate": 7.081050427353814e-05, "loss": 1.8752, "step": 6711 }, { "epoch": 0.37411515523103506, "grad_norm": 0.5423487424850464, "learning_rate": 7.080240760219465e-05, "loss": 1.5953, "step": 6712 }, { "epoch": 0.3741708934841982, "grad_norm": 0.5141568183898926, "learning_rate": 7.079431027112006e-05, "loss": 1.4812, "step": 6713 }, { "epoch": 0.37422663173736137, "grad_norm": 0.5988462567329407, "learning_rate": 7.078621228057121e-05, "loss": 1.8588, "step": 6714 }, { "epoch": 0.3742823699905245, "grad_norm": 0.5320055484771729, "learning_rate": 7.077811363080489e-05, "loss": 1.745, "step": 6715 }, { "epoch": 0.37433810824368763, "grad_norm": 0.5388814806938171, "learning_rate": 7.077001432207795e-05, "loss": 1.511, "step": 6716 }, { "epoch": 0.3743938464968508, "grad_norm": 0.537324070930481, "learning_rate": 7.076191435464725e-05, "loss": 1.6644, "step": 6717 }, { "epoch": 0.37444958475001394, "grad_norm": 0.533687174320221, "learning_rate": 7.075381372876967e-05, "loss": 1.73, "step": 6718 }, { "epoch": 0.37450532300317707, "grad_norm": 0.5057275295257568, "learning_rate": 7.074571244470214e-05, "loss": 1.6284, "step": 6719 }, { "epoch": 0.3745610612563402, "grad_norm": 0.6067156195640564, "learning_rate": 7.073761050270156e-05, "loss": 1.84, "step": 6720 }, { "epoch": 0.3746167995095034, "grad_norm": 0.5253334641456604, "learning_rate": 7.072950790302487e-05, "loss": 1.4598, "step": 6721 }, { "epoch": 0.3746725377626665, "grad_norm": 0.521193265914917, "learning_rate": 7.072140464592907e-05, "loss": 1.5442, "step": 6722 }, { "epoch": 0.37472827601582964, "grad_norm": 0.5262565612792969, "learning_rate": 7.071330073167112e-05, "loss": 1.6898, "step": 6723 }, { "epoch": 0.3747840142689928, "grad_norm": 0.6259338855743408, "learning_rate": 7.070519616050804e-05, "loss": 1.731, "step": 6724 }, { "epoch": 0.37483975252215596, "grad_norm": 0.5520288348197937, "learning_rate": 7.069709093269687e-05, "loss": 1.796, "step": 6725 }, { "epoch": 0.3748954907753191, "grad_norm": 0.5660863518714905, "learning_rate": 7.068898504849462e-05, "loss": 1.656, "step": 6726 }, { "epoch": 0.37495122902848227, "grad_norm": 0.5522897839546204, "learning_rate": 7.06808785081584e-05, "loss": 1.6656, "step": 6727 }, { "epoch": 0.3750069672816454, "grad_norm": 0.6100639700889587, "learning_rate": 7.067277131194529e-05, "loss": 1.7658, "step": 6728 }, { "epoch": 0.3750627055348085, "grad_norm": 0.5829086899757385, "learning_rate": 7.066466346011242e-05, "loss": 1.6342, "step": 6729 }, { "epoch": 0.3751184437879717, "grad_norm": 0.6315231323242188, "learning_rate": 7.06565549529169e-05, "loss": 1.7829, "step": 6730 }, { "epoch": 0.37517418204113484, "grad_norm": 0.6006489992141724, "learning_rate": 7.064844579061588e-05, "loss": 1.8819, "step": 6731 }, { "epoch": 0.37522992029429797, "grad_norm": 0.5952304005622864, "learning_rate": 7.064033597346658e-05, "loss": 1.6654, "step": 6732 }, { "epoch": 0.3752856585474611, "grad_norm": 0.5768652558326721, "learning_rate": 7.063222550172612e-05, "loss": 1.6577, "step": 6733 }, { "epoch": 0.3753413968006243, "grad_norm": 0.5706788301467896, "learning_rate": 7.062411437565179e-05, "loss": 1.7532, "step": 6734 }, { "epoch": 0.3753971350537874, "grad_norm": 0.6298890113830566, "learning_rate": 7.06160025955008e-05, "loss": 1.7744, "step": 6735 }, { "epoch": 0.37545287330695054, "grad_norm": 0.5873239636421204, "learning_rate": 7.06078901615304e-05, "loss": 1.9847, "step": 6736 }, { "epoch": 0.3755086115601137, "grad_norm": 0.5103023648262024, "learning_rate": 7.059977707399787e-05, "loss": 1.4559, "step": 6737 }, { "epoch": 0.37556434981327685, "grad_norm": 0.521653950214386, "learning_rate": 7.059166333316054e-05, "loss": 1.6796, "step": 6738 }, { "epoch": 0.37562008806644, "grad_norm": 0.5209727883338928, "learning_rate": 7.058354893927568e-05, "loss": 1.5015, "step": 6739 }, { "epoch": 0.37567582631960317, "grad_norm": 0.6425443887710571, "learning_rate": 7.057543389260068e-05, "loss": 1.8178, "step": 6740 }, { "epoch": 0.3757315645727663, "grad_norm": 0.5647505521774292, "learning_rate": 7.056731819339286e-05, "loss": 1.7513, "step": 6741 }, { "epoch": 0.3757873028259294, "grad_norm": 0.5992183089256287, "learning_rate": 7.055920184190964e-05, "loss": 1.6351, "step": 6742 }, { "epoch": 0.37584304107909255, "grad_norm": 0.5495748519897461, "learning_rate": 7.055108483840839e-05, "loss": 1.6854, "step": 6743 }, { "epoch": 0.37589877933225574, "grad_norm": 0.5780972242355347, "learning_rate": 7.054296718314656e-05, "loss": 1.7937, "step": 6744 }, { "epoch": 0.37595451758541887, "grad_norm": 0.5518954992294312, "learning_rate": 7.053484887638158e-05, "loss": 1.6708, "step": 6745 }, { "epoch": 0.376010255838582, "grad_norm": 0.5211352109909058, "learning_rate": 7.052672991837093e-05, "loss": 1.6565, "step": 6746 }, { "epoch": 0.3760659940917452, "grad_norm": 0.5192275643348694, "learning_rate": 7.051861030937207e-05, "loss": 1.5376, "step": 6747 }, { "epoch": 0.3761217323449083, "grad_norm": 0.5492019057273865, "learning_rate": 7.051049004964254e-05, "loss": 1.7518, "step": 6748 }, { "epoch": 0.37617747059807144, "grad_norm": 0.5412474274635315, "learning_rate": 7.050236913943984e-05, "loss": 1.5384, "step": 6749 }, { "epoch": 0.3762332088512346, "grad_norm": 0.5172974467277527, "learning_rate": 7.049424757902153e-05, "loss": 1.6072, "step": 6750 }, { "epoch": 0.37628894710439775, "grad_norm": 0.5415205955505371, "learning_rate": 7.048612536864517e-05, "loss": 1.8342, "step": 6751 }, { "epoch": 0.3763446853575609, "grad_norm": 0.5428817868232727, "learning_rate": 7.047800250856837e-05, "loss": 1.6988, "step": 6752 }, { "epoch": 0.37640042361072407, "grad_norm": 0.5195114612579346, "learning_rate": 7.046987899904871e-05, "loss": 1.4647, "step": 6753 }, { "epoch": 0.3764561618638872, "grad_norm": 0.5440792441368103, "learning_rate": 7.046175484034384e-05, "loss": 1.5224, "step": 6754 }, { "epoch": 0.3765119001170503, "grad_norm": 0.5353301763534546, "learning_rate": 7.045363003271141e-05, "loss": 1.664, "step": 6755 }, { "epoch": 0.37656763837021345, "grad_norm": 0.5722842812538147, "learning_rate": 7.044550457640909e-05, "loss": 1.6422, "step": 6756 }, { "epoch": 0.37662337662337664, "grad_norm": 0.5732778906822205, "learning_rate": 7.043737847169455e-05, "loss": 2.0161, "step": 6757 }, { "epoch": 0.37667911487653977, "grad_norm": 0.5180158019065857, "learning_rate": 7.042925171882557e-05, "loss": 1.6388, "step": 6758 }, { "epoch": 0.3767348531297029, "grad_norm": 0.530694305896759, "learning_rate": 7.042112431805979e-05, "loss": 1.546, "step": 6759 }, { "epoch": 0.3767905913828661, "grad_norm": 0.5620813965797424, "learning_rate": 7.041299626965503e-05, "loss": 1.6727, "step": 6760 }, { "epoch": 0.3768463296360292, "grad_norm": 0.5627542734146118, "learning_rate": 7.040486757386904e-05, "loss": 1.7527, "step": 6761 }, { "epoch": 0.37690206788919234, "grad_norm": 0.588291347026825, "learning_rate": 7.039673823095963e-05, "loss": 1.9415, "step": 6762 }, { "epoch": 0.3769578061423555, "grad_norm": 0.5307551026344299, "learning_rate": 7.03886082411846e-05, "loss": 1.6316, "step": 6763 }, { "epoch": 0.37701354439551865, "grad_norm": 0.5484150648117065, "learning_rate": 7.038047760480179e-05, "loss": 1.6363, "step": 6764 }, { "epoch": 0.3770692826486818, "grad_norm": 0.5301684737205505, "learning_rate": 7.037234632206905e-05, "loss": 1.7243, "step": 6765 }, { "epoch": 0.3771250209018449, "grad_norm": 0.5907619595527649, "learning_rate": 7.036421439324427e-05, "loss": 1.6807, "step": 6766 }, { "epoch": 0.3771807591550081, "grad_norm": 0.5170425772666931, "learning_rate": 7.035608181858533e-05, "loss": 1.6641, "step": 6767 }, { "epoch": 0.3772364974081712, "grad_norm": 0.5344756245613098, "learning_rate": 7.034794859835016e-05, "loss": 1.8226, "step": 6768 }, { "epoch": 0.37729223566133435, "grad_norm": 0.5386238098144531, "learning_rate": 7.033981473279672e-05, "loss": 1.6291, "step": 6769 }, { "epoch": 0.37734797391449754, "grad_norm": 0.5417985916137695, "learning_rate": 7.033168022218292e-05, "loss": 1.5797, "step": 6770 }, { "epoch": 0.37740371216766067, "grad_norm": 0.5583431124687195, "learning_rate": 7.032354506676678e-05, "loss": 1.7165, "step": 6771 }, { "epoch": 0.3774594504208238, "grad_norm": 0.5974751114845276, "learning_rate": 7.031540926680627e-05, "loss": 1.9454, "step": 6772 }, { "epoch": 0.377515188673987, "grad_norm": 0.5629299283027649, "learning_rate": 7.030727282255944e-05, "loss": 1.9527, "step": 6773 }, { "epoch": 0.3775709269271501, "grad_norm": 0.49648937582969666, "learning_rate": 7.02991357342843e-05, "loss": 1.4055, "step": 6774 }, { "epoch": 0.37762666518031324, "grad_norm": 0.5776923298835754, "learning_rate": 7.029099800223895e-05, "loss": 1.5683, "step": 6775 }, { "epoch": 0.3776824034334764, "grad_norm": 0.5667086839675903, "learning_rate": 7.028285962668144e-05, "loss": 1.6576, "step": 6776 }, { "epoch": 0.37773814168663955, "grad_norm": 0.51173996925354, "learning_rate": 7.027472060786988e-05, "loss": 1.6046, "step": 6777 }, { "epoch": 0.3777938799398027, "grad_norm": 0.6762179732322693, "learning_rate": 7.026658094606238e-05, "loss": 1.8251, "step": 6778 }, { "epoch": 0.3778496181929658, "grad_norm": 0.6333464980125427, "learning_rate": 7.02584406415171e-05, "loss": 1.9974, "step": 6779 }, { "epoch": 0.377905356446129, "grad_norm": 0.5379152297973633, "learning_rate": 7.02502996944922e-05, "loss": 1.5211, "step": 6780 }, { "epoch": 0.3779610946992921, "grad_norm": 0.5208351016044617, "learning_rate": 7.024215810524586e-05, "loss": 1.7317, "step": 6781 }, { "epoch": 0.37801683295245525, "grad_norm": 0.5434418320655823, "learning_rate": 7.023401587403629e-05, "loss": 1.6749, "step": 6782 }, { "epoch": 0.37807257120561844, "grad_norm": 0.5639735460281372, "learning_rate": 7.022587300112171e-05, "loss": 1.7105, "step": 6783 }, { "epoch": 0.37812830945878156, "grad_norm": 0.600032389163971, "learning_rate": 7.021772948676037e-05, "loss": 1.8057, "step": 6784 }, { "epoch": 0.3781840477119447, "grad_norm": 0.5152847766876221, "learning_rate": 7.020958533121051e-05, "loss": 1.6275, "step": 6785 }, { "epoch": 0.3782397859651079, "grad_norm": 0.5553915500640869, "learning_rate": 7.020144053473044e-05, "loss": 1.786, "step": 6786 }, { "epoch": 0.378295524218271, "grad_norm": 0.5452811121940613, "learning_rate": 7.019329509757845e-05, "loss": 1.6452, "step": 6787 }, { "epoch": 0.37835126247143414, "grad_norm": 0.5100104212760925, "learning_rate": 7.01851490200129e-05, "loss": 1.5128, "step": 6788 }, { "epoch": 0.37840700072459726, "grad_norm": 0.6309191584587097, "learning_rate": 7.017700230229208e-05, "loss": 1.4683, "step": 6789 }, { "epoch": 0.37846273897776045, "grad_norm": 0.5344750881195068, "learning_rate": 7.01688549446744e-05, "loss": 1.6131, "step": 6790 }, { "epoch": 0.3785184772309236, "grad_norm": 0.5286291837692261, "learning_rate": 7.016070694741824e-05, "loss": 1.6499, "step": 6791 }, { "epoch": 0.3785742154840867, "grad_norm": 0.5597365498542786, "learning_rate": 7.015255831078201e-05, "loss": 1.6677, "step": 6792 }, { "epoch": 0.3786299537372499, "grad_norm": 0.5482022166252136, "learning_rate": 7.01444090350241e-05, "loss": 1.6498, "step": 6793 }, { "epoch": 0.378685691990413, "grad_norm": 0.6198036670684814, "learning_rate": 7.0136259120403e-05, "loss": 1.8393, "step": 6794 }, { "epoch": 0.37874143024357615, "grad_norm": 0.555736243724823, "learning_rate": 7.012810856717717e-05, "loss": 1.5817, "step": 6795 }, { "epoch": 0.37879716849673933, "grad_norm": 0.5894885659217834, "learning_rate": 7.011995737560507e-05, "loss": 1.736, "step": 6796 }, { "epoch": 0.37885290674990246, "grad_norm": 0.5784539580345154, "learning_rate": 7.011180554594525e-05, "loss": 1.7195, "step": 6797 }, { "epoch": 0.3789086450030656, "grad_norm": 0.5761838555335999, "learning_rate": 7.010365307845621e-05, "loss": 1.5784, "step": 6798 }, { "epoch": 0.3789643832562288, "grad_norm": 0.5359389185905457, "learning_rate": 7.00954999733965e-05, "loss": 1.4703, "step": 6799 }, { "epoch": 0.3790201215093919, "grad_norm": 0.5606504678726196, "learning_rate": 7.008734623102471e-05, "loss": 1.7026, "step": 6800 }, { "epoch": 0.37907585976255503, "grad_norm": 0.5452861785888672, "learning_rate": 7.007919185159942e-05, "loss": 1.6358, "step": 6801 }, { "epoch": 0.37913159801571816, "grad_norm": 0.533334493637085, "learning_rate": 7.007103683537922e-05, "loss": 1.5224, "step": 6802 }, { "epoch": 0.37918733626888135, "grad_norm": 0.5216323137283325, "learning_rate": 7.006288118262277e-05, "loss": 1.5611, "step": 6803 }, { "epoch": 0.3792430745220445, "grad_norm": 0.6083248853683472, "learning_rate": 7.005472489358868e-05, "loss": 1.9112, "step": 6804 }, { "epoch": 0.3792988127752076, "grad_norm": 0.5337701439857483, "learning_rate": 7.004656796853565e-05, "loss": 1.678, "step": 6805 }, { "epoch": 0.3793545510283708, "grad_norm": 0.5296239256858826, "learning_rate": 7.003841040772237e-05, "loss": 1.6372, "step": 6806 }, { "epoch": 0.3794102892815339, "grad_norm": 0.5512758493423462, "learning_rate": 7.003025221140754e-05, "loss": 1.7838, "step": 6807 }, { "epoch": 0.37946602753469705, "grad_norm": 0.5666672587394714, "learning_rate": 7.00220933798499e-05, "loss": 1.8518, "step": 6808 }, { "epoch": 0.37952176578786023, "grad_norm": 0.5516249537467957, "learning_rate": 7.001393391330819e-05, "loss": 1.4928, "step": 6809 }, { "epoch": 0.37957750404102336, "grad_norm": 0.5139819979667664, "learning_rate": 7.000577381204118e-05, "loss": 1.5464, "step": 6810 }, { "epoch": 0.3796332422941865, "grad_norm": 0.5297854542732239, "learning_rate": 6.999761307630767e-05, "loss": 1.4929, "step": 6811 }, { "epoch": 0.3796889805473496, "grad_norm": 0.5862724184989929, "learning_rate": 6.998945170636647e-05, "loss": 1.8435, "step": 6812 }, { "epoch": 0.3797447188005128, "grad_norm": 0.5517110228538513, "learning_rate": 6.998128970247641e-05, "loss": 1.5962, "step": 6813 }, { "epoch": 0.37980045705367593, "grad_norm": 0.5306249260902405, "learning_rate": 6.997312706489634e-05, "loss": 1.4978, "step": 6814 }, { "epoch": 0.37985619530683906, "grad_norm": 0.5715779662132263, "learning_rate": 6.996496379388512e-05, "loss": 1.7663, "step": 6815 }, { "epoch": 0.37991193356000225, "grad_norm": 0.5692317485809326, "learning_rate": 6.995679988970167e-05, "loss": 1.9011, "step": 6816 }, { "epoch": 0.3799676718131654, "grad_norm": 0.5604211091995239, "learning_rate": 6.994863535260488e-05, "loss": 1.5928, "step": 6817 }, { "epoch": 0.3800234100663285, "grad_norm": 0.5591232776641846, "learning_rate": 6.994047018285368e-05, "loss": 1.6347, "step": 6818 }, { "epoch": 0.3800791483194917, "grad_norm": 0.515835702419281, "learning_rate": 6.993230438070702e-05, "loss": 1.4441, "step": 6819 }, { "epoch": 0.3801348865726548, "grad_norm": 0.5194911360740662, "learning_rate": 6.99241379464239e-05, "loss": 1.6603, "step": 6820 }, { "epoch": 0.38019062482581795, "grad_norm": 0.49259036779403687, "learning_rate": 6.991597088026327e-05, "loss": 1.5785, "step": 6821 }, { "epoch": 0.38024636307898113, "grad_norm": 0.5865880846977234, "learning_rate": 6.990780318248416e-05, "loss": 1.7017, "step": 6822 }, { "epoch": 0.38030210133214426, "grad_norm": 0.532753050327301, "learning_rate": 6.989963485334562e-05, "loss": 1.7205, "step": 6823 }, { "epoch": 0.3803578395853074, "grad_norm": 0.6024113297462463, "learning_rate": 6.989146589310667e-05, "loss": 1.8499, "step": 6824 }, { "epoch": 0.3804135778384705, "grad_norm": 0.5912168622016907, "learning_rate": 6.988329630202641e-05, "loss": 1.7783, "step": 6825 }, { "epoch": 0.3804693160916337, "grad_norm": 0.5647505521774292, "learning_rate": 6.98751260803639e-05, "loss": 1.6106, "step": 6826 }, { "epoch": 0.38052505434479683, "grad_norm": 0.5149972438812256, "learning_rate": 6.98669552283783e-05, "loss": 1.652, "step": 6827 }, { "epoch": 0.38058079259795996, "grad_norm": 0.5642407536506653, "learning_rate": 6.98587837463287e-05, "loss": 1.6075, "step": 6828 }, { "epoch": 0.38063653085112314, "grad_norm": 0.6054338812828064, "learning_rate": 6.985061163447426e-05, "loss": 1.7205, "step": 6829 }, { "epoch": 0.3806922691042863, "grad_norm": 0.5490162372589111, "learning_rate": 6.984243889307415e-05, "loss": 1.605, "step": 6830 }, { "epoch": 0.3807480073574494, "grad_norm": 0.5481693744659424, "learning_rate": 6.983426552238756e-05, "loss": 1.6532, "step": 6831 }, { "epoch": 0.3808037456106126, "grad_norm": 0.5470540523529053, "learning_rate": 6.982609152267374e-05, "loss": 1.856, "step": 6832 }, { "epoch": 0.3808594838637757, "grad_norm": 0.5047014355659485, "learning_rate": 6.981791689419186e-05, "loss": 1.5632, "step": 6833 }, { "epoch": 0.38091522211693885, "grad_norm": 0.5213363766670227, "learning_rate": 6.980974163720123e-05, "loss": 1.648, "step": 6834 }, { "epoch": 0.380970960370102, "grad_norm": 0.5108797550201416, "learning_rate": 6.980156575196107e-05, "loss": 1.7048, "step": 6835 }, { "epoch": 0.38102669862326516, "grad_norm": 0.5571927428245544, "learning_rate": 6.979338923873073e-05, "loss": 1.7984, "step": 6836 }, { "epoch": 0.3810824368764283, "grad_norm": 0.5656031966209412, "learning_rate": 6.978521209776945e-05, "loss": 1.6214, "step": 6837 }, { "epoch": 0.3811381751295914, "grad_norm": 0.5520498752593994, "learning_rate": 6.977703432933661e-05, "loss": 1.5048, "step": 6838 }, { "epoch": 0.3811939133827546, "grad_norm": 0.5377273559570312, "learning_rate": 6.976885593369155e-05, "loss": 1.4111, "step": 6839 }, { "epoch": 0.38124965163591773, "grad_norm": 0.5396257042884827, "learning_rate": 6.976067691109365e-05, "loss": 1.6715, "step": 6840 }, { "epoch": 0.38130538988908086, "grad_norm": 0.5259842872619629, "learning_rate": 6.975249726180227e-05, "loss": 1.586, "step": 6841 }, { "epoch": 0.38136112814224404, "grad_norm": 0.5793870091438293, "learning_rate": 6.974431698607686e-05, "loss": 1.8532, "step": 6842 }, { "epoch": 0.3814168663954072, "grad_norm": 0.6075243353843689, "learning_rate": 6.973613608417683e-05, "loss": 1.8658, "step": 6843 }, { "epoch": 0.3814726046485703, "grad_norm": 0.5244048833847046, "learning_rate": 6.972795455636163e-05, "loss": 1.5298, "step": 6844 }, { "epoch": 0.3815283429017335, "grad_norm": 0.5625903010368347, "learning_rate": 6.971977240289073e-05, "loss": 1.7494, "step": 6845 }, { "epoch": 0.3815840811548966, "grad_norm": 0.5776612758636475, "learning_rate": 6.971158962402362e-05, "loss": 1.9495, "step": 6846 }, { "epoch": 0.38163981940805974, "grad_norm": 0.5811514258384705, "learning_rate": 6.970340622001983e-05, "loss": 1.6167, "step": 6847 }, { "epoch": 0.3816955576612229, "grad_norm": 0.5879440307617188, "learning_rate": 6.969522219113886e-05, "loss": 1.7636, "step": 6848 }, { "epoch": 0.38175129591438606, "grad_norm": 0.6386079788208008, "learning_rate": 6.968703753764027e-05, "loss": 1.779, "step": 6849 }, { "epoch": 0.3818070341675492, "grad_norm": 0.5324746966362, "learning_rate": 6.967885225978365e-05, "loss": 1.5693, "step": 6850 }, { "epoch": 0.3818627724207123, "grad_norm": 0.6155705451965332, "learning_rate": 6.967066635782855e-05, "loss": 1.8075, "step": 6851 }, { "epoch": 0.3819185106738755, "grad_norm": 0.5880451202392578, "learning_rate": 6.966247983203462e-05, "loss": 1.8192, "step": 6852 }, { "epoch": 0.38197424892703863, "grad_norm": 0.5279741287231445, "learning_rate": 6.965429268266147e-05, "loss": 1.5787, "step": 6853 }, { "epoch": 0.38202998718020176, "grad_norm": 0.5816035270690918, "learning_rate": 6.964610490996874e-05, "loss": 1.7935, "step": 6854 }, { "epoch": 0.38208572543336494, "grad_norm": 0.5708805918693542, "learning_rate": 6.963791651421612e-05, "loss": 1.6204, "step": 6855 }, { "epoch": 0.38214146368652807, "grad_norm": 0.5362871885299683, "learning_rate": 6.962972749566326e-05, "loss": 1.6198, "step": 6856 }, { "epoch": 0.3821972019396912, "grad_norm": 0.5008870363235474, "learning_rate": 6.962153785456991e-05, "loss": 1.3949, "step": 6857 }, { "epoch": 0.38225294019285433, "grad_norm": 0.5772041082382202, "learning_rate": 6.961334759119577e-05, "loss": 1.7137, "step": 6858 }, { "epoch": 0.3823086784460175, "grad_norm": 0.5443426966667175, "learning_rate": 6.960515670580061e-05, "loss": 1.809, "step": 6859 }, { "epoch": 0.38236441669918064, "grad_norm": 0.6082087755203247, "learning_rate": 6.959696519864418e-05, "loss": 1.8777, "step": 6860 }, { "epoch": 0.38242015495234377, "grad_norm": 0.5430213809013367, "learning_rate": 6.958877306998627e-05, "loss": 1.7168, "step": 6861 }, { "epoch": 0.38247589320550696, "grad_norm": 0.5611394047737122, "learning_rate": 6.95805803200867e-05, "loss": 1.7136, "step": 6862 }, { "epoch": 0.3825316314586701, "grad_norm": 0.5467121005058289, "learning_rate": 6.957238694920527e-05, "loss": 1.7348, "step": 6863 }, { "epoch": 0.3825873697118332, "grad_norm": 0.5907519459724426, "learning_rate": 6.956419295760184e-05, "loss": 1.8087, "step": 6864 }, { "epoch": 0.3826431079649964, "grad_norm": 0.4940342307090759, "learning_rate": 6.95559983455363e-05, "loss": 1.226, "step": 6865 }, { "epoch": 0.3826988462181595, "grad_norm": 0.525205135345459, "learning_rate": 6.954780311326849e-05, "loss": 1.6166, "step": 6866 }, { "epoch": 0.38275458447132266, "grad_norm": 0.5510271191596985, "learning_rate": 6.953960726105835e-05, "loss": 1.6143, "step": 6867 }, { "epoch": 0.38281032272448584, "grad_norm": 0.5778586268424988, "learning_rate": 6.953141078916578e-05, "loss": 1.8417, "step": 6868 }, { "epoch": 0.38286606097764897, "grad_norm": 0.5931724309921265, "learning_rate": 6.952321369785075e-05, "loss": 1.6908, "step": 6869 }, { "epoch": 0.3829217992308121, "grad_norm": 0.5995519161224365, "learning_rate": 6.951501598737318e-05, "loss": 1.9328, "step": 6870 }, { "epoch": 0.38297753748397523, "grad_norm": 0.5441159009933472, "learning_rate": 6.95068176579931e-05, "loss": 1.7226, "step": 6871 }, { "epoch": 0.3830332757371384, "grad_norm": 0.5795645117759705, "learning_rate": 6.94986187099705e-05, "loss": 1.8162, "step": 6872 }, { "epoch": 0.38308901399030154, "grad_norm": 0.5668213367462158, "learning_rate": 6.949041914356541e-05, "loss": 1.5981, "step": 6873 }, { "epoch": 0.38314475224346467, "grad_norm": 0.6034721732139587, "learning_rate": 6.948221895903784e-05, "loss": 1.688, "step": 6874 }, { "epoch": 0.38320049049662785, "grad_norm": 0.5386607050895691, "learning_rate": 6.94740181566479e-05, "loss": 1.6411, "step": 6875 }, { "epoch": 0.383256228749791, "grad_norm": 0.5482555627822876, "learning_rate": 6.946581673665561e-05, "loss": 1.3411, "step": 6876 }, { "epoch": 0.3833119670029541, "grad_norm": 0.5288286805152893, "learning_rate": 6.945761469932114e-05, "loss": 1.5896, "step": 6877 }, { "epoch": 0.3833677052561173, "grad_norm": 0.5721820592880249, "learning_rate": 6.944941204490456e-05, "loss": 1.7555, "step": 6878 }, { "epoch": 0.3834234435092804, "grad_norm": 0.5338029861450195, "learning_rate": 6.944120877366604e-05, "loss": 1.8117, "step": 6879 }, { "epoch": 0.38347918176244356, "grad_norm": 0.5430106520652771, "learning_rate": 6.943300488586572e-05, "loss": 1.5363, "step": 6880 }, { "epoch": 0.3835349200156067, "grad_norm": 0.5485236644744873, "learning_rate": 6.942480038176379e-05, "loss": 1.4549, "step": 6881 }, { "epoch": 0.38359065826876987, "grad_norm": 0.5767553448677063, "learning_rate": 6.941659526162045e-05, "loss": 1.5041, "step": 6882 }, { "epoch": 0.383646396521933, "grad_norm": 0.5788490176200867, "learning_rate": 6.940838952569589e-05, "loss": 1.8509, "step": 6883 }, { "epoch": 0.3837021347750961, "grad_norm": 0.5562904477119446, "learning_rate": 6.94001831742504e-05, "loss": 1.6337, "step": 6884 }, { "epoch": 0.3837578730282593, "grad_norm": 0.5514802932739258, "learning_rate": 6.939197620754419e-05, "loss": 1.6887, "step": 6885 }, { "epoch": 0.38381361128142244, "grad_norm": 0.6278872489929199, "learning_rate": 6.938376862583757e-05, "loss": 1.6762, "step": 6886 }, { "epoch": 0.38386934953458557, "grad_norm": 0.5348507761955261, "learning_rate": 6.937556042939083e-05, "loss": 1.5778, "step": 6887 }, { "epoch": 0.38392508778774875, "grad_norm": 0.555674135684967, "learning_rate": 6.936735161846429e-05, "loss": 1.6806, "step": 6888 }, { "epoch": 0.3839808260409119, "grad_norm": 0.5161069631576538, "learning_rate": 6.935914219331825e-05, "loss": 1.5607, "step": 6889 }, { "epoch": 0.384036564294075, "grad_norm": 0.5375397205352783, "learning_rate": 6.93509321542131e-05, "loss": 1.6835, "step": 6890 }, { "epoch": 0.3840923025472382, "grad_norm": 0.4695841073989868, "learning_rate": 6.934272150140921e-05, "loss": 1.3228, "step": 6891 }, { "epoch": 0.3841480408004013, "grad_norm": 0.5479111075401306, "learning_rate": 6.933451023516697e-05, "loss": 1.6331, "step": 6892 }, { "epoch": 0.38420377905356445, "grad_norm": 0.5705395936965942, "learning_rate": 6.932629835574679e-05, "loss": 1.7666, "step": 6893 }, { "epoch": 0.3842595173067276, "grad_norm": 0.5568275451660156, "learning_rate": 6.93180858634091e-05, "loss": 1.5809, "step": 6894 }, { "epoch": 0.38431525555989077, "grad_norm": 0.6088882088661194, "learning_rate": 6.930987275841439e-05, "loss": 1.7695, "step": 6895 }, { "epoch": 0.3843709938130539, "grad_norm": 0.5949798822402954, "learning_rate": 6.930165904102305e-05, "loss": 1.8917, "step": 6896 }, { "epoch": 0.384426732066217, "grad_norm": 0.557823657989502, "learning_rate": 6.929344471149566e-05, "loss": 1.8922, "step": 6897 }, { "epoch": 0.3844824703193802, "grad_norm": 0.5406614542007446, "learning_rate": 6.928522977009268e-05, "loss": 1.6488, "step": 6898 }, { "epoch": 0.38453820857254334, "grad_norm": 0.5692750811576843, "learning_rate": 6.927701421707466e-05, "loss": 1.6886, "step": 6899 }, { "epoch": 0.38459394682570647, "grad_norm": 0.5827295780181885, "learning_rate": 6.926879805270212e-05, "loss": 1.6532, "step": 6900 }, { "epoch": 0.38464968507886965, "grad_norm": 0.5955531001091003, "learning_rate": 6.926058127723568e-05, "loss": 1.6202, "step": 6901 }, { "epoch": 0.3847054233320328, "grad_norm": 0.5544630885124207, "learning_rate": 6.925236389093588e-05, "loss": 1.1835, "step": 6902 }, { "epoch": 0.3847611615851959, "grad_norm": 0.6354855298995972, "learning_rate": 6.924414589406335e-05, "loss": 1.9214, "step": 6903 }, { "epoch": 0.38481689983835904, "grad_norm": 0.6088757514953613, "learning_rate": 6.923592728687871e-05, "loss": 1.8236, "step": 6904 }, { "epoch": 0.3848726380915222, "grad_norm": 0.5689512491226196, "learning_rate": 6.922770806964263e-05, "loss": 1.5128, "step": 6905 }, { "epoch": 0.38492837634468535, "grad_norm": 0.5286409854888916, "learning_rate": 6.921948824261573e-05, "loss": 1.5956, "step": 6906 }, { "epoch": 0.3849841145978485, "grad_norm": 0.5316895842552185, "learning_rate": 6.921126780605873e-05, "loss": 1.5846, "step": 6907 }, { "epoch": 0.38503985285101167, "grad_norm": 0.5461425185203552, "learning_rate": 6.920304676023233e-05, "loss": 1.6645, "step": 6908 }, { "epoch": 0.3850955911041748, "grad_norm": 0.5628203749656677, "learning_rate": 6.919482510539723e-05, "loss": 1.6028, "step": 6909 }, { "epoch": 0.3851513293573379, "grad_norm": 0.5715482234954834, "learning_rate": 6.918660284181421e-05, "loss": 1.7378, "step": 6910 }, { "epoch": 0.3852070676105011, "grad_norm": 0.6020052433013916, "learning_rate": 6.9178379969744e-05, "loss": 1.8591, "step": 6911 }, { "epoch": 0.38526280586366424, "grad_norm": 0.5738694071769714, "learning_rate": 6.917015648944741e-05, "loss": 1.527, "step": 6912 }, { "epoch": 0.38531854411682737, "grad_norm": 0.5757240653038025, "learning_rate": 6.916193240118522e-05, "loss": 1.6982, "step": 6913 }, { "epoch": 0.38537428236999055, "grad_norm": 0.5647144913673401, "learning_rate": 6.915370770521825e-05, "loss": 1.6709, "step": 6914 }, { "epoch": 0.3854300206231537, "grad_norm": 0.5539698004722595, "learning_rate": 6.914548240180736e-05, "loss": 1.8178, "step": 6915 }, { "epoch": 0.3854857588763168, "grad_norm": 0.5621739625930786, "learning_rate": 6.913725649121337e-05, "loss": 1.8038, "step": 6916 }, { "epoch": 0.38554149712947994, "grad_norm": 0.5707613229751587, "learning_rate": 6.91290299736972e-05, "loss": 1.7155, "step": 6917 }, { "epoch": 0.3855972353826431, "grad_norm": 0.5707844495773315, "learning_rate": 6.912080284951972e-05, "loss": 1.7316, "step": 6918 }, { "epoch": 0.38565297363580625, "grad_norm": 0.5531010627746582, "learning_rate": 6.911257511894188e-05, "loss": 1.7607, "step": 6919 }, { "epoch": 0.3857087118889694, "grad_norm": 0.6005899906158447, "learning_rate": 6.910434678222457e-05, "loss": 1.8731, "step": 6920 }, { "epoch": 0.38576445014213256, "grad_norm": 0.5527727603912354, "learning_rate": 6.909611783962877e-05, "loss": 1.3704, "step": 6921 }, { "epoch": 0.3858201883952957, "grad_norm": 0.5586572885513306, "learning_rate": 6.908788829141544e-05, "loss": 1.6253, "step": 6922 }, { "epoch": 0.3858759266484588, "grad_norm": 0.6035952568054199, "learning_rate": 6.907965813784558e-05, "loss": 1.9226, "step": 6923 }, { "epoch": 0.385931664901622, "grad_norm": 0.5370834469795227, "learning_rate": 6.907142737918023e-05, "loss": 1.5934, "step": 6924 }, { "epoch": 0.38598740315478514, "grad_norm": 0.5954363346099854, "learning_rate": 6.906319601568038e-05, "loss": 1.8197, "step": 6925 }, { "epoch": 0.38604314140794826, "grad_norm": 0.5880860686302185, "learning_rate": 6.90549640476071e-05, "loss": 1.9775, "step": 6926 }, { "epoch": 0.3860988796611114, "grad_norm": 0.6047815084457397, "learning_rate": 6.904673147522147e-05, "loss": 1.9008, "step": 6927 }, { "epoch": 0.3861546179142746, "grad_norm": 0.6101181507110596, "learning_rate": 6.903849829878457e-05, "loss": 1.9632, "step": 6928 }, { "epoch": 0.3862103561674377, "grad_norm": 0.5670501589775085, "learning_rate": 6.903026451855748e-05, "loss": 1.7489, "step": 6929 }, { "epoch": 0.38626609442060084, "grad_norm": 0.6123764514923096, "learning_rate": 6.902203013480137e-05, "loss": 1.7719, "step": 6930 }, { "epoch": 0.386321832673764, "grad_norm": 0.53583824634552, "learning_rate": 6.901379514777739e-05, "loss": 1.5504, "step": 6931 }, { "epoch": 0.38637757092692715, "grad_norm": 0.5257768630981445, "learning_rate": 6.900555955774666e-05, "loss": 1.6045, "step": 6932 }, { "epoch": 0.3864333091800903, "grad_norm": 0.5276762843132019, "learning_rate": 6.899732336497038e-05, "loss": 1.7366, "step": 6933 }, { "epoch": 0.38648904743325346, "grad_norm": 0.555980384349823, "learning_rate": 6.898908656970979e-05, "loss": 1.3954, "step": 6934 }, { "epoch": 0.3865447856864166, "grad_norm": 0.5937703847885132, "learning_rate": 6.898084917222609e-05, "loss": 1.791, "step": 6935 }, { "epoch": 0.3866005239395797, "grad_norm": 0.5324926376342773, "learning_rate": 6.89726111727805e-05, "loss": 1.7835, "step": 6936 }, { "epoch": 0.3866562621927429, "grad_norm": 0.569644033908844, "learning_rate": 6.896437257163432e-05, "loss": 1.651, "step": 6937 }, { "epoch": 0.38671200044590603, "grad_norm": 0.5893319249153137, "learning_rate": 6.89561333690488e-05, "loss": 1.8836, "step": 6938 }, { "epoch": 0.38676773869906916, "grad_norm": 0.5247541666030884, "learning_rate": 6.894789356528526e-05, "loss": 1.5643, "step": 6939 }, { "epoch": 0.3868234769522323, "grad_norm": 0.5343844890594482, "learning_rate": 6.893965316060501e-05, "loss": 1.6483, "step": 6940 }, { "epoch": 0.3868792152053955, "grad_norm": 0.5714672803878784, "learning_rate": 6.893141215526938e-05, "loss": 1.5949, "step": 6941 }, { "epoch": 0.3869349534585586, "grad_norm": 0.5850149989128113, "learning_rate": 6.892317054953975e-05, "loss": 1.7971, "step": 6942 }, { "epoch": 0.38699069171172173, "grad_norm": 0.570669412612915, "learning_rate": 6.891492834367746e-05, "loss": 1.8339, "step": 6943 }, { "epoch": 0.3870464299648849, "grad_norm": 0.5296490788459778, "learning_rate": 6.890668553794392e-05, "loss": 1.6175, "step": 6944 }, { "epoch": 0.38710216821804805, "grad_norm": 0.5491392612457275, "learning_rate": 6.889844213260057e-05, "loss": 1.7679, "step": 6945 }, { "epoch": 0.3871579064712112, "grad_norm": 0.5886465907096863, "learning_rate": 6.88901981279088e-05, "loss": 1.5769, "step": 6946 }, { "epoch": 0.38721364472437436, "grad_norm": 0.5220004916191101, "learning_rate": 6.88819535241301e-05, "loss": 1.4678, "step": 6947 }, { "epoch": 0.3872693829775375, "grad_norm": 0.5555586814880371, "learning_rate": 6.887370832152592e-05, "loss": 1.6784, "step": 6948 }, { "epoch": 0.3873251212307006, "grad_norm": 0.5332651138305664, "learning_rate": 6.886546252035775e-05, "loss": 1.6139, "step": 6949 }, { "epoch": 0.38738085948386375, "grad_norm": 0.5473794341087341, "learning_rate": 6.88572161208871e-05, "loss": 1.8137, "step": 6950 }, { "epoch": 0.38743659773702693, "grad_norm": 0.5803813934326172, "learning_rate": 6.88489691233755e-05, "loss": 1.5237, "step": 6951 }, { "epoch": 0.38749233599019006, "grad_norm": 0.5329601168632507, "learning_rate": 6.884072152808451e-05, "loss": 1.686, "step": 6952 }, { "epoch": 0.3875480742433532, "grad_norm": 0.5633809566497803, "learning_rate": 6.883247333527567e-05, "loss": 1.9771, "step": 6953 }, { "epoch": 0.3876038124965164, "grad_norm": 0.6174986958503723, "learning_rate": 6.882422454521058e-05, "loss": 1.7549, "step": 6954 }, { "epoch": 0.3876595507496795, "grad_norm": 0.5496551394462585, "learning_rate": 6.881597515815084e-05, "loss": 1.7045, "step": 6955 }, { "epoch": 0.38771528900284263, "grad_norm": 0.5577127933502197, "learning_rate": 6.880772517435807e-05, "loss": 1.5901, "step": 6956 }, { "epoch": 0.3877710272560058, "grad_norm": 0.5230315327644348, "learning_rate": 6.879947459409393e-05, "loss": 1.5849, "step": 6957 }, { "epoch": 0.38782676550916895, "grad_norm": 0.5241686105728149, "learning_rate": 6.879122341762003e-05, "loss": 1.8152, "step": 6958 }, { "epoch": 0.3878825037623321, "grad_norm": 0.5810775756835938, "learning_rate": 6.878297164519812e-05, "loss": 1.7573, "step": 6959 }, { "epoch": 0.38793824201549526, "grad_norm": 0.5543670058250427, "learning_rate": 6.877471927708985e-05, "loss": 1.7487, "step": 6960 }, { "epoch": 0.3879939802686584, "grad_norm": 0.5780448317527771, "learning_rate": 6.876646631355693e-05, "loss": 1.8512, "step": 6961 }, { "epoch": 0.3880497185218215, "grad_norm": 0.6595468521118164, "learning_rate": 6.875821275486113e-05, "loss": 2.1185, "step": 6962 }, { "epoch": 0.38810545677498465, "grad_norm": 0.5663919448852539, "learning_rate": 6.874995860126419e-05, "loss": 1.6607, "step": 6963 }, { "epoch": 0.38816119502814783, "grad_norm": 0.6084817051887512, "learning_rate": 6.874170385302789e-05, "loss": 1.4841, "step": 6964 }, { "epoch": 0.38821693328131096, "grad_norm": 0.5507417321205139, "learning_rate": 6.8733448510414e-05, "loss": 1.7557, "step": 6965 }, { "epoch": 0.3882726715344741, "grad_norm": 0.5766531825065613, "learning_rate": 6.872519257368437e-05, "loss": 1.7722, "step": 6966 }, { "epoch": 0.3883284097876373, "grad_norm": 0.5653195381164551, "learning_rate": 6.871693604310077e-05, "loss": 1.8058, "step": 6967 }, { "epoch": 0.3883841480408004, "grad_norm": 0.6037474274635315, "learning_rate": 6.87086789189251e-05, "loss": 1.8542, "step": 6968 }, { "epoch": 0.38843988629396353, "grad_norm": 0.5463787317276001, "learning_rate": 6.870042120141923e-05, "loss": 1.7221, "step": 6969 }, { "epoch": 0.3884956245471267, "grad_norm": 0.5135644674301147, "learning_rate": 6.869216289084503e-05, "loss": 1.5492, "step": 6970 }, { "epoch": 0.38855136280028985, "grad_norm": 0.5640287399291992, "learning_rate": 6.86839039874644e-05, "loss": 1.4507, "step": 6971 }, { "epoch": 0.388607101053453, "grad_norm": 0.5661764144897461, "learning_rate": 6.867564449153925e-05, "loss": 1.7683, "step": 6972 }, { "epoch": 0.3886628393066161, "grad_norm": 0.5671542882919312, "learning_rate": 6.866738440333157e-05, "loss": 1.7076, "step": 6973 }, { "epoch": 0.3887185775597793, "grad_norm": 0.5259964466094971, "learning_rate": 6.865912372310328e-05, "loss": 1.542, "step": 6974 }, { "epoch": 0.3887743158129424, "grad_norm": 0.5321882963180542, "learning_rate": 6.865086245111638e-05, "loss": 1.6909, "step": 6975 }, { "epoch": 0.38883005406610555, "grad_norm": 0.5812041759490967, "learning_rate": 6.864260058763286e-05, "loss": 1.8409, "step": 6976 }, { "epoch": 0.38888579231926873, "grad_norm": 0.5516645312309265, "learning_rate": 6.863433813291477e-05, "loss": 1.5931, "step": 6977 }, { "epoch": 0.38894153057243186, "grad_norm": 0.612776517868042, "learning_rate": 6.86260750872241e-05, "loss": 1.7741, "step": 6978 }, { "epoch": 0.388997268825595, "grad_norm": 0.5400133728981018, "learning_rate": 6.861781145082293e-05, "loss": 1.6731, "step": 6979 }, { "epoch": 0.3890530070787582, "grad_norm": 0.5253887176513672, "learning_rate": 6.860954722397332e-05, "loss": 1.6809, "step": 6980 }, { "epoch": 0.3891087453319213, "grad_norm": 0.5338975191116333, "learning_rate": 6.860128240693737e-05, "loss": 1.7078, "step": 6981 }, { "epoch": 0.38916448358508443, "grad_norm": 0.6083932518959045, "learning_rate": 6.85930169999772e-05, "loss": 1.7694, "step": 6982 }, { "epoch": 0.3892202218382476, "grad_norm": 0.5741243958473206, "learning_rate": 6.858475100335496e-05, "loss": 1.7516, "step": 6983 }, { "epoch": 0.38927596009141074, "grad_norm": 0.5835102200508118, "learning_rate": 6.857648441733275e-05, "loss": 1.7409, "step": 6984 }, { "epoch": 0.3893316983445739, "grad_norm": 0.5485714673995972, "learning_rate": 6.856821724217276e-05, "loss": 1.7237, "step": 6985 }, { "epoch": 0.389387436597737, "grad_norm": 0.5908092856407166, "learning_rate": 6.855994947813719e-05, "loss": 1.8842, "step": 6986 }, { "epoch": 0.3894431748509002, "grad_norm": 0.5635112524032593, "learning_rate": 6.855168112548823e-05, "loss": 1.8356, "step": 6987 }, { "epoch": 0.3894989131040633, "grad_norm": 0.6175239086151123, "learning_rate": 6.85434121844881e-05, "loss": 2.1173, "step": 6988 }, { "epoch": 0.38955465135722644, "grad_norm": 0.5377556085586548, "learning_rate": 6.853514265539907e-05, "loss": 1.6531, "step": 6989 }, { "epoch": 0.38961038961038963, "grad_norm": 0.5529573559761047, "learning_rate": 6.852687253848337e-05, "loss": 1.7125, "step": 6990 }, { "epoch": 0.38966612786355276, "grad_norm": 0.5733687877655029, "learning_rate": 6.85186018340033e-05, "loss": 1.8723, "step": 6991 }, { "epoch": 0.3897218661167159, "grad_norm": 0.5605233311653137, "learning_rate": 6.851033054222115e-05, "loss": 1.9066, "step": 6992 }, { "epoch": 0.38977760436987907, "grad_norm": 0.5196309089660645, "learning_rate": 6.850205866339923e-05, "loss": 1.6027, "step": 6993 }, { "epoch": 0.3898333426230422, "grad_norm": 0.5691904425621033, "learning_rate": 6.849378619779989e-05, "loss": 1.7806, "step": 6994 }, { "epoch": 0.38988908087620533, "grad_norm": 0.5791077017784119, "learning_rate": 6.848551314568548e-05, "loss": 1.8153, "step": 6995 }, { "epoch": 0.38994481912936846, "grad_norm": 0.5611302256584167, "learning_rate": 6.847723950731837e-05, "loss": 1.7705, "step": 6996 }, { "epoch": 0.39000055738253164, "grad_norm": 0.6004642248153687, "learning_rate": 6.846896528296094e-05, "loss": 1.6717, "step": 6997 }, { "epoch": 0.39005629563569477, "grad_norm": 0.5229793787002563, "learning_rate": 6.846069047287562e-05, "loss": 1.6567, "step": 6998 }, { "epoch": 0.3901120338888579, "grad_norm": 0.5206711888313293, "learning_rate": 6.845241507732483e-05, "loss": 1.3903, "step": 6999 }, { "epoch": 0.3901677721420211, "grad_norm": 0.6022440791130066, "learning_rate": 6.844413909657104e-05, "loss": 1.8607, "step": 7000 }, { "epoch": 0.3902235103951842, "grad_norm": 0.5634634494781494, "learning_rate": 6.843586253087666e-05, "loss": 1.6199, "step": 7001 }, { "epoch": 0.39027924864834734, "grad_norm": 0.5622709393501282, "learning_rate": 6.842758538050422e-05, "loss": 1.5923, "step": 7002 }, { "epoch": 0.39033498690151053, "grad_norm": 0.5336858034133911, "learning_rate": 6.841930764571623e-05, "loss": 1.6086, "step": 7003 }, { "epoch": 0.39039072515467366, "grad_norm": 0.6216438412666321, "learning_rate": 6.841102932677517e-05, "loss": 1.8973, "step": 7004 }, { "epoch": 0.3904464634078368, "grad_norm": 0.5596641898155212, "learning_rate": 6.840275042394363e-05, "loss": 1.4897, "step": 7005 }, { "epoch": 0.39050220166099997, "grad_norm": 0.5638755559921265, "learning_rate": 6.839447093748413e-05, "loss": 1.7267, "step": 7006 }, { "epoch": 0.3905579399141631, "grad_norm": 0.5759851932525635, "learning_rate": 6.838619086765925e-05, "loss": 1.9025, "step": 7007 }, { "epoch": 0.39061367816732623, "grad_norm": 0.5657535791397095, "learning_rate": 6.83779102147316e-05, "loss": 1.6509, "step": 7008 }, { "epoch": 0.39066941642048936, "grad_norm": 0.5276607275009155, "learning_rate": 6.83696289789638e-05, "loss": 1.6244, "step": 7009 }, { "epoch": 0.39072515467365254, "grad_norm": 0.6091243624687195, "learning_rate": 6.836134716061845e-05, "loss": 1.7403, "step": 7010 }, { "epoch": 0.39078089292681567, "grad_norm": 0.5518734455108643, "learning_rate": 6.835306475995823e-05, "loss": 1.6201, "step": 7011 }, { "epoch": 0.3908366311799788, "grad_norm": 0.5169443488121033, "learning_rate": 6.834478177724581e-05, "loss": 1.5593, "step": 7012 }, { "epoch": 0.390892369433142, "grad_norm": 0.5405734181404114, "learning_rate": 6.833649821274386e-05, "loss": 1.6275, "step": 7013 }, { "epoch": 0.3909481076863051, "grad_norm": 0.639498233795166, "learning_rate": 6.83282140667151e-05, "loss": 1.9288, "step": 7014 }, { "epoch": 0.39100384593946824, "grad_norm": 0.5509902238845825, "learning_rate": 6.831992933942225e-05, "loss": 1.6756, "step": 7015 }, { "epoch": 0.3910595841926314, "grad_norm": 0.6026686429977417, "learning_rate": 6.831164403112806e-05, "loss": 1.8422, "step": 7016 }, { "epoch": 0.39111532244579456, "grad_norm": 0.4942910969257355, "learning_rate": 6.830335814209527e-05, "loss": 1.407, "step": 7017 }, { "epoch": 0.3911710606989577, "grad_norm": 0.5921064615249634, "learning_rate": 6.829507167258671e-05, "loss": 1.7507, "step": 7018 }, { "epoch": 0.3912267989521208, "grad_norm": 0.5901893377304077, "learning_rate": 6.828678462286511e-05, "loss": 1.9612, "step": 7019 }, { "epoch": 0.391282537205284, "grad_norm": 0.5834552049636841, "learning_rate": 6.827849699319333e-05, "loss": 1.8656, "step": 7020 }, { "epoch": 0.3913382754584471, "grad_norm": 0.5791158080101013, "learning_rate": 6.827020878383418e-05, "loss": 1.6849, "step": 7021 }, { "epoch": 0.39139401371161026, "grad_norm": 0.6698895692825317, "learning_rate": 6.826191999505056e-05, "loss": 1.9619, "step": 7022 }, { "epoch": 0.39144975196477344, "grad_norm": 0.5854638814926147, "learning_rate": 6.82536306271053e-05, "loss": 1.6066, "step": 7023 }, { "epoch": 0.39150549021793657, "grad_norm": 0.5511733293533325, "learning_rate": 6.82453406802613e-05, "loss": 1.8761, "step": 7024 }, { "epoch": 0.3915612284710997, "grad_norm": 0.5574920177459717, "learning_rate": 6.823705015478148e-05, "loss": 1.494, "step": 7025 }, { "epoch": 0.3916169667242629, "grad_norm": 0.5293987989425659, "learning_rate": 6.822875905092876e-05, "loss": 1.4918, "step": 7026 }, { "epoch": 0.391672704977426, "grad_norm": 0.5626353621482849, "learning_rate": 6.822046736896607e-05, "loss": 1.7521, "step": 7027 }, { "epoch": 0.39172844323058914, "grad_norm": 0.5664160847663879, "learning_rate": 6.821217510915639e-05, "loss": 1.5782, "step": 7028 }, { "epoch": 0.3917841814837523, "grad_norm": 0.5288576483726501, "learning_rate": 6.820388227176271e-05, "loss": 1.4754, "step": 7029 }, { "epoch": 0.39183991973691545, "grad_norm": 0.5488860607147217, "learning_rate": 6.819558885704801e-05, "loss": 1.6245, "step": 7030 }, { "epoch": 0.3918956579900786, "grad_norm": 0.5747123956680298, "learning_rate": 6.818729486527533e-05, "loss": 1.7134, "step": 7031 }, { "epoch": 0.3919513962432417, "grad_norm": 0.5334782600402832, "learning_rate": 6.817900029670769e-05, "loss": 1.6473, "step": 7032 }, { "epoch": 0.3920071344964049, "grad_norm": 0.5332539081573486, "learning_rate": 6.817070515160815e-05, "loss": 1.4961, "step": 7033 }, { "epoch": 0.392062872749568, "grad_norm": 0.5700680017471313, "learning_rate": 6.816240943023977e-05, "loss": 1.8336, "step": 7034 }, { "epoch": 0.39211861100273115, "grad_norm": 0.5893431901931763, "learning_rate": 6.815411313286568e-05, "loss": 1.8517, "step": 7035 }, { "epoch": 0.39217434925589434, "grad_norm": 0.5954105854034424, "learning_rate": 6.814581625974897e-05, "loss": 1.8405, "step": 7036 }, { "epoch": 0.39223008750905747, "grad_norm": 0.5694375038146973, "learning_rate": 6.813751881115275e-05, "loss": 1.7636, "step": 7037 }, { "epoch": 0.3922858257622206, "grad_norm": 0.6035060286521912, "learning_rate": 6.812922078734019e-05, "loss": 1.8142, "step": 7038 }, { "epoch": 0.3923415640153838, "grad_norm": 0.6111207008361816, "learning_rate": 6.812092218857444e-05, "loss": 1.7048, "step": 7039 }, { "epoch": 0.3923973022685469, "grad_norm": 0.5596774220466614, "learning_rate": 6.811262301511869e-05, "loss": 1.652, "step": 7040 }, { "epoch": 0.39245304052171004, "grad_norm": 0.5244095921516418, "learning_rate": 6.810432326723615e-05, "loss": 1.325, "step": 7041 }, { "epoch": 0.39250877877487317, "grad_norm": 0.5797486305236816, "learning_rate": 6.809602294519004e-05, "loss": 1.7832, "step": 7042 }, { "epoch": 0.39256451702803635, "grad_norm": 0.5226321816444397, "learning_rate": 6.808772204924357e-05, "loss": 1.6449, "step": 7043 }, { "epoch": 0.3926202552811995, "grad_norm": 0.5220246911048889, "learning_rate": 6.807942057966003e-05, "loss": 1.6308, "step": 7044 }, { "epoch": 0.3926759935343626, "grad_norm": 0.7185441255569458, "learning_rate": 6.807111853670268e-05, "loss": 1.6675, "step": 7045 }, { "epoch": 0.3927317317875258, "grad_norm": 0.6072642803192139, "learning_rate": 6.806281592063481e-05, "loss": 1.8951, "step": 7046 }, { "epoch": 0.3927874700406889, "grad_norm": 0.5583004355430603, "learning_rate": 6.805451273171972e-05, "loss": 1.686, "step": 7047 }, { "epoch": 0.39284320829385205, "grad_norm": 0.5066385865211487, "learning_rate": 6.804620897022076e-05, "loss": 1.407, "step": 7048 }, { "epoch": 0.39289894654701524, "grad_norm": 0.5519012212753296, "learning_rate": 6.803790463640127e-05, "loss": 1.8137, "step": 7049 }, { "epoch": 0.39295468480017837, "grad_norm": 0.5573792457580566, "learning_rate": 6.802959973052461e-05, "loss": 1.7861, "step": 7050 }, { "epoch": 0.3930104230533415, "grad_norm": 0.5672924518585205, "learning_rate": 6.802129425285417e-05, "loss": 1.6572, "step": 7051 }, { "epoch": 0.3930661613065047, "grad_norm": 0.5737549066543579, "learning_rate": 6.801298820365333e-05, "loss": 1.7467, "step": 7052 }, { "epoch": 0.3931218995596678, "grad_norm": 0.5474954843521118, "learning_rate": 6.800468158318554e-05, "loss": 1.7429, "step": 7053 }, { "epoch": 0.39317763781283094, "grad_norm": 0.549497127532959, "learning_rate": 6.799637439171424e-05, "loss": 1.764, "step": 7054 }, { "epoch": 0.39323337606599407, "grad_norm": 0.5415019392967224, "learning_rate": 6.798806662950286e-05, "loss": 1.4691, "step": 7055 }, { "epoch": 0.39328911431915725, "grad_norm": 0.5431099534034729, "learning_rate": 6.797975829681487e-05, "loss": 1.5577, "step": 7056 }, { "epoch": 0.3933448525723204, "grad_norm": 0.549314558506012, "learning_rate": 6.79714493939138e-05, "loss": 1.7471, "step": 7057 }, { "epoch": 0.3934005908254835, "grad_norm": 0.5444470047950745, "learning_rate": 6.796313992106313e-05, "loss": 1.765, "step": 7058 }, { "epoch": 0.3934563290786467, "grad_norm": 0.57083660364151, "learning_rate": 6.795482987852638e-05, "loss": 1.9101, "step": 7059 }, { "epoch": 0.3935120673318098, "grad_norm": 0.5475842952728271, "learning_rate": 6.794651926656711e-05, "loss": 1.8193, "step": 7060 }, { "epoch": 0.39356780558497295, "grad_norm": 0.5259652733802795, "learning_rate": 6.793820808544891e-05, "loss": 1.3794, "step": 7061 }, { "epoch": 0.39362354383813614, "grad_norm": 0.5105850100517273, "learning_rate": 6.792989633543531e-05, "loss": 1.5634, "step": 7062 }, { "epoch": 0.39367928209129927, "grad_norm": 0.5771433711051941, "learning_rate": 6.792158401678994e-05, "loss": 1.6858, "step": 7063 }, { "epoch": 0.3937350203444624, "grad_norm": 0.5675138235092163, "learning_rate": 6.791327112977644e-05, "loss": 1.8272, "step": 7064 }, { "epoch": 0.3937907585976255, "grad_norm": 0.5633112788200378, "learning_rate": 6.790495767465839e-05, "loss": 1.7226, "step": 7065 }, { "epoch": 0.3938464968507887, "grad_norm": 0.5350648760795593, "learning_rate": 6.789664365169947e-05, "loss": 1.5082, "step": 7066 }, { "epoch": 0.39390223510395184, "grad_norm": 0.5656428337097168, "learning_rate": 6.788832906116338e-05, "loss": 1.4914, "step": 7067 }, { "epoch": 0.39395797335711497, "grad_norm": 0.5312878489494324, "learning_rate": 6.78800139033138e-05, "loss": 1.5864, "step": 7068 }, { "epoch": 0.39401371161027815, "grad_norm": 0.6321331262588501, "learning_rate": 6.787169817841442e-05, "loss": 1.9452, "step": 7069 }, { "epoch": 0.3940694498634413, "grad_norm": 0.5593883991241455, "learning_rate": 6.786338188672896e-05, "loss": 1.7637, "step": 7070 }, { "epoch": 0.3941251881166044, "grad_norm": 0.5405465960502625, "learning_rate": 6.785506502852118e-05, "loss": 1.6875, "step": 7071 }, { "epoch": 0.3941809263697676, "grad_norm": 0.5527162551879883, "learning_rate": 6.784674760405482e-05, "loss": 1.6496, "step": 7072 }, { "epoch": 0.3942366646229307, "grad_norm": 0.5357568264007568, "learning_rate": 6.78384296135937e-05, "loss": 1.7234, "step": 7073 }, { "epoch": 0.39429240287609385, "grad_norm": 0.5588380694389343, "learning_rate": 6.783011105740162e-05, "loss": 1.9166, "step": 7074 }, { "epoch": 0.39434814112925703, "grad_norm": 0.7392244338989258, "learning_rate": 6.782179193574234e-05, "loss": 1.6746, "step": 7075 }, { "epoch": 0.39440387938242016, "grad_norm": 0.5365987420082092, "learning_rate": 6.781347224887974e-05, "loss": 1.6615, "step": 7076 }, { "epoch": 0.3944596176355833, "grad_norm": 0.5493837594985962, "learning_rate": 6.780515199707766e-05, "loss": 1.7271, "step": 7077 }, { "epoch": 0.3945153558887464, "grad_norm": 0.5309239029884338, "learning_rate": 6.779683118059997e-05, "loss": 1.5172, "step": 7078 }, { "epoch": 0.3945710941419096, "grad_norm": 0.5167561769485474, "learning_rate": 6.778850979971057e-05, "loss": 1.5777, "step": 7079 }, { "epoch": 0.39462683239507274, "grad_norm": 0.5119823217391968, "learning_rate": 6.778018785467332e-05, "loss": 1.5685, "step": 7080 }, { "epoch": 0.39468257064823586, "grad_norm": 0.5578561425209045, "learning_rate": 6.777186534575222e-05, "loss": 1.6626, "step": 7081 }, { "epoch": 0.39473830890139905, "grad_norm": 0.535065233707428, "learning_rate": 6.776354227321114e-05, "loss": 1.5554, "step": 7082 }, { "epoch": 0.3947940471545622, "grad_norm": 0.5996119976043701, "learning_rate": 6.775521863731408e-05, "loss": 1.613, "step": 7083 }, { "epoch": 0.3948497854077253, "grad_norm": 0.5490982532501221, "learning_rate": 6.7746894438325e-05, "loss": 1.6554, "step": 7084 }, { "epoch": 0.3949055236608885, "grad_norm": 0.5607420802116394, "learning_rate": 6.773856967650789e-05, "loss": 1.7542, "step": 7085 }, { "epoch": 0.3949612619140516, "grad_norm": 0.594559907913208, "learning_rate": 6.773024435212678e-05, "loss": 1.8008, "step": 7086 }, { "epoch": 0.39501700016721475, "grad_norm": 0.5436771512031555, "learning_rate": 6.77219184654457e-05, "loss": 1.6853, "step": 7087 }, { "epoch": 0.3950727384203779, "grad_norm": 0.6430955529212952, "learning_rate": 6.771359201672868e-05, "loss": 1.877, "step": 7088 }, { "epoch": 0.39512847667354106, "grad_norm": 0.5667055249214172, "learning_rate": 6.770526500623982e-05, "loss": 1.5347, "step": 7089 }, { "epoch": 0.3951842149267042, "grad_norm": 0.5299628376960754, "learning_rate": 6.769693743424317e-05, "loss": 1.6611, "step": 7090 }, { "epoch": 0.3952399531798673, "grad_norm": 0.6088326573371887, "learning_rate": 6.768860930100285e-05, "loss": 1.991, "step": 7091 }, { "epoch": 0.3952956914330305, "grad_norm": 0.5899388790130615, "learning_rate": 6.768028060678296e-05, "loss": 1.8402, "step": 7092 }, { "epoch": 0.39535142968619363, "grad_norm": 0.5693525075912476, "learning_rate": 6.767195135184765e-05, "loss": 1.6969, "step": 7093 }, { "epoch": 0.39540716793935676, "grad_norm": 0.5347588658332825, "learning_rate": 6.766362153646111e-05, "loss": 1.6525, "step": 7094 }, { "epoch": 0.39546290619251995, "grad_norm": 0.5795377492904663, "learning_rate": 6.765529116088745e-05, "loss": 1.7744, "step": 7095 }, { "epoch": 0.3955186444456831, "grad_norm": 0.5230005979537964, "learning_rate": 6.764696022539091e-05, "loss": 1.6068, "step": 7096 }, { "epoch": 0.3955743826988462, "grad_norm": 0.5676483511924744, "learning_rate": 6.763862873023567e-05, "loss": 1.6501, "step": 7097 }, { "epoch": 0.3956301209520094, "grad_norm": 0.5104279518127441, "learning_rate": 6.763029667568597e-05, "loss": 1.5805, "step": 7098 }, { "epoch": 0.3956858592051725, "grad_norm": 0.575018048286438, "learning_rate": 6.762196406200604e-05, "loss": 1.7185, "step": 7099 }, { "epoch": 0.39574159745833565, "grad_norm": 0.5459030270576477, "learning_rate": 6.761363088946017e-05, "loss": 1.7264, "step": 7100 }, { "epoch": 0.3957973357114988, "grad_norm": 0.5303768515586853, "learning_rate": 6.760529715831262e-05, "loss": 1.6626, "step": 7101 }, { "epoch": 0.39585307396466196, "grad_norm": 0.5729551911354065, "learning_rate": 6.759696286882769e-05, "loss": 1.827, "step": 7102 }, { "epoch": 0.3959088122178251, "grad_norm": 0.578536331653595, "learning_rate": 6.758862802126969e-05, "loss": 1.8003, "step": 7103 }, { "epoch": 0.3959645504709882, "grad_norm": 0.5476341247558594, "learning_rate": 6.758029261590296e-05, "loss": 1.7641, "step": 7104 }, { "epoch": 0.3960202887241514, "grad_norm": 0.5585542917251587, "learning_rate": 6.757195665299186e-05, "loss": 1.6907, "step": 7105 }, { "epoch": 0.39607602697731453, "grad_norm": 0.5314999222755432, "learning_rate": 6.756362013280072e-05, "loss": 1.5457, "step": 7106 }, { "epoch": 0.39613176523047766, "grad_norm": 0.5275375247001648, "learning_rate": 6.755528305559398e-05, "loss": 1.6021, "step": 7107 }, { "epoch": 0.39618750348364085, "grad_norm": 0.5544595122337341, "learning_rate": 6.7546945421636e-05, "loss": 1.5837, "step": 7108 }, { "epoch": 0.396243241736804, "grad_norm": 0.6334085464477539, "learning_rate": 6.753860723119122e-05, "loss": 2.096, "step": 7109 }, { "epoch": 0.3962989799899671, "grad_norm": 0.5980644822120667, "learning_rate": 6.753026848452407e-05, "loss": 1.9298, "step": 7110 }, { "epoch": 0.39635471824313023, "grad_norm": 0.5179347991943359, "learning_rate": 6.752192918189902e-05, "loss": 1.702, "step": 7111 }, { "epoch": 0.3964104564962934, "grad_norm": 0.5576172471046448, "learning_rate": 6.751358932358052e-05, "loss": 1.6217, "step": 7112 }, { "epoch": 0.39646619474945655, "grad_norm": 0.5886361002922058, "learning_rate": 6.750524890983309e-05, "loss": 1.9734, "step": 7113 }, { "epoch": 0.3965219330026197, "grad_norm": 0.573229193687439, "learning_rate": 6.749690794092125e-05, "loss": 1.9415, "step": 7114 }, { "epoch": 0.39657767125578286, "grad_norm": 1.0474965572357178, "learning_rate": 6.748856641710948e-05, "loss": 2.0009, "step": 7115 }, { "epoch": 0.396633409508946, "grad_norm": 0.5304273366928101, "learning_rate": 6.748022433866236e-05, "loss": 1.7601, "step": 7116 }, { "epoch": 0.3966891477621091, "grad_norm": 0.5350653529167175, "learning_rate": 6.747188170584444e-05, "loss": 1.7173, "step": 7117 }, { "epoch": 0.3967448860152723, "grad_norm": 0.5216551423072815, "learning_rate": 6.746353851892028e-05, "loss": 1.7054, "step": 7118 }, { "epoch": 0.39680062426843543, "grad_norm": 0.5482343435287476, "learning_rate": 6.745519477815451e-05, "loss": 1.6456, "step": 7119 }, { "epoch": 0.39685636252159856, "grad_norm": 0.5794587135314941, "learning_rate": 6.744685048381174e-05, "loss": 1.7264, "step": 7120 }, { "epoch": 0.39691210077476174, "grad_norm": 0.5834348797798157, "learning_rate": 6.743850563615659e-05, "loss": 1.7025, "step": 7121 }, { "epoch": 0.3969678390279249, "grad_norm": 0.5380405187606812, "learning_rate": 6.743016023545373e-05, "loss": 1.5742, "step": 7122 }, { "epoch": 0.397023577281088, "grad_norm": 0.5725619792938232, "learning_rate": 6.742181428196777e-05, "loss": 1.8845, "step": 7123 }, { "epoch": 0.39707931553425113, "grad_norm": 0.5491376519203186, "learning_rate": 6.741346777596347e-05, "loss": 1.6998, "step": 7124 }, { "epoch": 0.3971350537874143, "grad_norm": 0.5111629962921143, "learning_rate": 6.74051207177055e-05, "loss": 1.4712, "step": 7125 }, { "epoch": 0.39719079204057745, "grad_norm": 0.5327715277671814, "learning_rate": 6.739677310745856e-05, "loss": 1.4259, "step": 7126 }, { "epoch": 0.3972465302937406, "grad_norm": 0.585437536239624, "learning_rate": 6.738842494548742e-05, "loss": 1.6437, "step": 7127 }, { "epoch": 0.39730226854690376, "grad_norm": 0.4905366599559784, "learning_rate": 6.738007623205682e-05, "loss": 1.537, "step": 7128 }, { "epoch": 0.3973580068000669, "grad_norm": 0.578807532787323, "learning_rate": 6.737172696743155e-05, "loss": 1.7359, "step": 7129 }, { "epoch": 0.39741374505323, "grad_norm": 0.5269452333450317, "learning_rate": 6.736337715187638e-05, "loss": 1.632, "step": 7130 }, { "epoch": 0.3974694833063932, "grad_norm": 0.6212645769119263, "learning_rate": 6.735502678565611e-05, "loss": 1.6633, "step": 7131 }, { "epoch": 0.39752522155955633, "grad_norm": 0.5281040668487549, "learning_rate": 6.734667586903557e-05, "loss": 1.6349, "step": 7132 }, { "epoch": 0.39758095981271946, "grad_norm": 0.6241141557693481, "learning_rate": 6.733832440227963e-05, "loss": 1.8522, "step": 7133 }, { "epoch": 0.3976366980658826, "grad_norm": 0.5351576805114746, "learning_rate": 6.732997238565311e-05, "loss": 1.8608, "step": 7134 }, { "epoch": 0.3976924363190458, "grad_norm": 0.6173853278160095, "learning_rate": 6.732161981942093e-05, "loss": 1.7628, "step": 7135 }, { "epoch": 0.3977481745722089, "grad_norm": 0.5938517451286316, "learning_rate": 6.731326670384794e-05, "loss": 1.7216, "step": 7136 }, { "epoch": 0.39780391282537203, "grad_norm": 0.5863813161849976, "learning_rate": 6.730491303919907e-05, "loss": 1.6816, "step": 7137 }, { "epoch": 0.3978596510785352, "grad_norm": 0.6825369596481323, "learning_rate": 6.729655882573928e-05, "loss": 1.9808, "step": 7138 }, { "epoch": 0.39791538933169834, "grad_norm": 0.5284822583198547, "learning_rate": 6.728820406373346e-05, "loss": 1.8237, "step": 7139 }, { "epoch": 0.3979711275848615, "grad_norm": 0.554270327091217, "learning_rate": 6.727984875344663e-05, "loss": 1.61, "step": 7140 }, { "epoch": 0.39802686583802466, "grad_norm": 0.6326965093612671, "learning_rate": 6.727149289514373e-05, "loss": 2.1011, "step": 7141 }, { "epoch": 0.3980826040911878, "grad_norm": 0.5701342225074768, "learning_rate": 6.72631364890898e-05, "loss": 1.6724, "step": 7142 }, { "epoch": 0.3981383423443509, "grad_norm": 0.5414735078811646, "learning_rate": 6.725477953554979e-05, "loss": 1.5425, "step": 7143 }, { "epoch": 0.3981940805975141, "grad_norm": 0.5954646468162537, "learning_rate": 6.72464220347888e-05, "loss": 1.6308, "step": 7144 }, { "epoch": 0.39824981885067723, "grad_norm": 0.6013423204421997, "learning_rate": 6.723806398707185e-05, "loss": 1.8022, "step": 7145 }, { "epoch": 0.39830555710384036, "grad_norm": 0.5645208954811096, "learning_rate": 6.722970539266403e-05, "loss": 1.4448, "step": 7146 }, { "epoch": 0.3983612953570035, "grad_norm": 0.6153306365013123, "learning_rate": 6.72213462518304e-05, "loss": 1.7358, "step": 7147 }, { "epoch": 0.39841703361016667, "grad_norm": 0.5638027191162109, "learning_rate": 6.721298656483608e-05, "loss": 1.4709, "step": 7148 }, { "epoch": 0.3984727718633298, "grad_norm": 0.5619633197784424, "learning_rate": 6.720462633194618e-05, "loss": 1.6085, "step": 7149 }, { "epoch": 0.39852851011649293, "grad_norm": 0.5597891211509705, "learning_rate": 6.719626555342585e-05, "loss": 1.8059, "step": 7150 }, { "epoch": 0.3985842483696561, "grad_norm": 0.5170794725418091, "learning_rate": 6.718790422954021e-05, "loss": 1.7492, "step": 7151 }, { "epoch": 0.39863998662281924, "grad_norm": 0.5071738362312317, "learning_rate": 6.717954236055449e-05, "loss": 1.6074, "step": 7152 }, { "epoch": 0.39869572487598237, "grad_norm": 0.5328095555305481, "learning_rate": 6.717117994673384e-05, "loss": 1.3657, "step": 7153 }, { "epoch": 0.39875146312914556, "grad_norm": 0.5484116673469543, "learning_rate": 6.716281698834346e-05, "loss": 1.6112, "step": 7154 }, { "epoch": 0.3988072013823087, "grad_norm": 0.5871725678443909, "learning_rate": 6.715445348564862e-05, "loss": 1.9087, "step": 7155 }, { "epoch": 0.3988629396354718, "grad_norm": 0.5913428068161011, "learning_rate": 6.714608943891452e-05, "loss": 2.0278, "step": 7156 }, { "epoch": 0.39891867788863494, "grad_norm": 0.5644116997718811, "learning_rate": 6.713772484840645e-05, "loss": 1.63, "step": 7157 }, { "epoch": 0.3989744161417981, "grad_norm": 0.5353809595108032, "learning_rate": 6.712935971438962e-05, "loss": 1.6313, "step": 7158 }, { "epoch": 0.39903015439496126, "grad_norm": 0.5755419731140137, "learning_rate": 6.712099403712942e-05, "loss": 1.7367, "step": 7159 }, { "epoch": 0.3990858926481244, "grad_norm": 0.5571795105934143, "learning_rate": 6.711262781689109e-05, "loss": 1.8337, "step": 7160 }, { "epoch": 0.39914163090128757, "grad_norm": 0.5910276174545288, "learning_rate": 6.710426105394e-05, "loss": 1.8474, "step": 7161 }, { "epoch": 0.3991973691544507, "grad_norm": 0.5713383555412292, "learning_rate": 6.709589374854144e-05, "loss": 1.4712, "step": 7162 }, { "epoch": 0.3992531074076138, "grad_norm": 0.6179262399673462, "learning_rate": 6.708752590096082e-05, "loss": 1.6399, "step": 7163 }, { "epoch": 0.399308845660777, "grad_norm": 0.5618530511856079, "learning_rate": 6.707915751146351e-05, "loss": 1.6822, "step": 7164 }, { "epoch": 0.39936458391394014, "grad_norm": 0.5299525260925293, "learning_rate": 6.70707885803149e-05, "loss": 1.4796, "step": 7165 }, { "epoch": 0.39942032216710327, "grad_norm": 0.5534185767173767, "learning_rate": 6.706241910778041e-05, "loss": 1.844, "step": 7166 }, { "epoch": 0.39947606042026645, "grad_norm": 0.5665568709373474, "learning_rate": 6.705404909412547e-05, "loss": 1.787, "step": 7167 }, { "epoch": 0.3995317986734296, "grad_norm": 0.6122377514839172, "learning_rate": 6.704567853961552e-05, "loss": 1.7695, "step": 7168 }, { "epoch": 0.3995875369265927, "grad_norm": 0.5161054730415344, "learning_rate": 6.703730744451601e-05, "loss": 1.5939, "step": 7169 }, { "epoch": 0.39964327517975584, "grad_norm": 0.569864809513092, "learning_rate": 6.702893580909247e-05, "loss": 1.7385, "step": 7170 }, { "epoch": 0.399699013432919, "grad_norm": 0.5484759211540222, "learning_rate": 6.702056363361036e-05, "loss": 1.6495, "step": 7171 }, { "epoch": 0.39975475168608215, "grad_norm": 0.5385055541992188, "learning_rate": 6.701219091833522e-05, "loss": 1.8867, "step": 7172 }, { "epoch": 0.3998104899392453, "grad_norm": 0.5519033074378967, "learning_rate": 6.700381766353255e-05, "loss": 1.7746, "step": 7173 }, { "epoch": 0.39986622819240847, "grad_norm": 0.6148980259895325, "learning_rate": 6.699544386946795e-05, "loss": 1.8656, "step": 7174 }, { "epoch": 0.3999219664455716, "grad_norm": 0.569527268409729, "learning_rate": 6.698706953640693e-05, "loss": 1.6071, "step": 7175 }, { "epoch": 0.3999777046987347, "grad_norm": 0.5626715421676636, "learning_rate": 6.697869466461513e-05, "loss": 1.8849, "step": 7176 }, { "epoch": 0.4000334429518979, "grad_norm": 0.5838245153427124, "learning_rate": 6.69703192543581e-05, "loss": 1.7764, "step": 7177 }, { "epoch": 0.40008918120506104, "grad_norm": 0.552139937877655, "learning_rate": 6.696194330590151e-05, "loss": 1.6598, "step": 7178 }, { "epoch": 0.40014491945822417, "grad_norm": 0.5443406105041504, "learning_rate": 6.695356681951099e-05, "loss": 1.6139, "step": 7179 }, { "epoch": 0.4002006577113873, "grad_norm": 0.5214937329292297, "learning_rate": 6.694518979545214e-05, "loss": 1.6783, "step": 7180 }, { "epoch": 0.4002563959645505, "grad_norm": 0.5553892254829407, "learning_rate": 6.69368122339907e-05, "loss": 1.6699, "step": 7181 }, { "epoch": 0.4003121342177136, "grad_norm": 0.5150647163391113, "learning_rate": 6.692843413539229e-05, "loss": 1.532, "step": 7182 }, { "epoch": 0.40036787247087674, "grad_norm": 0.5763303637504578, "learning_rate": 6.692005549992268e-05, "loss": 1.9554, "step": 7183 }, { "epoch": 0.4004236107240399, "grad_norm": 0.5533180832862854, "learning_rate": 6.691167632784754e-05, "loss": 1.4465, "step": 7184 }, { "epoch": 0.40047934897720305, "grad_norm": 0.5495351552963257, "learning_rate": 6.690329661943265e-05, "loss": 1.6263, "step": 7185 }, { "epoch": 0.4005350872303662, "grad_norm": 0.5440528988838196, "learning_rate": 6.689491637494371e-05, "loss": 1.8053, "step": 7186 }, { "epoch": 0.40059082548352937, "grad_norm": 0.5240649580955505, "learning_rate": 6.688653559464655e-05, "loss": 1.6647, "step": 7187 }, { "epoch": 0.4006465637366925, "grad_norm": 0.5496859550476074, "learning_rate": 6.687815427880694e-05, "loss": 1.7904, "step": 7188 }, { "epoch": 0.4007023019898556, "grad_norm": 0.5740963816642761, "learning_rate": 6.686977242769067e-05, "loss": 1.8628, "step": 7189 }, { "epoch": 0.4007580402430188, "grad_norm": 0.5899214148521423, "learning_rate": 6.686139004156358e-05, "loss": 1.6146, "step": 7190 }, { "epoch": 0.40081377849618194, "grad_norm": 0.5265205502510071, "learning_rate": 6.68530071206915e-05, "loss": 1.683, "step": 7191 }, { "epoch": 0.40086951674934507, "grad_norm": 0.560076892375946, "learning_rate": 6.684462366534032e-05, "loss": 1.6757, "step": 7192 }, { "epoch": 0.4009252550025082, "grad_norm": 0.5472216010093689, "learning_rate": 6.683623967577586e-05, "loss": 1.7725, "step": 7193 }, { "epoch": 0.4009809932556714, "grad_norm": 0.5014883875846863, "learning_rate": 6.682785515226407e-05, "loss": 1.4681, "step": 7194 }, { "epoch": 0.4010367315088345, "grad_norm": 0.5076844692230225, "learning_rate": 6.681947009507079e-05, "loss": 1.4126, "step": 7195 }, { "epoch": 0.40109246976199764, "grad_norm": 0.5327789187431335, "learning_rate": 6.681108450446202e-05, "loss": 1.6593, "step": 7196 }, { "epoch": 0.4011482080151608, "grad_norm": 0.6164959073066711, "learning_rate": 6.680269838070364e-05, "loss": 1.9668, "step": 7197 }, { "epoch": 0.40120394626832395, "grad_norm": 0.5150039792060852, "learning_rate": 6.679431172406163e-05, "loss": 1.4285, "step": 7198 }, { "epoch": 0.4012596845214871, "grad_norm": 0.5839514136314392, "learning_rate": 6.678592453480198e-05, "loss": 1.8469, "step": 7199 }, { "epoch": 0.40131542277465027, "grad_norm": 0.6449024677276611, "learning_rate": 6.677753681319066e-05, "loss": 2.1511, "step": 7200 }, { "epoch": 0.4013711610278134, "grad_norm": 0.5425246357917786, "learning_rate": 6.676914855949372e-05, "loss": 1.8045, "step": 7201 }, { "epoch": 0.4014268992809765, "grad_norm": 0.5886958241462708, "learning_rate": 6.676075977397715e-05, "loss": 1.7844, "step": 7202 }, { "epoch": 0.40148263753413965, "grad_norm": 0.5560657382011414, "learning_rate": 6.675237045690699e-05, "loss": 1.7289, "step": 7203 }, { "epoch": 0.40153837578730284, "grad_norm": 0.5133156776428223, "learning_rate": 6.674398060854931e-05, "loss": 1.4584, "step": 7204 }, { "epoch": 0.40159411404046597, "grad_norm": 0.5923200845718384, "learning_rate": 6.67355902291702e-05, "loss": 1.8035, "step": 7205 }, { "epoch": 0.4016498522936291, "grad_norm": 0.5706618428230286, "learning_rate": 6.672719931903574e-05, "loss": 1.781, "step": 7206 }, { "epoch": 0.4017055905467923, "grad_norm": 0.548729419708252, "learning_rate": 6.671880787841204e-05, "loss": 1.7033, "step": 7207 }, { "epoch": 0.4017613287999554, "grad_norm": 0.5980433225631714, "learning_rate": 6.671041590756524e-05, "loss": 1.7048, "step": 7208 }, { "epoch": 0.40181706705311854, "grad_norm": 0.5054447054862976, "learning_rate": 6.670202340676149e-05, "loss": 1.6601, "step": 7209 }, { "epoch": 0.4018728053062817, "grad_norm": 0.5414553880691528, "learning_rate": 6.669363037626689e-05, "loss": 1.619, "step": 7210 }, { "epoch": 0.40192854355944485, "grad_norm": 0.5375347137451172, "learning_rate": 6.66852368163477e-05, "loss": 1.6898, "step": 7211 }, { "epoch": 0.401984281812608, "grad_norm": 0.5620880722999573, "learning_rate": 6.667684272727007e-05, "loss": 1.4842, "step": 7212 }, { "epoch": 0.40204002006577116, "grad_norm": 0.5257782936096191, "learning_rate": 6.666844810930021e-05, "loss": 1.5747, "step": 7213 }, { "epoch": 0.4020957583189343, "grad_norm": 0.586007297039032, "learning_rate": 6.666005296270439e-05, "loss": 1.9183, "step": 7214 }, { "epoch": 0.4021514965720974, "grad_norm": 0.5531460642814636, "learning_rate": 6.66516572877488e-05, "loss": 1.708, "step": 7215 }, { "epoch": 0.40220723482526055, "grad_norm": 0.544386625289917, "learning_rate": 6.664326108469974e-05, "loss": 1.5666, "step": 7216 }, { "epoch": 0.40226297307842374, "grad_norm": 0.5806384682655334, "learning_rate": 6.663486435382347e-05, "loss": 1.8389, "step": 7217 }, { "epoch": 0.40231871133158686, "grad_norm": 0.6060808300971985, "learning_rate": 6.66264670953863e-05, "loss": 1.91, "step": 7218 }, { "epoch": 0.40237444958475, "grad_norm": 0.5704980492591858, "learning_rate": 6.661806930965452e-05, "loss": 1.6892, "step": 7219 }, { "epoch": 0.4024301878379132, "grad_norm": 0.5570072531700134, "learning_rate": 6.660967099689448e-05, "loss": 1.6718, "step": 7220 }, { "epoch": 0.4024859260910763, "grad_norm": 0.5326122641563416, "learning_rate": 6.66012721573725e-05, "loss": 1.7055, "step": 7221 }, { "epoch": 0.40254166434423944, "grad_norm": 0.5099365711212158, "learning_rate": 6.659287279135499e-05, "loss": 1.6732, "step": 7222 }, { "epoch": 0.4025974025974026, "grad_norm": 0.5786659717559814, "learning_rate": 6.658447289910827e-05, "loss": 1.4223, "step": 7223 }, { "epoch": 0.40265314085056575, "grad_norm": 0.5925951600074768, "learning_rate": 6.657607248089879e-05, "loss": 1.8696, "step": 7224 }, { "epoch": 0.4027088791037289, "grad_norm": 0.5589519739151001, "learning_rate": 6.65676715369929e-05, "loss": 1.5046, "step": 7225 }, { "epoch": 0.402764617356892, "grad_norm": 0.5450175404548645, "learning_rate": 6.655927006765709e-05, "loss": 1.6517, "step": 7226 }, { "epoch": 0.4028203556100552, "grad_norm": 0.563928484916687, "learning_rate": 6.655086807315778e-05, "loss": 1.8544, "step": 7227 }, { "epoch": 0.4028760938632183, "grad_norm": 0.5899096131324768, "learning_rate": 6.654246555376144e-05, "loss": 1.7556, "step": 7228 }, { "epoch": 0.40293183211638145, "grad_norm": 0.5601338744163513, "learning_rate": 6.653406250973451e-05, "loss": 1.7469, "step": 7229 }, { "epoch": 0.40298757036954463, "grad_norm": 0.5789577960968018, "learning_rate": 6.652565894134355e-05, "loss": 1.6428, "step": 7230 }, { "epoch": 0.40304330862270776, "grad_norm": 0.5229625701904297, "learning_rate": 6.651725484885503e-05, "loss": 1.4699, "step": 7231 }, { "epoch": 0.4030990468758709, "grad_norm": 0.5528407096862793, "learning_rate": 6.650885023253548e-05, "loss": 1.8881, "step": 7232 }, { "epoch": 0.4031547851290341, "grad_norm": 0.5682995319366455, "learning_rate": 6.650044509265147e-05, "loss": 1.8263, "step": 7233 }, { "epoch": 0.4032105233821972, "grad_norm": 0.5219863057136536, "learning_rate": 6.649203942946954e-05, "loss": 1.5232, "step": 7234 }, { "epoch": 0.40326626163536033, "grad_norm": 0.5359931588172913, "learning_rate": 6.648363324325627e-05, "loss": 1.5617, "step": 7235 }, { "epoch": 0.4033219998885235, "grad_norm": 0.5631711483001709, "learning_rate": 6.647522653427825e-05, "loss": 1.7428, "step": 7236 }, { "epoch": 0.40337773814168665, "grad_norm": 0.5994919538497925, "learning_rate": 6.646681930280211e-05, "loss": 1.5538, "step": 7237 }, { "epoch": 0.4034334763948498, "grad_norm": 0.5310835242271423, "learning_rate": 6.645841154909448e-05, "loss": 1.5501, "step": 7238 }, { "epoch": 0.4034892146480129, "grad_norm": 0.7443162798881531, "learning_rate": 6.6450003273422e-05, "loss": 1.7322, "step": 7239 }, { "epoch": 0.4035449529011761, "grad_norm": 0.5354825258255005, "learning_rate": 6.644159447605131e-05, "loss": 1.6913, "step": 7240 }, { "epoch": 0.4036006911543392, "grad_norm": 0.5255858898162842, "learning_rate": 6.64331851572491e-05, "loss": 1.6574, "step": 7241 }, { "epoch": 0.40365642940750235, "grad_norm": 0.531148374080658, "learning_rate": 6.642477531728207e-05, "loss": 1.5934, "step": 7242 }, { "epoch": 0.40371216766066553, "grad_norm": 0.5981380939483643, "learning_rate": 6.641636495641694e-05, "loss": 1.8274, "step": 7243 }, { "epoch": 0.40376790591382866, "grad_norm": 0.5403674840927124, "learning_rate": 6.640795407492043e-05, "loss": 1.4047, "step": 7244 }, { "epoch": 0.4038236441669918, "grad_norm": 0.5610218048095703, "learning_rate": 6.639954267305928e-05, "loss": 1.8228, "step": 7245 }, { "epoch": 0.403879382420155, "grad_norm": 0.5543003678321838, "learning_rate": 6.639113075110025e-05, "loss": 1.8899, "step": 7246 }, { "epoch": 0.4039351206733181, "grad_norm": 0.5696173906326294, "learning_rate": 6.63827183093101e-05, "loss": 1.6491, "step": 7247 }, { "epoch": 0.40399085892648123, "grad_norm": 0.5595298409461975, "learning_rate": 6.637430534795567e-05, "loss": 1.7502, "step": 7248 }, { "epoch": 0.40404659717964436, "grad_norm": 0.5707483291625977, "learning_rate": 6.636589186730373e-05, "loss": 1.6643, "step": 7249 }, { "epoch": 0.40410233543280755, "grad_norm": 0.5698502063751221, "learning_rate": 6.635747786762113e-05, "loss": 1.5516, "step": 7250 }, { "epoch": 0.4041580736859707, "grad_norm": 0.5298511385917664, "learning_rate": 6.63490633491747e-05, "loss": 1.5581, "step": 7251 }, { "epoch": 0.4042138119391338, "grad_norm": 0.5572474598884583, "learning_rate": 6.63406483122313e-05, "loss": 1.7449, "step": 7252 }, { "epoch": 0.404269550192297, "grad_norm": 0.5807195901870728, "learning_rate": 6.633223275705781e-05, "loss": 1.6806, "step": 7253 }, { "epoch": 0.4043252884454601, "grad_norm": 0.5467732548713684, "learning_rate": 6.632381668392111e-05, "loss": 1.742, "step": 7254 }, { "epoch": 0.40438102669862325, "grad_norm": 0.5687143206596375, "learning_rate": 6.631540009308813e-05, "loss": 1.7586, "step": 7255 }, { "epoch": 0.40443676495178643, "grad_norm": 0.5853325128555298, "learning_rate": 6.630698298482578e-05, "loss": 1.8601, "step": 7256 }, { "epoch": 0.40449250320494956, "grad_norm": 0.5176242589950562, "learning_rate": 6.629856535940101e-05, "loss": 1.5131, "step": 7257 }, { "epoch": 0.4045482414581127, "grad_norm": 0.5749338865280151, "learning_rate": 6.629014721708076e-05, "loss": 1.6167, "step": 7258 }, { "epoch": 0.4046039797112759, "grad_norm": 0.6350910663604736, "learning_rate": 6.628172855813203e-05, "loss": 1.6698, "step": 7259 }, { "epoch": 0.404659717964439, "grad_norm": 0.538773238658905, "learning_rate": 6.627330938282182e-05, "loss": 1.7449, "step": 7260 }, { "epoch": 0.40471545621760213, "grad_norm": 0.5643429160118103, "learning_rate": 6.62648896914171e-05, "loss": 1.6906, "step": 7261 }, { "epoch": 0.40477119447076526, "grad_norm": 0.5482378005981445, "learning_rate": 6.62564694841849e-05, "loss": 1.651, "step": 7262 }, { "epoch": 0.40482693272392845, "grad_norm": 0.556492805480957, "learning_rate": 6.624804876139227e-05, "loss": 1.6232, "step": 7263 }, { "epoch": 0.4048826709770916, "grad_norm": 0.5243347883224487, "learning_rate": 6.623962752330627e-05, "loss": 1.5745, "step": 7264 }, { "epoch": 0.4049384092302547, "grad_norm": 0.5533580780029297, "learning_rate": 6.623120577019396e-05, "loss": 1.621, "step": 7265 }, { "epoch": 0.4049941474834179, "grad_norm": 0.6168079376220703, "learning_rate": 6.622278350232246e-05, "loss": 1.8571, "step": 7266 }, { "epoch": 0.405049885736581, "grad_norm": 0.5359664559364319, "learning_rate": 6.621436071995884e-05, "loss": 1.5815, "step": 7267 }, { "epoch": 0.40510562398974415, "grad_norm": 0.6080171465873718, "learning_rate": 6.620593742337022e-05, "loss": 1.7069, "step": 7268 }, { "epoch": 0.40516136224290733, "grad_norm": 0.5019293427467346, "learning_rate": 6.619751361282377e-05, "loss": 1.5408, "step": 7269 }, { "epoch": 0.40521710049607046, "grad_norm": 0.5557806491851807, "learning_rate": 6.618908928858663e-05, "loss": 1.7405, "step": 7270 }, { "epoch": 0.4052728387492336, "grad_norm": 0.5392197370529175, "learning_rate": 6.618066445092595e-05, "loss": 1.5968, "step": 7271 }, { "epoch": 0.4053285770023967, "grad_norm": 0.621353030204773, "learning_rate": 6.617223910010896e-05, "loss": 1.8194, "step": 7272 }, { "epoch": 0.4053843152555599, "grad_norm": 0.5642111301422119, "learning_rate": 6.61638132364028e-05, "loss": 1.4983, "step": 7273 }, { "epoch": 0.40544005350872303, "grad_norm": 0.5767485499382019, "learning_rate": 6.615538686007476e-05, "loss": 1.6838, "step": 7274 }, { "epoch": 0.40549579176188616, "grad_norm": 0.5635485649108887, "learning_rate": 6.614695997139202e-05, "loss": 1.87, "step": 7275 }, { "epoch": 0.40555153001504934, "grad_norm": 0.617825448513031, "learning_rate": 6.613853257062186e-05, "loss": 1.839, "step": 7276 }, { "epoch": 0.4056072682682125, "grad_norm": 0.5892661213874817, "learning_rate": 6.613010465803153e-05, "loss": 1.7833, "step": 7277 }, { "epoch": 0.4056630065213756, "grad_norm": 0.6038499474525452, "learning_rate": 6.612167623388834e-05, "loss": 1.8361, "step": 7278 }, { "epoch": 0.4057187447745388, "grad_norm": 0.5470013618469238, "learning_rate": 6.611324729845958e-05, "loss": 1.8218, "step": 7279 }, { "epoch": 0.4057744830277019, "grad_norm": 0.5531765818595886, "learning_rate": 6.610481785201254e-05, "loss": 1.6214, "step": 7280 }, { "epoch": 0.40583022128086504, "grad_norm": 0.5488517880439758, "learning_rate": 6.60963878948146e-05, "loss": 1.5644, "step": 7281 }, { "epoch": 0.40588595953402823, "grad_norm": 0.5389445424079895, "learning_rate": 6.608795742713306e-05, "loss": 1.6407, "step": 7282 }, { "epoch": 0.40594169778719136, "grad_norm": 0.5432456731796265, "learning_rate": 6.607952644923534e-05, "loss": 1.6906, "step": 7283 }, { "epoch": 0.4059974360403545, "grad_norm": 0.5381740927696228, "learning_rate": 6.607109496138877e-05, "loss": 1.5545, "step": 7284 }, { "epoch": 0.4060531742935176, "grad_norm": 0.5759360194206238, "learning_rate": 6.606266296386078e-05, "loss": 1.3279, "step": 7285 }, { "epoch": 0.4061089125466808, "grad_norm": 0.5859653949737549, "learning_rate": 6.605423045691875e-05, "loss": 1.6515, "step": 7286 }, { "epoch": 0.40616465079984393, "grad_norm": 0.5650625228881836, "learning_rate": 6.604579744083015e-05, "loss": 1.7375, "step": 7287 }, { "epoch": 0.40622038905300706, "grad_norm": 0.5053606629371643, "learning_rate": 6.60373639158624e-05, "loss": 1.3345, "step": 7288 }, { "epoch": 0.40627612730617024, "grad_norm": 0.559548020362854, "learning_rate": 6.602892988228299e-05, "loss": 1.5881, "step": 7289 }, { "epoch": 0.40633186555933337, "grad_norm": 0.5711749196052551, "learning_rate": 6.602049534035937e-05, "loss": 1.6593, "step": 7290 }, { "epoch": 0.4063876038124965, "grad_norm": 0.5415685176849365, "learning_rate": 6.601206029035904e-05, "loss": 1.7801, "step": 7291 }, { "epoch": 0.4064433420656597, "grad_norm": 0.5906074643135071, "learning_rate": 6.60036247325495e-05, "loss": 1.8566, "step": 7292 }, { "epoch": 0.4064990803188228, "grad_norm": 0.5831937789916992, "learning_rate": 6.599518866719831e-05, "loss": 1.6081, "step": 7293 }, { "epoch": 0.40655481857198594, "grad_norm": 0.5068337917327881, "learning_rate": 6.5986752094573e-05, "loss": 1.5883, "step": 7294 }, { "epoch": 0.4066105568251491, "grad_norm": 0.5402857065200806, "learning_rate": 6.59783150149411e-05, "loss": 1.7286, "step": 7295 }, { "epoch": 0.40666629507831226, "grad_norm": 0.5793524980545044, "learning_rate": 6.596987742857024e-05, "loss": 1.782, "step": 7296 }, { "epoch": 0.4067220333314754, "grad_norm": 0.5685024261474609, "learning_rate": 6.596143933572795e-05, "loss": 1.6989, "step": 7297 }, { "epoch": 0.4067777715846385, "grad_norm": 0.5885668396949768, "learning_rate": 6.595300073668188e-05, "loss": 1.7724, "step": 7298 }, { "epoch": 0.4068335098378017, "grad_norm": 0.5693629384040833, "learning_rate": 6.594456163169963e-05, "loss": 1.7927, "step": 7299 }, { "epoch": 0.40688924809096483, "grad_norm": 0.6024751663208008, "learning_rate": 6.593612202104885e-05, "loss": 1.9269, "step": 7300 }, { "epoch": 0.40694498634412796, "grad_norm": 0.5218265652656555, "learning_rate": 6.59276819049972e-05, "loss": 1.6254, "step": 7301 }, { "epoch": 0.40700072459729114, "grad_norm": 0.6775539517402649, "learning_rate": 6.591924128381234e-05, "loss": 2.2446, "step": 7302 }, { "epoch": 0.40705646285045427, "grad_norm": 0.5457693338394165, "learning_rate": 6.591080015776196e-05, "loss": 1.7268, "step": 7303 }, { "epoch": 0.4071122011036174, "grad_norm": 0.5545173287391663, "learning_rate": 6.590235852711377e-05, "loss": 1.5403, "step": 7304 }, { "epoch": 0.4071679393567806, "grad_norm": 0.5415998697280884, "learning_rate": 6.589391639213549e-05, "loss": 1.7487, "step": 7305 }, { "epoch": 0.4072236776099437, "grad_norm": 0.535123884677887, "learning_rate": 6.588547375309484e-05, "loss": 1.8118, "step": 7306 }, { "epoch": 0.40727941586310684, "grad_norm": 0.5559954643249512, "learning_rate": 6.587703061025959e-05, "loss": 1.7792, "step": 7307 }, { "epoch": 0.40733515411626997, "grad_norm": 0.5952346920967102, "learning_rate": 6.586858696389748e-05, "loss": 1.8367, "step": 7308 }, { "epoch": 0.40739089236943316, "grad_norm": 0.5658838152885437, "learning_rate": 6.586014281427632e-05, "loss": 1.8874, "step": 7309 }, { "epoch": 0.4074466306225963, "grad_norm": 0.5443295240402222, "learning_rate": 6.585169816166392e-05, "loss": 1.6405, "step": 7310 }, { "epoch": 0.4075023688757594, "grad_norm": 0.5414347648620605, "learning_rate": 6.584325300632806e-05, "loss": 1.7544, "step": 7311 }, { "epoch": 0.4075581071289226, "grad_norm": 0.5387737154960632, "learning_rate": 6.583480734853658e-05, "loss": 1.6416, "step": 7312 }, { "epoch": 0.4076138453820857, "grad_norm": 0.5518178343772888, "learning_rate": 6.582636118855735e-05, "loss": 1.7322, "step": 7313 }, { "epoch": 0.40766958363524886, "grad_norm": 0.5452878475189209, "learning_rate": 6.58179145266582e-05, "loss": 1.7432, "step": 7314 }, { "epoch": 0.40772532188841204, "grad_norm": 0.5074037313461304, "learning_rate": 6.580946736310704e-05, "loss": 1.6643, "step": 7315 }, { "epoch": 0.40778106014157517, "grad_norm": 0.5745427012443542, "learning_rate": 6.580101969817175e-05, "loss": 1.8664, "step": 7316 }, { "epoch": 0.4078367983947383, "grad_norm": 0.5891657471656799, "learning_rate": 6.579257153212024e-05, "loss": 1.8217, "step": 7317 }, { "epoch": 0.4078925366479015, "grad_norm": 0.5395662188529968, "learning_rate": 6.578412286522044e-05, "loss": 1.5422, "step": 7318 }, { "epoch": 0.4079482749010646, "grad_norm": 0.5738537907600403, "learning_rate": 6.57756736977403e-05, "loss": 1.753, "step": 7319 }, { "epoch": 0.40800401315422774, "grad_norm": 0.5593982338905334, "learning_rate": 6.576722402994775e-05, "loss": 1.5805, "step": 7320 }, { "epoch": 0.40805975140739087, "grad_norm": 0.6101201772689819, "learning_rate": 6.575877386211077e-05, "loss": 1.742, "step": 7321 }, { "epoch": 0.40811548966055405, "grad_norm": 0.5429602265357971, "learning_rate": 6.57503231944974e-05, "loss": 1.7166, "step": 7322 }, { "epoch": 0.4081712279137172, "grad_norm": 0.5799590349197388, "learning_rate": 6.574187202737558e-05, "loss": 1.8698, "step": 7323 }, { "epoch": 0.4082269661668803, "grad_norm": 0.5671953558921814, "learning_rate": 6.573342036101339e-05, "loss": 1.5871, "step": 7324 }, { "epoch": 0.4082827044200435, "grad_norm": 0.5521631836891174, "learning_rate": 6.572496819567882e-05, "loss": 1.6091, "step": 7325 }, { "epoch": 0.4083384426732066, "grad_norm": 0.6058674454689026, "learning_rate": 6.571651553163994e-05, "loss": 1.9233, "step": 7326 }, { "epoch": 0.40839418092636975, "grad_norm": 0.5595351457595825, "learning_rate": 6.570806236916481e-05, "loss": 1.681, "step": 7327 }, { "epoch": 0.40844991917953294, "grad_norm": 0.5565963983535767, "learning_rate": 6.569960870852156e-05, "loss": 1.8081, "step": 7328 }, { "epoch": 0.40850565743269607, "grad_norm": 0.5626837015151978, "learning_rate": 6.569115454997823e-05, "loss": 1.7268, "step": 7329 }, { "epoch": 0.4085613956858592, "grad_norm": 0.5642188787460327, "learning_rate": 6.568269989380296e-05, "loss": 1.9007, "step": 7330 }, { "epoch": 0.4086171339390223, "grad_norm": 0.5992141962051392, "learning_rate": 6.56742447402639e-05, "loss": 1.8163, "step": 7331 }, { "epoch": 0.4086728721921855, "grad_norm": 0.5469499826431274, "learning_rate": 6.566578908962918e-05, "loss": 1.6564, "step": 7332 }, { "epoch": 0.40872861044534864, "grad_norm": 0.5719706416130066, "learning_rate": 6.565733294216697e-05, "loss": 1.3752, "step": 7333 }, { "epoch": 0.40878434869851177, "grad_norm": 0.5726919174194336, "learning_rate": 6.564887629814543e-05, "loss": 1.629, "step": 7334 }, { "epoch": 0.40884008695167495, "grad_norm": 0.6024767160415649, "learning_rate": 6.56404191578328e-05, "loss": 1.6818, "step": 7335 }, { "epoch": 0.4088958252048381, "grad_norm": 0.5598945021629333, "learning_rate": 6.563196152149725e-05, "loss": 1.6562, "step": 7336 }, { "epoch": 0.4089515634580012, "grad_norm": 0.6022909283638, "learning_rate": 6.562350338940704e-05, "loss": 1.6497, "step": 7337 }, { "epoch": 0.4090073017111644, "grad_norm": 0.5557130575180054, "learning_rate": 6.561504476183037e-05, "loss": 1.5777, "step": 7338 }, { "epoch": 0.4090630399643275, "grad_norm": 0.556742787361145, "learning_rate": 6.560658563903553e-05, "loss": 1.6048, "step": 7339 }, { "epoch": 0.40911877821749065, "grad_norm": 0.6215361952781677, "learning_rate": 6.559812602129078e-05, "loss": 1.85, "step": 7340 }, { "epoch": 0.40917451647065384, "grad_norm": 0.5431729555130005, "learning_rate": 6.558966590886443e-05, "loss": 1.7366, "step": 7341 }, { "epoch": 0.40923025472381697, "grad_norm": 0.5173145532608032, "learning_rate": 6.558120530202476e-05, "loss": 1.5962, "step": 7342 }, { "epoch": 0.4092859929769801, "grad_norm": 0.558746874332428, "learning_rate": 6.55727442010401e-05, "loss": 1.6842, "step": 7343 }, { "epoch": 0.4093417312301432, "grad_norm": 0.5484337210655212, "learning_rate": 6.55642826061788e-05, "loss": 1.8824, "step": 7344 }, { "epoch": 0.4093974694833064, "grad_norm": 0.5415590405464172, "learning_rate": 6.55558205177092e-05, "loss": 1.7393, "step": 7345 }, { "epoch": 0.40945320773646954, "grad_norm": 0.5736859440803528, "learning_rate": 6.554735793589967e-05, "loss": 1.6012, "step": 7346 }, { "epoch": 0.40950894598963267, "grad_norm": 0.5511910319328308, "learning_rate": 6.553889486101857e-05, "loss": 1.6051, "step": 7347 }, { "epoch": 0.40956468424279585, "grad_norm": 0.5481744408607483, "learning_rate": 6.553043129333436e-05, "loss": 1.6571, "step": 7348 }, { "epoch": 0.409620422495959, "grad_norm": 0.7418869733810425, "learning_rate": 6.55219672331154e-05, "loss": 1.6247, "step": 7349 }, { "epoch": 0.4096761607491221, "grad_norm": 0.5882282257080078, "learning_rate": 6.551350268063015e-05, "loss": 1.7125, "step": 7350 }, { "epoch": 0.4097318990022853, "grad_norm": 0.6087817549705505, "learning_rate": 6.550503763614702e-05, "loss": 1.9143, "step": 7351 }, { "epoch": 0.4097876372554484, "grad_norm": 0.5106980800628662, "learning_rate": 6.549657209993452e-05, "loss": 1.4884, "step": 7352 }, { "epoch": 0.40984337550861155, "grad_norm": 0.5542812347412109, "learning_rate": 6.548810607226109e-05, "loss": 1.6739, "step": 7353 }, { "epoch": 0.4098991137617747, "grad_norm": 0.6260994672775269, "learning_rate": 6.547963955339526e-05, "loss": 1.8902, "step": 7354 }, { "epoch": 0.40995485201493786, "grad_norm": 0.5681547522544861, "learning_rate": 6.547117254360549e-05, "loss": 1.8688, "step": 7355 }, { "epoch": 0.410010590268101, "grad_norm": 0.5453806519508362, "learning_rate": 6.546270504316033e-05, "loss": 1.7046, "step": 7356 }, { "epoch": 0.4100663285212641, "grad_norm": 0.5230925679206848, "learning_rate": 6.545423705232834e-05, "loss": 1.6008, "step": 7357 }, { "epoch": 0.4101220667744273, "grad_norm": 0.5534452795982361, "learning_rate": 6.544576857137804e-05, "loss": 1.806, "step": 7358 }, { "epoch": 0.41017780502759044, "grad_norm": 0.586466908454895, "learning_rate": 6.543729960057803e-05, "loss": 1.8252, "step": 7359 }, { "epoch": 0.41023354328075357, "grad_norm": 0.5712817311286926, "learning_rate": 6.542883014019686e-05, "loss": 1.6653, "step": 7360 }, { "epoch": 0.41028928153391675, "grad_norm": 0.5666759014129639, "learning_rate": 6.542036019050318e-05, "loss": 1.7503, "step": 7361 }, { "epoch": 0.4103450197870799, "grad_norm": 0.6092966198921204, "learning_rate": 6.541188975176557e-05, "loss": 2.0138, "step": 7362 }, { "epoch": 0.410400758040243, "grad_norm": 0.5910922884941101, "learning_rate": 6.540341882425267e-05, "loss": 1.8193, "step": 7363 }, { "epoch": 0.4104564962934062, "grad_norm": 0.5653868317604065, "learning_rate": 6.539494740823313e-05, "loss": 1.6905, "step": 7364 }, { "epoch": 0.4105122345465693, "grad_norm": 0.5556957721710205, "learning_rate": 6.538647550397563e-05, "loss": 1.5966, "step": 7365 }, { "epoch": 0.41056797279973245, "grad_norm": 0.6585522294044495, "learning_rate": 6.537800311174882e-05, "loss": 1.9665, "step": 7366 }, { "epoch": 0.4106237110528956, "grad_norm": 0.5647701621055603, "learning_rate": 6.536953023182143e-05, "loss": 1.7119, "step": 7367 }, { "epoch": 0.41067944930605876, "grad_norm": 0.5993644595146179, "learning_rate": 6.536105686446214e-05, "loss": 1.8307, "step": 7368 }, { "epoch": 0.4107351875592219, "grad_norm": 0.5878274440765381, "learning_rate": 6.535258300993969e-05, "loss": 1.6834, "step": 7369 }, { "epoch": 0.410790925812385, "grad_norm": 0.5731014609336853, "learning_rate": 6.534410866852283e-05, "loss": 1.7639, "step": 7370 }, { "epoch": 0.4108466640655482, "grad_norm": 0.558718204498291, "learning_rate": 6.533563384048029e-05, "loss": 1.68, "step": 7371 }, { "epoch": 0.41090240231871134, "grad_norm": 0.5906892418861389, "learning_rate": 6.532715852608087e-05, "loss": 1.6856, "step": 7372 }, { "epoch": 0.41095814057187446, "grad_norm": 0.5575792193412781, "learning_rate": 6.531868272559333e-05, "loss": 1.6829, "step": 7373 }, { "epoch": 0.41101387882503765, "grad_norm": 0.5349531769752502, "learning_rate": 6.531020643928649e-05, "loss": 1.666, "step": 7374 }, { "epoch": 0.4110696170782008, "grad_norm": 0.5200047492980957, "learning_rate": 6.530172966742918e-05, "loss": 1.5504, "step": 7375 }, { "epoch": 0.4111253553313639, "grad_norm": 0.599875271320343, "learning_rate": 6.529325241029022e-05, "loss": 1.8604, "step": 7376 }, { "epoch": 0.41118109358452704, "grad_norm": 0.5267208814620972, "learning_rate": 6.528477466813845e-05, "loss": 1.5969, "step": 7377 }, { "epoch": 0.4112368318376902, "grad_norm": 0.5209345817565918, "learning_rate": 6.527629644124273e-05, "loss": 1.5824, "step": 7378 }, { "epoch": 0.41129257009085335, "grad_norm": 0.5929481983184814, "learning_rate": 6.526781772987197e-05, "loss": 1.9316, "step": 7379 }, { "epoch": 0.4113483083440165, "grad_norm": 0.5629690885543823, "learning_rate": 6.525933853429505e-05, "loss": 1.6927, "step": 7380 }, { "epoch": 0.41140404659717966, "grad_norm": 0.5802732110023499, "learning_rate": 6.525085885478089e-05, "loss": 1.7149, "step": 7381 }, { "epoch": 0.4114597848503428, "grad_norm": 0.5767194032669067, "learning_rate": 6.524237869159838e-05, "loss": 1.6511, "step": 7382 }, { "epoch": 0.4115155231035059, "grad_norm": 0.5414605140686035, "learning_rate": 6.523389804501651e-05, "loss": 1.5401, "step": 7383 }, { "epoch": 0.4115712613566691, "grad_norm": 0.5376063585281372, "learning_rate": 6.52254169153042e-05, "loss": 1.6796, "step": 7384 }, { "epoch": 0.41162699960983223, "grad_norm": 0.5899385809898376, "learning_rate": 6.521693530273045e-05, "loss": 1.7729, "step": 7385 }, { "epoch": 0.41168273786299536, "grad_norm": 0.5602531433105469, "learning_rate": 6.520845320756421e-05, "loss": 1.6136, "step": 7386 }, { "epoch": 0.41173847611615855, "grad_norm": 0.5425115823745728, "learning_rate": 6.519997063007452e-05, "loss": 1.5817, "step": 7387 }, { "epoch": 0.4117942143693217, "grad_norm": 0.5449849963188171, "learning_rate": 6.51914875705304e-05, "loss": 1.6962, "step": 7388 }, { "epoch": 0.4118499526224848, "grad_norm": 0.5851723551750183, "learning_rate": 6.518300402920084e-05, "loss": 2.035, "step": 7389 }, { "epoch": 0.41190569087564793, "grad_norm": 0.5257713794708252, "learning_rate": 6.517452000635493e-05, "loss": 1.1806, "step": 7390 }, { "epoch": 0.4119614291288111, "grad_norm": 0.5605010390281677, "learning_rate": 6.516603550226171e-05, "loss": 1.7513, "step": 7391 }, { "epoch": 0.41201716738197425, "grad_norm": 0.6154865026473999, "learning_rate": 6.515755051719026e-05, "loss": 1.8616, "step": 7392 }, { "epoch": 0.4120729056351374, "grad_norm": 0.5920423269271851, "learning_rate": 6.51490650514097e-05, "loss": 1.7594, "step": 7393 }, { "epoch": 0.41212864388830056, "grad_norm": 0.545600414276123, "learning_rate": 6.514057910518913e-05, "loss": 1.5641, "step": 7394 }, { "epoch": 0.4121843821414637, "grad_norm": 0.5568488836288452, "learning_rate": 6.513209267879765e-05, "loss": 1.6398, "step": 7395 }, { "epoch": 0.4122401203946268, "grad_norm": 0.5209145545959473, "learning_rate": 6.512360577250443e-05, "loss": 1.4485, "step": 7396 }, { "epoch": 0.41229585864779, "grad_norm": 0.5175876021385193, "learning_rate": 6.511511838657859e-05, "loss": 1.6851, "step": 7397 }, { "epoch": 0.41235159690095313, "grad_norm": 0.5393850803375244, "learning_rate": 6.510663052128934e-05, "loss": 1.6724, "step": 7398 }, { "epoch": 0.41240733515411626, "grad_norm": 0.5579698085784912, "learning_rate": 6.509814217690582e-05, "loss": 1.7999, "step": 7399 }, { "epoch": 0.4124630734072794, "grad_norm": 0.5217966437339783, "learning_rate": 6.508965335369729e-05, "loss": 1.5216, "step": 7400 }, { "epoch": 0.4125188116604426, "grad_norm": 0.5507352352142334, "learning_rate": 6.508116405193292e-05, "loss": 1.5396, "step": 7401 }, { "epoch": 0.4125745499136057, "grad_norm": 0.5592759847640991, "learning_rate": 6.507267427188197e-05, "loss": 1.7238, "step": 7402 }, { "epoch": 0.41263028816676883, "grad_norm": 0.5734774470329285, "learning_rate": 6.506418401381365e-05, "loss": 1.7004, "step": 7403 }, { "epoch": 0.412686026419932, "grad_norm": 0.5572485327720642, "learning_rate": 6.505569327799726e-05, "loss": 1.5875, "step": 7404 }, { "epoch": 0.41274176467309515, "grad_norm": 0.5783054232597351, "learning_rate": 6.504720206470205e-05, "loss": 1.806, "step": 7405 }, { "epoch": 0.4127975029262583, "grad_norm": 0.5762080550193787, "learning_rate": 6.503871037419731e-05, "loss": 1.6241, "step": 7406 }, { "epoch": 0.41285324117942146, "grad_norm": 0.5752031207084656, "learning_rate": 6.50302182067524e-05, "loss": 1.5105, "step": 7407 }, { "epoch": 0.4129089794325846, "grad_norm": 0.5618080496788025, "learning_rate": 6.502172556263656e-05, "loss": 1.6661, "step": 7408 }, { "epoch": 0.4129647176857477, "grad_norm": 0.5460039377212524, "learning_rate": 6.501323244211919e-05, "loss": 1.5414, "step": 7409 }, { "epoch": 0.4130204559389109, "grad_norm": 0.5536362528800964, "learning_rate": 6.500473884546962e-05, "loss": 1.7312, "step": 7410 }, { "epoch": 0.41307619419207403, "grad_norm": 0.5220944285392761, "learning_rate": 6.499624477295722e-05, "loss": 1.4628, "step": 7411 }, { "epoch": 0.41313193244523716, "grad_norm": 0.5702623128890991, "learning_rate": 6.498775022485134e-05, "loss": 1.7568, "step": 7412 }, { "epoch": 0.4131876706984003, "grad_norm": 0.5831007361412048, "learning_rate": 6.497925520142143e-05, "loss": 1.8805, "step": 7413 }, { "epoch": 0.4132434089515635, "grad_norm": 0.5719270706176758, "learning_rate": 6.497075970293688e-05, "loss": 1.8406, "step": 7414 }, { "epoch": 0.4132991472047266, "grad_norm": 0.5721832513809204, "learning_rate": 6.496226372966711e-05, "loss": 1.8577, "step": 7415 }, { "epoch": 0.41335488545788973, "grad_norm": 0.5381945967674255, "learning_rate": 6.495376728188159e-05, "loss": 1.5441, "step": 7416 }, { "epoch": 0.4134106237110529, "grad_norm": 0.5105479955673218, "learning_rate": 6.494527035984974e-05, "loss": 1.7383, "step": 7417 }, { "epoch": 0.41346636196421604, "grad_norm": 0.5516504049301147, "learning_rate": 6.493677296384106e-05, "loss": 1.7542, "step": 7418 }, { "epoch": 0.4135221002173792, "grad_norm": 0.5726693868637085, "learning_rate": 6.492827509412501e-05, "loss": 1.887, "step": 7419 }, { "epoch": 0.41357783847054236, "grad_norm": 0.5425702333450317, "learning_rate": 6.491977675097114e-05, "loss": 1.6247, "step": 7420 }, { "epoch": 0.4136335767237055, "grad_norm": 0.7511564493179321, "learning_rate": 6.491127793464893e-05, "loss": 1.7428, "step": 7421 }, { "epoch": 0.4136893149768686, "grad_norm": 0.5151875019073486, "learning_rate": 6.490277864542792e-05, "loss": 1.6937, "step": 7422 }, { "epoch": 0.41374505323003175, "grad_norm": 0.5558873414993286, "learning_rate": 6.489427888357765e-05, "loss": 1.7254, "step": 7423 }, { "epoch": 0.41380079148319493, "grad_norm": 0.5704571008682251, "learning_rate": 6.488577864936771e-05, "loss": 1.6893, "step": 7424 }, { "epoch": 0.41385652973635806, "grad_norm": 0.5515883564949036, "learning_rate": 6.487727794306765e-05, "loss": 1.5928, "step": 7425 }, { "epoch": 0.4139122679895212, "grad_norm": 0.5346539616584778, "learning_rate": 6.48687767649471e-05, "loss": 1.5923, "step": 7426 }, { "epoch": 0.41396800624268437, "grad_norm": 0.48073434829711914, "learning_rate": 6.48602751152756e-05, "loss": 1.5783, "step": 7427 }, { "epoch": 0.4140237444958475, "grad_norm": 0.5613585114479065, "learning_rate": 6.485177299432284e-05, "loss": 1.7081, "step": 7428 }, { "epoch": 0.41407948274901063, "grad_norm": 0.5521184206008911, "learning_rate": 6.484327040235844e-05, "loss": 1.8141, "step": 7429 }, { "epoch": 0.4141352210021738, "grad_norm": 0.5570716857910156, "learning_rate": 6.483476733965202e-05, "loss": 1.8114, "step": 7430 }, { "epoch": 0.41419095925533694, "grad_norm": 0.5927569270133972, "learning_rate": 6.48262638064733e-05, "loss": 1.8538, "step": 7431 }, { "epoch": 0.4142466975085001, "grad_norm": 0.6198796629905701, "learning_rate": 6.48177598030919e-05, "loss": 1.8671, "step": 7432 }, { "epoch": 0.41430243576166326, "grad_norm": 0.562487781047821, "learning_rate": 6.480925532977758e-05, "loss": 1.6247, "step": 7433 }, { "epoch": 0.4143581740148264, "grad_norm": 0.5455536246299744, "learning_rate": 6.480075038680002e-05, "loss": 1.6946, "step": 7434 }, { "epoch": 0.4144139122679895, "grad_norm": 0.6041662096977234, "learning_rate": 6.479224497442897e-05, "loss": 1.9345, "step": 7435 }, { "epoch": 0.41446965052115264, "grad_norm": 0.5616452693939209, "learning_rate": 6.478373909293412e-05, "loss": 1.8108, "step": 7436 }, { "epoch": 0.41452538877431583, "grad_norm": 0.5593286752700806, "learning_rate": 6.477523274258528e-05, "loss": 1.6404, "step": 7437 }, { "epoch": 0.41458112702747896, "grad_norm": 0.5919610261917114, "learning_rate": 6.47667259236522e-05, "loss": 1.8287, "step": 7438 }, { "epoch": 0.4146368652806421, "grad_norm": 0.6362894177436829, "learning_rate": 6.475821863640467e-05, "loss": 1.8535, "step": 7439 }, { "epoch": 0.41469260353380527, "grad_norm": 0.4930521547794342, "learning_rate": 6.474971088111248e-05, "loss": 1.3973, "step": 7440 }, { "epoch": 0.4147483417869684, "grad_norm": 0.5308540463447571, "learning_rate": 6.474120265804549e-05, "loss": 1.5271, "step": 7441 }, { "epoch": 0.41480408004013153, "grad_norm": 0.5587360262870789, "learning_rate": 6.473269396747346e-05, "loss": 1.6953, "step": 7442 }, { "epoch": 0.4148598182932947, "grad_norm": 0.5565241575241089, "learning_rate": 6.47241848096663e-05, "loss": 1.7807, "step": 7443 }, { "epoch": 0.41491555654645784, "grad_norm": 0.6130486130714417, "learning_rate": 6.471567518489383e-05, "loss": 2.0551, "step": 7444 }, { "epoch": 0.41497129479962097, "grad_norm": 0.5374565720558167, "learning_rate": 6.470716509342594e-05, "loss": 1.6525, "step": 7445 }, { "epoch": 0.4150270330527841, "grad_norm": 0.5470364093780518, "learning_rate": 6.469865453553254e-05, "loss": 1.7753, "step": 7446 }, { "epoch": 0.4150827713059473, "grad_norm": 0.5423111319541931, "learning_rate": 6.46901435114835e-05, "loss": 1.6718, "step": 7447 }, { "epoch": 0.4151385095591104, "grad_norm": 0.630453884601593, "learning_rate": 6.468163202154877e-05, "loss": 1.7607, "step": 7448 }, { "epoch": 0.41519424781227354, "grad_norm": 0.5870693325996399, "learning_rate": 6.467312006599828e-05, "loss": 1.8854, "step": 7449 }, { "epoch": 0.4152499860654367, "grad_norm": 0.6026604771614075, "learning_rate": 6.466460764510196e-05, "loss": 1.6298, "step": 7450 }, { "epoch": 0.41530572431859986, "grad_norm": 0.5341464281082153, "learning_rate": 6.465609475912977e-05, "loss": 1.5961, "step": 7451 }, { "epoch": 0.415361462571763, "grad_norm": 0.5364176630973816, "learning_rate": 6.464758140835173e-05, "loss": 1.6091, "step": 7452 }, { "epoch": 0.41541720082492617, "grad_norm": 0.5682061910629272, "learning_rate": 6.463906759303779e-05, "loss": 1.6807, "step": 7453 }, { "epoch": 0.4154729390780893, "grad_norm": 0.5520201325416565, "learning_rate": 6.463055331345798e-05, "loss": 1.8693, "step": 7454 }, { "epoch": 0.4155286773312524, "grad_norm": 0.5386977195739746, "learning_rate": 6.462203856988233e-05, "loss": 1.5473, "step": 7455 }, { "epoch": 0.4155844155844156, "grad_norm": 0.5517452955245972, "learning_rate": 6.461352336258088e-05, "loss": 1.5523, "step": 7456 }, { "epoch": 0.41564015383757874, "grad_norm": 0.6362208127975464, "learning_rate": 6.460500769182365e-05, "loss": 1.6515, "step": 7457 }, { "epoch": 0.41569589209074187, "grad_norm": 0.5483435392379761, "learning_rate": 6.459649155788075e-05, "loss": 1.6962, "step": 7458 }, { "epoch": 0.415751630343905, "grad_norm": 0.5627394914627075, "learning_rate": 6.458797496102222e-05, "loss": 1.5808, "step": 7459 }, { "epoch": 0.4158073685970682, "grad_norm": 0.5749256610870361, "learning_rate": 6.45794579015182e-05, "loss": 1.6652, "step": 7460 }, { "epoch": 0.4158631068502313, "grad_norm": 0.561033308506012, "learning_rate": 6.457094037963877e-05, "loss": 1.5447, "step": 7461 }, { "epoch": 0.41591884510339444, "grad_norm": 0.6188123822212219, "learning_rate": 6.456242239565405e-05, "loss": 1.8373, "step": 7462 }, { "epoch": 0.4159745833565576, "grad_norm": 0.5495220422744751, "learning_rate": 6.455390394983422e-05, "loss": 1.7338, "step": 7463 }, { "epoch": 0.41603032160972075, "grad_norm": 0.5390871167182922, "learning_rate": 6.454538504244938e-05, "loss": 1.5552, "step": 7464 }, { "epoch": 0.4160860598628839, "grad_norm": 0.5653820633888245, "learning_rate": 6.453686567376976e-05, "loss": 1.692, "step": 7465 }, { "epoch": 0.41614179811604707, "grad_norm": 0.5153915286064148, "learning_rate": 6.45283458440655e-05, "loss": 1.6676, "step": 7466 }, { "epoch": 0.4161975363692102, "grad_norm": 0.5695963501930237, "learning_rate": 6.451982555360682e-05, "loss": 1.6982, "step": 7467 }, { "epoch": 0.4162532746223733, "grad_norm": 0.6078826785087585, "learning_rate": 6.451130480266395e-05, "loss": 1.762, "step": 7468 }, { "epoch": 0.41630901287553645, "grad_norm": 0.5621688961982727, "learning_rate": 6.450278359150708e-05, "loss": 1.5914, "step": 7469 }, { "epoch": 0.41636475112869964, "grad_norm": 0.5914077162742615, "learning_rate": 6.449426192040649e-05, "loss": 1.909, "step": 7470 }, { "epoch": 0.41642048938186277, "grad_norm": 0.5638688802719116, "learning_rate": 6.448573978963239e-05, "loss": 1.8037, "step": 7471 }, { "epoch": 0.4164762276350259, "grad_norm": 0.569990336894989, "learning_rate": 6.44772171994551e-05, "loss": 1.5707, "step": 7472 }, { "epoch": 0.4165319658881891, "grad_norm": 0.5680502653121948, "learning_rate": 6.446869415014488e-05, "loss": 1.6062, "step": 7473 }, { "epoch": 0.4165877041413522, "grad_norm": 0.5565951466560364, "learning_rate": 6.446017064197205e-05, "loss": 1.7973, "step": 7474 }, { "epoch": 0.41664344239451534, "grad_norm": 0.5711973905563354, "learning_rate": 6.445164667520691e-05, "loss": 1.751, "step": 7475 }, { "epoch": 0.4166991806476785, "grad_norm": 0.5332829356193542, "learning_rate": 6.44431222501198e-05, "loss": 1.5348, "step": 7476 }, { "epoch": 0.41675491890084165, "grad_norm": 0.5311811566352844, "learning_rate": 6.443459736698105e-05, "loss": 1.7648, "step": 7477 }, { "epoch": 0.4168106571540048, "grad_norm": 0.5389667749404907, "learning_rate": 6.442607202606104e-05, "loss": 1.5702, "step": 7478 }, { "epoch": 0.41686639540716797, "grad_norm": 0.5450131297111511, "learning_rate": 6.441754622763015e-05, "loss": 1.5624, "step": 7479 }, { "epoch": 0.4169221336603311, "grad_norm": 0.6195186376571655, "learning_rate": 6.440901997195871e-05, "loss": 1.8265, "step": 7480 }, { "epoch": 0.4169778719134942, "grad_norm": 0.5652611255645752, "learning_rate": 6.440049325931721e-05, "loss": 1.6908, "step": 7481 }, { "epoch": 0.41703361016665735, "grad_norm": 0.5675498843193054, "learning_rate": 6.4391966089976e-05, "loss": 1.8279, "step": 7482 }, { "epoch": 0.41708934841982054, "grad_norm": 0.5133779048919678, "learning_rate": 6.438343846420556e-05, "loss": 1.4909, "step": 7483 }, { "epoch": 0.41714508667298367, "grad_norm": 0.5815598964691162, "learning_rate": 6.437491038227628e-05, "loss": 1.6886, "step": 7484 }, { "epoch": 0.4172008249261468, "grad_norm": 0.5756742358207703, "learning_rate": 6.43663818444587e-05, "loss": 1.5501, "step": 7485 }, { "epoch": 0.41725656317931, "grad_norm": 0.5238984227180481, "learning_rate": 6.435785285102321e-05, "loss": 1.5227, "step": 7486 }, { "epoch": 0.4173123014324731, "grad_norm": 0.6538522839546204, "learning_rate": 6.434932340224036e-05, "loss": 1.8644, "step": 7487 }, { "epoch": 0.41736803968563624, "grad_norm": 0.5802149772644043, "learning_rate": 6.434079349838062e-05, "loss": 1.823, "step": 7488 }, { "epoch": 0.4174237779387994, "grad_norm": 0.5617754459381104, "learning_rate": 6.433226313971455e-05, "loss": 1.6917, "step": 7489 }, { "epoch": 0.41747951619196255, "grad_norm": 0.5967627763748169, "learning_rate": 6.432373232651261e-05, "loss": 1.8103, "step": 7490 }, { "epoch": 0.4175352544451257, "grad_norm": 0.5762447714805603, "learning_rate": 6.431520105904543e-05, "loss": 1.6457, "step": 7491 }, { "epoch": 0.4175909926982888, "grad_norm": 0.5717265009880066, "learning_rate": 6.430666933758353e-05, "loss": 1.7308, "step": 7492 }, { "epoch": 0.417646730951452, "grad_norm": 0.5314132571220398, "learning_rate": 6.429813716239747e-05, "loss": 1.5346, "step": 7493 }, { "epoch": 0.4177024692046151, "grad_norm": 0.5187550187110901, "learning_rate": 6.42896045337579e-05, "loss": 1.541, "step": 7494 }, { "epoch": 0.41775820745777825, "grad_norm": 0.524467945098877, "learning_rate": 6.428107145193535e-05, "loss": 1.6209, "step": 7495 }, { "epoch": 0.41781394571094144, "grad_norm": 0.5283476710319519, "learning_rate": 6.427253791720051e-05, "loss": 1.6333, "step": 7496 }, { "epoch": 0.41786968396410457, "grad_norm": 0.5059264302253723, "learning_rate": 6.426400392982396e-05, "loss": 1.4312, "step": 7497 }, { "epoch": 0.4179254222172677, "grad_norm": 0.5070070028305054, "learning_rate": 6.425546949007639e-05, "loss": 1.4918, "step": 7498 }, { "epoch": 0.4179811604704309, "grad_norm": 0.5226110219955444, "learning_rate": 6.424693459822842e-05, "loss": 1.6224, "step": 7499 }, { "epoch": 0.418036898723594, "grad_norm": 0.5620803833007812, "learning_rate": 6.423839925455077e-05, "loss": 1.815, "step": 7500 }, { "epoch": 0.41809263697675714, "grad_norm": 0.5102522969245911, "learning_rate": 6.422986345931411e-05, "loss": 1.6608, "step": 7501 }, { "epoch": 0.4181483752299203, "grad_norm": 0.5353087782859802, "learning_rate": 6.422132721278915e-05, "loss": 1.5651, "step": 7502 }, { "epoch": 0.41820411348308345, "grad_norm": 0.6161815524101257, "learning_rate": 6.421279051524658e-05, "loss": 1.6941, "step": 7503 }, { "epoch": 0.4182598517362466, "grad_norm": 0.6280367970466614, "learning_rate": 6.420425336695719e-05, "loss": 1.8122, "step": 7504 }, { "epoch": 0.4183155899894097, "grad_norm": 0.5285361409187317, "learning_rate": 6.419571576819168e-05, "loss": 1.59, "step": 7505 }, { "epoch": 0.4183713282425729, "grad_norm": 0.5601312518119812, "learning_rate": 6.418717771922084e-05, "loss": 1.6675, "step": 7506 }, { "epoch": 0.418427066495736, "grad_norm": 0.6108425855636597, "learning_rate": 6.417863922031544e-05, "loss": 1.9184, "step": 7507 }, { "epoch": 0.41848280474889915, "grad_norm": 0.5752027034759521, "learning_rate": 6.417010027174627e-05, "loss": 1.7789, "step": 7508 }, { "epoch": 0.41853854300206234, "grad_norm": 0.5731359720230103, "learning_rate": 6.416156087378415e-05, "loss": 1.6246, "step": 7509 }, { "epoch": 0.41859428125522546, "grad_norm": 0.5547140836715698, "learning_rate": 6.415302102669987e-05, "loss": 1.5967, "step": 7510 }, { "epoch": 0.4186500195083886, "grad_norm": 0.5709370970726013, "learning_rate": 6.414448073076429e-05, "loss": 1.6613, "step": 7511 }, { "epoch": 0.4187057577615518, "grad_norm": 0.5591392517089844, "learning_rate": 6.413593998624824e-05, "loss": 1.709, "step": 7512 }, { "epoch": 0.4187614960147149, "grad_norm": 0.5560973286628723, "learning_rate": 6.41273987934226e-05, "loss": 1.6281, "step": 7513 }, { "epoch": 0.41881723426787804, "grad_norm": 0.5822799205780029, "learning_rate": 6.411885715255823e-05, "loss": 1.7274, "step": 7514 }, { "epoch": 0.41887297252104116, "grad_norm": 0.5955770611763, "learning_rate": 6.411031506392605e-05, "loss": 1.6704, "step": 7515 }, { "epoch": 0.41892871077420435, "grad_norm": 0.5852923393249512, "learning_rate": 6.410177252779692e-05, "loss": 1.7526, "step": 7516 }, { "epoch": 0.4189844490273675, "grad_norm": 0.5543795228004456, "learning_rate": 6.409322954444179e-05, "loss": 1.5793, "step": 7517 }, { "epoch": 0.4190401872805306, "grad_norm": 0.5983227491378784, "learning_rate": 6.408468611413159e-05, "loss": 1.8319, "step": 7518 }, { "epoch": 0.4190959255336938, "grad_norm": 0.5510286688804626, "learning_rate": 6.407614223713727e-05, "loss": 1.6506, "step": 7519 }, { "epoch": 0.4191516637868569, "grad_norm": 0.5010602474212646, "learning_rate": 6.40675979137298e-05, "loss": 1.5807, "step": 7520 }, { "epoch": 0.41920740204002005, "grad_norm": 0.5825363397598267, "learning_rate": 6.405905314418013e-05, "loss": 1.6839, "step": 7521 }, { "epoch": 0.41926314029318323, "grad_norm": 0.5282953977584839, "learning_rate": 6.405050792875926e-05, "loss": 1.5602, "step": 7522 }, { "epoch": 0.41931887854634636, "grad_norm": 0.5378554463386536, "learning_rate": 6.40419622677382e-05, "loss": 1.5204, "step": 7523 }, { "epoch": 0.4193746167995095, "grad_norm": 0.548743486404419, "learning_rate": 6.403341616138797e-05, "loss": 1.7654, "step": 7524 }, { "epoch": 0.4194303550526727, "grad_norm": 0.5437180399894714, "learning_rate": 6.40248696099796e-05, "loss": 1.7341, "step": 7525 }, { "epoch": 0.4194860933058358, "grad_norm": 0.7081752419471741, "learning_rate": 6.401632261378414e-05, "loss": 1.3932, "step": 7526 }, { "epoch": 0.41954183155899893, "grad_norm": 0.6215348243713379, "learning_rate": 6.400777517307265e-05, "loss": 1.9211, "step": 7527 }, { "epoch": 0.41959756981216206, "grad_norm": 0.5972661375999451, "learning_rate": 6.39992272881162e-05, "loss": 1.848, "step": 7528 }, { "epoch": 0.41965330806532525, "grad_norm": 0.5357066988945007, "learning_rate": 6.399067895918587e-05, "loss": 1.6233, "step": 7529 }, { "epoch": 0.4197090463184884, "grad_norm": 0.5154542922973633, "learning_rate": 6.39821301865528e-05, "loss": 1.578, "step": 7530 }, { "epoch": 0.4197647845716515, "grad_norm": 0.524694561958313, "learning_rate": 6.397358097048806e-05, "loss": 1.6923, "step": 7531 }, { "epoch": 0.4198205228248147, "grad_norm": 0.5902459025382996, "learning_rate": 6.39650313112628e-05, "loss": 1.7314, "step": 7532 }, { "epoch": 0.4198762610779778, "grad_norm": 0.5320487022399902, "learning_rate": 6.39564812091482e-05, "loss": 1.6396, "step": 7533 }, { "epoch": 0.41993199933114095, "grad_norm": 0.5881032943725586, "learning_rate": 6.394793066441534e-05, "loss": 1.8865, "step": 7534 }, { "epoch": 0.41998773758430413, "grad_norm": 0.5616896748542786, "learning_rate": 6.393937967733548e-05, "loss": 1.8735, "step": 7535 }, { "epoch": 0.42004347583746726, "grad_norm": 0.5341779589653015, "learning_rate": 6.393082824817974e-05, "loss": 1.635, "step": 7536 }, { "epoch": 0.4200992140906304, "grad_norm": 0.5636286735534668, "learning_rate": 6.392227637721937e-05, "loss": 1.797, "step": 7537 }, { "epoch": 0.4201549523437935, "grad_norm": 0.5334611535072327, "learning_rate": 6.391372406472557e-05, "loss": 1.6705, "step": 7538 }, { "epoch": 0.4202106905969567, "grad_norm": 0.588848888874054, "learning_rate": 6.390517131096955e-05, "loss": 1.7877, "step": 7539 }, { "epoch": 0.42026642885011983, "grad_norm": 0.5427910685539246, "learning_rate": 6.389661811622258e-05, "loss": 1.5672, "step": 7540 }, { "epoch": 0.42032216710328296, "grad_norm": 0.6046989560127258, "learning_rate": 6.388806448075591e-05, "loss": 1.8186, "step": 7541 }, { "epoch": 0.42037790535644615, "grad_norm": 0.5373850464820862, "learning_rate": 6.38795104048408e-05, "loss": 1.5539, "step": 7542 }, { "epoch": 0.4204336436096093, "grad_norm": 0.5726231336593628, "learning_rate": 6.387095588874854e-05, "loss": 1.6383, "step": 7543 }, { "epoch": 0.4204893818627724, "grad_norm": 0.5964796543121338, "learning_rate": 6.386240093275044e-05, "loss": 1.9338, "step": 7544 }, { "epoch": 0.4205451201159356, "grad_norm": 0.5379793047904968, "learning_rate": 6.385384553711779e-05, "loss": 1.5479, "step": 7545 }, { "epoch": 0.4206008583690987, "grad_norm": 0.5321194529533386, "learning_rate": 6.384528970212196e-05, "loss": 1.6119, "step": 7546 }, { "epoch": 0.42065659662226185, "grad_norm": 0.6583168506622314, "learning_rate": 6.383673342803424e-05, "loss": 1.7555, "step": 7547 }, { "epoch": 0.42071233487542503, "grad_norm": 0.5755535364151001, "learning_rate": 6.382817671512603e-05, "loss": 1.629, "step": 7548 }, { "epoch": 0.42076807312858816, "grad_norm": 0.614747941493988, "learning_rate": 6.381961956366865e-05, "loss": 2.0066, "step": 7549 }, { "epoch": 0.4208238113817513, "grad_norm": 0.5643095374107361, "learning_rate": 6.381106197393353e-05, "loss": 1.7497, "step": 7550 }, { "epoch": 0.4208795496349144, "grad_norm": 0.5332757234573364, "learning_rate": 6.380250394619205e-05, "loss": 1.4505, "step": 7551 }, { "epoch": 0.4209352878880776, "grad_norm": 0.5462849736213684, "learning_rate": 6.379394548071563e-05, "loss": 1.7164, "step": 7552 }, { "epoch": 0.42099102614124073, "grad_norm": 0.5277321338653564, "learning_rate": 6.378538657777565e-05, "loss": 1.4521, "step": 7553 }, { "epoch": 0.42104676439440386, "grad_norm": 0.5687193274497986, "learning_rate": 6.37768272376436e-05, "loss": 1.6832, "step": 7554 }, { "epoch": 0.42110250264756705, "grad_norm": 0.5538173913955688, "learning_rate": 6.376826746059092e-05, "loss": 1.5916, "step": 7555 }, { "epoch": 0.4211582409007302, "grad_norm": 0.5794023871421814, "learning_rate": 6.375970724688906e-05, "loss": 1.5985, "step": 7556 }, { "epoch": 0.4212139791538933, "grad_norm": 0.534807026386261, "learning_rate": 6.375114659680951e-05, "loss": 1.5822, "step": 7557 }, { "epoch": 0.4212697174070565, "grad_norm": 0.5474613308906555, "learning_rate": 6.374258551062378e-05, "loss": 1.7155, "step": 7558 }, { "epoch": 0.4213254556602196, "grad_norm": 0.558594286441803, "learning_rate": 6.373402398860336e-05, "loss": 1.7239, "step": 7559 }, { "epoch": 0.42138119391338275, "grad_norm": 0.6263135671615601, "learning_rate": 6.372546203101977e-05, "loss": 1.8782, "step": 7560 }, { "epoch": 0.4214369321665459, "grad_norm": 0.5759534239768982, "learning_rate": 6.371689963814455e-05, "loss": 1.798, "step": 7561 }, { "epoch": 0.42149267041970906, "grad_norm": 0.582333505153656, "learning_rate": 6.370833681024924e-05, "loss": 1.679, "step": 7562 }, { "epoch": 0.4215484086728722, "grad_norm": 0.5175591707229614, "learning_rate": 6.369977354760541e-05, "loss": 1.6172, "step": 7563 }, { "epoch": 0.4216041469260353, "grad_norm": 0.6253464818000793, "learning_rate": 6.369120985048464e-05, "loss": 1.8897, "step": 7564 }, { "epoch": 0.4216598851791985, "grad_norm": 0.6171419024467468, "learning_rate": 6.368264571915854e-05, "loss": 1.9296, "step": 7565 }, { "epoch": 0.42171562343236163, "grad_norm": 0.5854969620704651, "learning_rate": 6.367408115389868e-05, "loss": 1.8127, "step": 7566 }, { "epoch": 0.42177136168552476, "grad_norm": 0.5167074203491211, "learning_rate": 6.366551615497669e-05, "loss": 1.4419, "step": 7567 }, { "epoch": 0.42182709993868794, "grad_norm": 0.5605902075767517, "learning_rate": 6.36569507226642e-05, "loss": 1.5106, "step": 7568 }, { "epoch": 0.4218828381918511, "grad_norm": 0.5542864799499512, "learning_rate": 6.364838485723286e-05, "loss": 1.6104, "step": 7569 }, { "epoch": 0.4219385764450142, "grad_norm": 0.5589380860328674, "learning_rate": 6.363981855895433e-05, "loss": 1.8112, "step": 7570 }, { "epoch": 0.4219943146981774, "grad_norm": 0.5342586040496826, "learning_rate": 6.363125182810028e-05, "loss": 1.668, "step": 7571 }, { "epoch": 0.4220500529513405, "grad_norm": 0.5474408268928528, "learning_rate": 6.36226846649424e-05, "loss": 1.477, "step": 7572 }, { "epoch": 0.42210579120450364, "grad_norm": 0.549768328666687, "learning_rate": 6.361411706975237e-05, "loss": 1.6127, "step": 7573 }, { "epoch": 0.4221615294576668, "grad_norm": 0.5820984244346619, "learning_rate": 6.360554904280196e-05, "loss": 1.7687, "step": 7574 }, { "epoch": 0.42221726771082996, "grad_norm": 0.5574761033058167, "learning_rate": 6.359698058436282e-05, "loss": 1.7282, "step": 7575 }, { "epoch": 0.4222730059639931, "grad_norm": 0.5506951808929443, "learning_rate": 6.358841169470676e-05, "loss": 1.6214, "step": 7576 }, { "epoch": 0.4223287442171562, "grad_norm": 0.5659124851226807, "learning_rate": 6.35798423741055e-05, "loss": 1.6966, "step": 7577 }, { "epoch": 0.4223844824703194, "grad_norm": 0.5484572052955627, "learning_rate": 6.357127262283081e-05, "loss": 1.6683, "step": 7578 }, { "epoch": 0.42244022072348253, "grad_norm": 0.4761580526828766, "learning_rate": 6.356270244115448e-05, "loss": 1.3579, "step": 7579 }, { "epoch": 0.42249595897664566, "grad_norm": 0.5656337738037109, "learning_rate": 6.355413182934831e-05, "loss": 1.7506, "step": 7580 }, { "epoch": 0.42255169722980884, "grad_norm": 0.6253755688667297, "learning_rate": 6.35455607876841e-05, "loss": 1.5443, "step": 7581 }, { "epoch": 0.42260743548297197, "grad_norm": 0.5522517561912537, "learning_rate": 6.353698931643368e-05, "loss": 1.7318, "step": 7582 }, { "epoch": 0.4226631737361351, "grad_norm": 0.5824682712554932, "learning_rate": 6.352841741586888e-05, "loss": 1.9499, "step": 7583 }, { "epoch": 0.42271891198929823, "grad_norm": 0.6166448593139648, "learning_rate": 6.351984508626155e-05, "loss": 1.6598, "step": 7584 }, { "epoch": 0.4227746502424614, "grad_norm": 0.6640730500221252, "learning_rate": 6.351127232788357e-05, "loss": 1.9022, "step": 7585 }, { "epoch": 0.42283038849562454, "grad_norm": 0.5395544171333313, "learning_rate": 6.350269914100681e-05, "loss": 1.8523, "step": 7586 }, { "epoch": 0.42288612674878767, "grad_norm": 0.597951352596283, "learning_rate": 6.349412552590317e-05, "loss": 1.7423, "step": 7587 }, { "epoch": 0.42294186500195086, "grad_norm": 0.5310340523719788, "learning_rate": 6.348555148284452e-05, "loss": 1.6669, "step": 7588 }, { "epoch": 0.422997603255114, "grad_norm": 0.563275933265686, "learning_rate": 6.347697701210281e-05, "loss": 1.8138, "step": 7589 }, { "epoch": 0.4230533415082771, "grad_norm": 0.5225051641464233, "learning_rate": 6.346840211394998e-05, "loss": 1.5228, "step": 7590 }, { "epoch": 0.4231090797614403, "grad_norm": 0.5949013233184814, "learning_rate": 6.345982678865795e-05, "loss": 1.8378, "step": 7591 }, { "epoch": 0.4231648180146034, "grad_norm": 0.6444050073623657, "learning_rate": 6.345125103649869e-05, "loss": 1.9561, "step": 7592 }, { "epoch": 0.42322055626776656, "grad_norm": 0.538077712059021, "learning_rate": 6.344267485774417e-05, "loss": 1.6172, "step": 7593 }, { "epoch": 0.42327629452092974, "grad_norm": 0.5770418047904968, "learning_rate": 6.34340982526664e-05, "loss": 1.7064, "step": 7594 }, { "epoch": 0.42333203277409287, "grad_norm": 0.5491243600845337, "learning_rate": 6.342552122153734e-05, "loss": 1.5869, "step": 7595 }, { "epoch": 0.423387771027256, "grad_norm": 0.5911741852760315, "learning_rate": 6.3416943764629e-05, "loss": 1.4539, "step": 7596 }, { "epoch": 0.42344350928041913, "grad_norm": 0.5493375062942505, "learning_rate": 6.340836588221347e-05, "loss": 1.2324, "step": 7597 }, { "epoch": 0.4234992475335823, "grad_norm": 0.5272154808044434, "learning_rate": 6.339978757456274e-05, "loss": 1.7336, "step": 7598 }, { "epoch": 0.42355498578674544, "grad_norm": 0.6132648587226868, "learning_rate": 6.339120884194886e-05, "loss": 1.8399, "step": 7599 }, { "epoch": 0.42361072403990857, "grad_norm": 0.6002299189567566, "learning_rate": 6.338262968464394e-05, "loss": 1.7355, "step": 7600 }, { "epoch": 0.42366646229307175, "grad_norm": 0.5747309327125549, "learning_rate": 6.337405010292e-05, "loss": 1.5466, "step": 7601 }, { "epoch": 0.4237222005462349, "grad_norm": 0.6044133901596069, "learning_rate": 6.336547009704919e-05, "loss": 1.894, "step": 7602 }, { "epoch": 0.423777938799398, "grad_norm": 0.6029581427574158, "learning_rate": 6.335688966730358e-05, "loss": 1.7874, "step": 7603 }, { "epoch": 0.4238336770525612, "grad_norm": 0.5374162197113037, "learning_rate": 6.334830881395533e-05, "loss": 1.4537, "step": 7604 }, { "epoch": 0.4238894153057243, "grad_norm": 0.5794885158538818, "learning_rate": 6.333972753727653e-05, "loss": 1.6731, "step": 7605 }, { "epoch": 0.42394515355888746, "grad_norm": 0.6136147379875183, "learning_rate": 6.333114583753936e-05, "loss": 2.0005, "step": 7606 }, { "epoch": 0.4240008918120506, "grad_norm": 0.6465775370597839, "learning_rate": 6.332256371501597e-05, "loss": 1.7024, "step": 7607 }, { "epoch": 0.42405663006521377, "grad_norm": 0.4953748285770416, "learning_rate": 6.331398116997851e-05, "loss": 1.4046, "step": 7608 }, { "epoch": 0.4241123683183769, "grad_norm": 0.5147947669029236, "learning_rate": 6.330539820269921e-05, "loss": 1.7066, "step": 7609 }, { "epoch": 0.42416810657154, "grad_norm": 0.5854727029800415, "learning_rate": 6.329681481345026e-05, "loss": 1.7871, "step": 7610 }, { "epoch": 0.4242238448247032, "grad_norm": 0.5421152710914612, "learning_rate": 6.328823100250386e-05, "loss": 1.6782, "step": 7611 }, { "epoch": 0.42427958307786634, "grad_norm": 0.5201201438903809, "learning_rate": 6.327964677013224e-05, "loss": 1.6405, "step": 7612 }, { "epoch": 0.42433532133102947, "grad_norm": 0.5656992197036743, "learning_rate": 6.327106211660769e-05, "loss": 1.798, "step": 7613 }, { "epoch": 0.42439105958419265, "grad_norm": 0.5751951336860657, "learning_rate": 6.326247704220239e-05, "loss": 1.6055, "step": 7614 }, { "epoch": 0.4244467978373558, "grad_norm": 0.546371579170227, "learning_rate": 6.325389154718865e-05, "loss": 1.7596, "step": 7615 }, { "epoch": 0.4245025360905189, "grad_norm": 0.5406731367111206, "learning_rate": 6.324530563183875e-05, "loss": 1.6401, "step": 7616 }, { "epoch": 0.4245582743436821, "grad_norm": 0.5809882879257202, "learning_rate": 6.323671929642498e-05, "loss": 1.868, "step": 7617 }, { "epoch": 0.4246140125968452, "grad_norm": 0.540643572807312, "learning_rate": 6.322813254121964e-05, "loss": 1.715, "step": 7618 }, { "epoch": 0.42466975085000835, "grad_norm": 0.5267550945281982, "learning_rate": 6.321954536649508e-05, "loss": 1.5837, "step": 7619 }, { "epoch": 0.4247254891031715, "grad_norm": 0.5602602958679199, "learning_rate": 6.32109577725236e-05, "loss": 1.7406, "step": 7620 }, { "epoch": 0.42478122735633467, "grad_norm": 0.5607280731201172, "learning_rate": 6.320236975957757e-05, "loss": 1.6099, "step": 7621 }, { "epoch": 0.4248369656094978, "grad_norm": 0.5364249348640442, "learning_rate": 6.319378132792935e-05, "loss": 1.5277, "step": 7622 }, { "epoch": 0.4248927038626609, "grad_norm": 0.5527327656745911, "learning_rate": 6.318519247785131e-05, "loss": 1.7702, "step": 7623 }, { "epoch": 0.4249484421158241, "grad_norm": 0.5770801901817322, "learning_rate": 6.317660320961585e-05, "loss": 1.6098, "step": 7624 }, { "epoch": 0.42500418036898724, "grad_norm": 0.5606113076210022, "learning_rate": 6.316801352349534e-05, "loss": 1.6451, "step": 7625 }, { "epoch": 0.42505991862215037, "grad_norm": 0.6124593615531921, "learning_rate": 6.315942341976223e-05, "loss": 1.9987, "step": 7626 }, { "epoch": 0.42511565687531355, "grad_norm": 0.5524605512619019, "learning_rate": 6.315083289868892e-05, "loss": 1.6352, "step": 7627 }, { "epoch": 0.4251713951284767, "grad_norm": 0.5734837651252747, "learning_rate": 6.314224196054787e-05, "loss": 1.8757, "step": 7628 }, { "epoch": 0.4252271333816398, "grad_norm": 0.64513099193573, "learning_rate": 6.313365060561153e-05, "loss": 2.0665, "step": 7629 }, { "epoch": 0.42528287163480294, "grad_norm": 0.5457690954208374, "learning_rate": 6.312505883415238e-05, "loss": 1.6602, "step": 7630 }, { "epoch": 0.4253386098879661, "grad_norm": 0.6007886528968811, "learning_rate": 6.311646664644288e-05, "loss": 1.7241, "step": 7631 }, { "epoch": 0.42539434814112925, "grad_norm": 0.5715931057929993, "learning_rate": 6.310787404275553e-05, "loss": 1.7581, "step": 7632 }, { "epoch": 0.4254500863942924, "grad_norm": 0.5710930228233337, "learning_rate": 6.309928102336284e-05, "loss": 1.7147, "step": 7633 }, { "epoch": 0.42550582464745557, "grad_norm": 0.5583118796348572, "learning_rate": 6.309068758853732e-05, "loss": 1.6103, "step": 7634 }, { "epoch": 0.4255615629006187, "grad_norm": 0.5537952184677124, "learning_rate": 6.308209373855154e-05, "loss": 1.6947, "step": 7635 }, { "epoch": 0.4256173011537818, "grad_norm": 0.5451967716217041, "learning_rate": 6.3073499473678e-05, "loss": 1.6384, "step": 7636 }, { "epoch": 0.425673039406945, "grad_norm": 0.5317254066467285, "learning_rate": 6.30649047941893e-05, "loss": 1.5643, "step": 7637 }, { "epoch": 0.42572877766010814, "grad_norm": 0.5423393845558167, "learning_rate": 6.305630970035796e-05, "loss": 1.5257, "step": 7638 }, { "epoch": 0.42578451591327127, "grad_norm": 0.5897427797317505, "learning_rate": 6.304771419245663e-05, "loss": 1.8738, "step": 7639 }, { "epoch": 0.42584025416643445, "grad_norm": 0.5559675097465515, "learning_rate": 6.303911827075786e-05, "loss": 1.8562, "step": 7640 }, { "epoch": 0.4258959924195976, "grad_norm": 0.5857858061790466, "learning_rate": 6.303052193553429e-05, "loss": 1.7146, "step": 7641 }, { "epoch": 0.4259517306727607, "grad_norm": 0.6495271325111389, "learning_rate": 6.302192518705853e-05, "loss": 1.7639, "step": 7642 }, { "epoch": 0.42600746892592384, "grad_norm": 0.5638108253479004, "learning_rate": 6.301332802560325e-05, "loss": 1.5804, "step": 7643 }, { "epoch": 0.426063207179087, "grad_norm": 0.5066633224487305, "learning_rate": 6.300473045144107e-05, "loss": 1.4344, "step": 7644 }, { "epoch": 0.42611894543225015, "grad_norm": 0.5637665390968323, "learning_rate": 6.299613246484464e-05, "loss": 1.6573, "step": 7645 }, { "epoch": 0.4261746836854133, "grad_norm": 0.5206940174102783, "learning_rate": 6.298753406608668e-05, "loss": 1.5995, "step": 7646 }, { "epoch": 0.42623042193857646, "grad_norm": 0.5374553799629211, "learning_rate": 6.297893525543986e-05, "loss": 1.7107, "step": 7647 }, { "epoch": 0.4262861601917396, "grad_norm": 0.5552041530609131, "learning_rate": 6.297033603317689e-05, "loss": 1.6734, "step": 7648 }, { "epoch": 0.4263418984449027, "grad_norm": 0.5269225239753723, "learning_rate": 6.296173639957045e-05, "loss": 1.64, "step": 7649 }, { "epoch": 0.4263976366980659, "grad_norm": 0.5553382635116577, "learning_rate": 6.295313635489335e-05, "loss": 1.3837, "step": 7650 }, { "epoch": 0.42645337495122904, "grad_norm": 0.5205674171447754, "learning_rate": 6.294453589941826e-05, "loss": 1.6142, "step": 7651 }, { "epoch": 0.42650911320439217, "grad_norm": 0.6198689937591553, "learning_rate": 6.2935935033418e-05, "loss": 1.7297, "step": 7652 }, { "epoch": 0.4265648514575553, "grad_norm": 0.556909441947937, "learning_rate": 6.292733375716526e-05, "loss": 1.7119, "step": 7653 }, { "epoch": 0.4266205897107185, "grad_norm": 0.5496246218681335, "learning_rate": 6.291873207093287e-05, "loss": 1.6478, "step": 7654 }, { "epoch": 0.4266763279638816, "grad_norm": 0.5758047103881836, "learning_rate": 6.291012997499362e-05, "loss": 1.8439, "step": 7655 }, { "epoch": 0.42673206621704474, "grad_norm": 0.5833730697631836, "learning_rate": 6.290152746962034e-05, "loss": 1.6251, "step": 7656 }, { "epoch": 0.4267878044702079, "grad_norm": 0.509559690952301, "learning_rate": 6.289292455508582e-05, "loss": 1.6364, "step": 7657 }, { "epoch": 0.42684354272337105, "grad_norm": 0.5244433879852295, "learning_rate": 6.28843212316629e-05, "loss": 1.4855, "step": 7658 }, { "epoch": 0.4268992809765342, "grad_norm": 0.5262942314147949, "learning_rate": 6.287571749962444e-05, "loss": 1.6034, "step": 7659 }, { "epoch": 0.42695501922969736, "grad_norm": 0.592850923538208, "learning_rate": 6.286711335924326e-05, "loss": 2.0333, "step": 7660 }, { "epoch": 0.4270107574828605, "grad_norm": 0.5585233569145203, "learning_rate": 6.28585088107923e-05, "loss": 1.7037, "step": 7661 }, { "epoch": 0.4270664957360236, "grad_norm": 0.5201496481895447, "learning_rate": 6.284990385454439e-05, "loss": 1.5226, "step": 7662 }, { "epoch": 0.4271222339891868, "grad_norm": 0.5410779714584351, "learning_rate": 6.284129849077247e-05, "loss": 1.6186, "step": 7663 }, { "epoch": 0.42717797224234993, "grad_norm": 0.5643417835235596, "learning_rate": 6.283269271974941e-05, "loss": 1.7211, "step": 7664 }, { "epoch": 0.42723371049551306, "grad_norm": 0.5603637099266052, "learning_rate": 6.282408654174818e-05, "loss": 1.6978, "step": 7665 }, { "epoch": 0.4272894487486762, "grad_norm": 0.5303884744644165, "learning_rate": 6.281547995704168e-05, "loss": 1.5544, "step": 7666 }, { "epoch": 0.4273451870018394, "grad_norm": 0.5895907282829285, "learning_rate": 6.280687296590287e-05, "loss": 1.697, "step": 7667 }, { "epoch": 0.4274009252550025, "grad_norm": 0.566055953502655, "learning_rate": 6.279826556860472e-05, "loss": 1.6596, "step": 7668 }, { "epoch": 0.42745666350816564, "grad_norm": 0.5401179790496826, "learning_rate": 6.278965776542021e-05, "loss": 1.7029, "step": 7669 }, { "epoch": 0.4275124017613288, "grad_norm": 0.6178464889526367, "learning_rate": 6.278104955662234e-05, "loss": 1.7344, "step": 7670 }, { "epoch": 0.42756814001449195, "grad_norm": 0.5440572500228882, "learning_rate": 6.277244094248407e-05, "loss": 1.7182, "step": 7671 }, { "epoch": 0.4276238782676551, "grad_norm": 0.5953531265258789, "learning_rate": 6.276383192327846e-05, "loss": 1.7045, "step": 7672 }, { "epoch": 0.42767961652081826, "grad_norm": 0.5182901620864868, "learning_rate": 6.27552224992785e-05, "loss": 1.5657, "step": 7673 }, { "epoch": 0.4277353547739814, "grad_norm": 0.5608685612678528, "learning_rate": 6.274661267075728e-05, "loss": 1.701, "step": 7674 }, { "epoch": 0.4277910930271445, "grad_norm": 0.5933842658996582, "learning_rate": 6.27380024379878e-05, "loss": 1.804, "step": 7675 }, { "epoch": 0.42784683128030765, "grad_norm": 0.586521327495575, "learning_rate": 6.272939180124317e-05, "loss": 1.7744, "step": 7676 }, { "epoch": 0.42790256953347083, "grad_norm": 0.6096509695053101, "learning_rate": 6.272078076079644e-05, "loss": 1.9837, "step": 7677 }, { "epoch": 0.42795830778663396, "grad_norm": 0.6212565302848816, "learning_rate": 6.27121693169207e-05, "loss": 1.8042, "step": 7678 }, { "epoch": 0.4280140460397971, "grad_norm": 0.5542432069778442, "learning_rate": 6.270355746988908e-05, "loss": 1.6222, "step": 7679 }, { "epoch": 0.4280697842929603, "grad_norm": 0.5913196802139282, "learning_rate": 6.269494521997467e-05, "loss": 1.6313, "step": 7680 }, { "epoch": 0.4281255225461234, "grad_norm": 0.5573778748512268, "learning_rate": 6.268633256745063e-05, "loss": 1.7364, "step": 7681 }, { "epoch": 0.42818126079928653, "grad_norm": 0.5151004195213318, "learning_rate": 6.267771951259009e-05, "loss": 1.8938, "step": 7682 }, { "epoch": 0.4282369990524497, "grad_norm": 0.5424497127532959, "learning_rate": 6.26691060556662e-05, "loss": 1.706, "step": 7683 }, { "epoch": 0.42829273730561285, "grad_norm": 0.5353766083717346, "learning_rate": 6.266049219695211e-05, "loss": 1.6015, "step": 7684 }, { "epoch": 0.428348475558776, "grad_norm": 0.5848101377487183, "learning_rate": 6.265187793672105e-05, "loss": 1.9252, "step": 7685 }, { "epoch": 0.42840421381193916, "grad_norm": 0.5816083550453186, "learning_rate": 6.264326327524617e-05, "loss": 1.7076, "step": 7686 }, { "epoch": 0.4284599520651023, "grad_norm": 0.595378577709198, "learning_rate": 6.263464821280071e-05, "loss": 1.8343, "step": 7687 }, { "epoch": 0.4285156903182654, "grad_norm": 0.5391969084739685, "learning_rate": 6.262603274965786e-05, "loss": 1.5771, "step": 7688 }, { "epoch": 0.42857142857142855, "grad_norm": 0.5316036939620972, "learning_rate": 6.261741688609087e-05, "loss": 1.6646, "step": 7689 }, { "epoch": 0.42862716682459173, "grad_norm": 0.5671446323394775, "learning_rate": 6.260880062237299e-05, "loss": 1.8235, "step": 7690 }, { "epoch": 0.42868290507775486, "grad_norm": 0.5752628445625305, "learning_rate": 6.260018395877747e-05, "loss": 1.7776, "step": 7691 }, { "epoch": 0.428738643330918, "grad_norm": 0.5416520833969116, "learning_rate": 6.259156689557757e-05, "loss": 1.5817, "step": 7692 }, { "epoch": 0.4287943815840812, "grad_norm": 0.5795433521270752, "learning_rate": 6.258294943304656e-05, "loss": 1.6236, "step": 7693 }, { "epoch": 0.4288501198372443, "grad_norm": 0.5906192064285278, "learning_rate": 6.257433157145779e-05, "loss": 1.8114, "step": 7694 }, { "epoch": 0.42890585809040743, "grad_norm": 0.589847207069397, "learning_rate": 6.256571331108454e-05, "loss": 1.7796, "step": 7695 }, { "epoch": 0.4289615963435706, "grad_norm": 0.5236275792121887, "learning_rate": 6.25570946522001e-05, "loss": 1.4089, "step": 7696 }, { "epoch": 0.42901733459673375, "grad_norm": 0.5735291838645935, "learning_rate": 6.254847559507783e-05, "loss": 1.8332, "step": 7697 }, { "epoch": 0.4290730728498969, "grad_norm": 0.5835584998130798, "learning_rate": 6.253985613999111e-05, "loss": 1.7905, "step": 7698 }, { "epoch": 0.42912881110306, "grad_norm": 0.5706406831741333, "learning_rate": 6.253123628721324e-05, "loss": 1.7185, "step": 7699 }, { "epoch": 0.4291845493562232, "grad_norm": 0.6053869724273682, "learning_rate": 6.252261603701762e-05, "loss": 1.6092, "step": 7700 }, { "epoch": 0.4292402876093863, "grad_norm": 0.559517502784729, "learning_rate": 6.251399538967764e-05, "loss": 1.6353, "step": 7701 }, { "epoch": 0.42929602586254945, "grad_norm": 0.5170453190803528, "learning_rate": 6.250537434546668e-05, "loss": 1.5933, "step": 7702 }, { "epoch": 0.42935176411571263, "grad_norm": 0.5452066659927368, "learning_rate": 6.249675290465817e-05, "loss": 1.5875, "step": 7703 }, { "epoch": 0.42940750236887576, "grad_norm": 0.5306586623191833, "learning_rate": 6.248813106752551e-05, "loss": 1.4277, "step": 7704 }, { "epoch": 0.4294632406220389, "grad_norm": 0.601926863193512, "learning_rate": 6.247950883434214e-05, "loss": 1.667, "step": 7705 }, { "epoch": 0.4295189788752021, "grad_norm": 0.6103541254997253, "learning_rate": 6.24708862053815e-05, "loss": 1.6387, "step": 7706 }, { "epoch": 0.4295747171283652, "grad_norm": 0.5850464701652527, "learning_rate": 6.246226318091708e-05, "loss": 1.5703, "step": 7707 }, { "epoch": 0.42963045538152833, "grad_norm": 0.564311683177948, "learning_rate": 6.245363976122232e-05, "loss": 1.4084, "step": 7708 }, { "epoch": 0.4296861936346915, "grad_norm": 0.5692956447601318, "learning_rate": 6.244501594657073e-05, "loss": 1.6056, "step": 7709 }, { "epoch": 0.42974193188785464, "grad_norm": 0.48438626527786255, "learning_rate": 6.243639173723577e-05, "loss": 1.3122, "step": 7710 }, { "epoch": 0.4297976701410178, "grad_norm": 0.5293724536895752, "learning_rate": 6.2427767133491e-05, "loss": 1.5922, "step": 7711 }, { "epoch": 0.4298534083941809, "grad_norm": 0.5632352232933044, "learning_rate": 6.241914213560988e-05, "loss": 1.7423, "step": 7712 }, { "epoch": 0.4299091466473441, "grad_norm": 0.5172026753425598, "learning_rate": 6.241051674386602e-05, "loss": 1.4298, "step": 7713 }, { "epoch": 0.4299648849005072, "grad_norm": 0.5803625583648682, "learning_rate": 6.24018909585329e-05, "loss": 1.6772, "step": 7714 }, { "epoch": 0.43002062315367034, "grad_norm": 0.530988335609436, "learning_rate": 6.239326477988413e-05, "loss": 1.7007, "step": 7715 }, { "epoch": 0.43007636140683353, "grad_norm": 0.5132483243942261, "learning_rate": 6.238463820819325e-05, "loss": 1.5829, "step": 7716 }, { "epoch": 0.43013209965999666, "grad_norm": 0.6094499230384827, "learning_rate": 6.237601124373385e-05, "loss": 1.7885, "step": 7717 }, { "epoch": 0.4301878379131598, "grad_norm": 0.5744908452033997, "learning_rate": 6.236738388677952e-05, "loss": 1.7993, "step": 7718 }, { "epoch": 0.43024357616632297, "grad_norm": 0.6198621392250061, "learning_rate": 6.23587561376039e-05, "loss": 1.8437, "step": 7719 }, { "epoch": 0.4302993144194861, "grad_norm": 0.5478682518005371, "learning_rate": 6.235012799648057e-05, "loss": 1.7246, "step": 7720 }, { "epoch": 0.43035505267264923, "grad_norm": 0.5738255381584167, "learning_rate": 6.23414994636832e-05, "loss": 1.7322, "step": 7721 }, { "epoch": 0.43041079092581236, "grad_norm": 0.6019119024276733, "learning_rate": 6.233287053948543e-05, "loss": 1.6743, "step": 7722 }, { "epoch": 0.43046652917897554, "grad_norm": 0.5403818488121033, "learning_rate": 6.23242412241609e-05, "loss": 1.5439, "step": 7723 }, { "epoch": 0.43052226743213867, "grad_norm": 0.5892661213874817, "learning_rate": 6.23156115179833e-05, "loss": 2.0254, "step": 7724 }, { "epoch": 0.4305780056853018, "grad_norm": 0.6273830533027649, "learning_rate": 6.230698142122629e-05, "loss": 1.6787, "step": 7725 }, { "epoch": 0.430633743938465, "grad_norm": 0.5560447573661804, "learning_rate": 6.229835093416361e-05, "loss": 1.711, "step": 7726 }, { "epoch": 0.4306894821916281, "grad_norm": 0.5284225344657898, "learning_rate": 6.228972005706893e-05, "loss": 1.5921, "step": 7727 }, { "epoch": 0.43074522044479124, "grad_norm": 0.5550575852394104, "learning_rate": 6.228108879021599e-05, "loss": 1.5798, "step": 7728 }, { "epoch": 0.43080095869795443, "grad_norm": 0.5931698083877563, "learning_rate": 6.22724571338785e-05, "loss": 2.0899, "step": 7729 }, { "epoch": 0.43085669695111756, "grad_norm": 0.5341006517410278, "learning_rate": 6.226382508833026e-05, "loss": 1.6937, "step": 7730 }, { "epoch": 0.4309124352042807, "grad_norm": 0.5837813019752502, "learning_rate": 6.225519265384495e-05, "loss": 1.7363, "step": 7731 }, { "epoch": 0.43096817345744387, "grad_norm": 0.5665456056594849, "learning_rate": 6.22465598306964e-05, "loss": 1.6438, "step": 7732 }, { "epoch": 0.431023911710607, "grad_norm": 0.7508494257926941, "learning_rate": 6.223792661915838e-05, "loss": 1.6701, "step": 7733 }, { "epoch": 0.43107964996377013, "grad_norm": 0.5742450952529907, "learning_rate": 6.222929301950466e-05, "loss": 1.6195, "step": 7734 }, { "epoch": 0.43113538821693326, "grad_norm": 0.5885428190231323, "learning_rate": 6.222065903200908e-05, "loss": 1.852, "step": 7735 }, { "epoch": 0.43119112647009644, "grad_norm": 0.6054401993751526, "learning_rate": 6.221202465694545e-05, "loss": 1.9739, "step": 7736 }, { "epoch": 0.43124686472325957, "grad_norm": 0.5252482891082764, "learning_rate": 6.22033898945876e-05, "loss": 1.5755, "step": 7737 }, { "epoch": 0.4313026029764227, "grad_norm": 0.5708329677581787, "learning_rate": 6.219475474520936e-05, "loss": 1.7666, "step": 7738 }, { "epoch": 0.4313583412295859, "grad_norm": 0.5406473278999329, "learning_rate": 6.218611920908461e-05, "loss": 1.6721, "step": 7739 }, { "epoch": 0.431414079482749, "grad_norm": 0.5870915055274963, "learning_rate": 6.21774832864872e-05, "loss": 1.635, "step": 7740 }, { "epoch": 0.43146981773591214, "grad_norm": 0.5580663681030273, "learning_rate": 6.216884697769104e-05, "loss": 1.7878, "step": 7741 }, { "epoch": 0.4315255559890753, "grad_norm": 0.6071598529815674, "learning_rate": 6.216021028296999e-05, "loss": 1.817, "step": 7742 }, { "epoch": 0.43158129424223846, "grad_norm": 0.5742529630661011, "learning_rate": 6.215157320259798e-05, "loss": 1.6086, "step": 7743 }, { "epoch": 0.4316370324954016, "grad_norm": 0.5802901387214661, "learning_rate": 6.214293573684889e-05, "loss": 1.7647, "step": 7744 }, { "epoch": 0.4316927707485647, "grad_norm": 0.6176155209541321, "learning_rate": 6.21342978859967e-05, "loss": 2.0043, "step": 7745 }, { "epoch": 0.4317485090017279, "grad_norm": 0.6097760200500488, "learning_rate": 6.212565965031532e-05, "loss": 1.7955, "step": 7746 }, { "epoch": 0.431804247254891, "grad_norm": 0.5612444877624512, "learning_rate": 6.211702103007871e-05, "loss": 1.6242, "step": 7747 }, { "epoch": 0.43185998550805416, "grad_norm": 0.6074878573417664, "learning_rate": 6.210838202556085e-05, "loss": 1.5951, "step": 7748 }, { "epoch": 0.43191572376121734, "grad_norm": 0.5827562808990479, "learning_rate": 6.209974263703569e-05, "loss": 1.849, "step": 7749 }, { "epoch": 0.43197146201438047, "grad_norm": 0.5888208746910095, "learning_rate": 6.209110286477727e-05, "loss": 1.7899, "step": 7750 }, { "epoch": 0.4320272002675436, "grad_norm": 0.5709846019744873, "learning_rate": 6.208246270905952e-05, "loss": 1.8588, "step": 7751 }, { "epoch": 0.4320829385207068, "grad_norm": 0.5687053203582764, "learning_rate": 6.207382217015655e-05, "loss": 1.7115, "step": 7752 }, { "epoch": 0.4321386767738699, "grad_norm": 0.5730668306350708, "learning_rate": 6.206518124834231e-05, "loss": 1.7556, "step": 7753 }, { "epoch": 0.43219441502703304, "grad_norm": 0.48593658208847046, "learning_rate": 6.205653994389087e-05, "loss": 1.4447, "step": 7754 }, { "epoch": 0.4322501532801962, "grad_norm": 0.5364407896995544, "learning_rate": 6.204789825707626e-05, "loss": 1.7097, "step": 7755 }, { "epoch": 0.43230589153335935, "grad_norm": 0.5474497079849243, "learning_rate": 6.203925618817258e-05, "loss": 1.6242, "step": 7756 }, { "epoch": 0.4323616297865225, "grad_norm": 0.5366718173027039, "learning_rate": 6.203061373745388e-05, "loss": 1.6055, "step": 7757 }, { "epoch": 0.4324173680396856, "grad_norm": 0.6138222813606262, "learning_rate": 6.202197090519428e-05, "loss": 1.6537, "step": 7758 }, { "epoch": 0.4324731062928488, "grad_norm": 0.5678575038909912, "learning_rate": 6.201332769166782e-05, "loss": 1.5895, "step": 7759 }, { "epoch": 0.4325288445460119, "grad_norm": 0.5866283178329468, "learning_rate": 6.200468409714866e-05, "loss": 1.6663, "step": 7760 }, { "epoch": 0.43258458279917505, "grad_norm": 0.5652245879173279, "learning_rate": 6.199604012191093e-05, "loss": 1.6446, "step": 7761 }, { "epoch": 0.43264032105233824, "grad_norm": 0.5838261842727661, "learning_rate": 6.198739576622872e-05, "loss": 1.8155, "step": 7762 }, { "epoch": 0.43269605930550137, "grad_norm": 0.537699818611145, "learning_rate": 6.197875103037623e-05, "loss": 1.6124, "step": 7763 }, { "epoch": 0.4327517975586645, "grad_norm": 0.6197475790977478, "learning_rate": 6.197010591462758e-05, "loss": 1.72, "step": 7764 }, { "epoch": 0.4328075358118277, "grad_norm": 0.5581753253936768, "learning_rate": 6.196146041925697e-05, "loss": 1.6948, "step": 7765 }, { "epoch": 0.4328632740649908, "grad_norm": 0.5555060505867004, "learning_rate": 6.195281454453858e-05, "loss": 1.5966, "step": 7766 }, { "epoch": 0.43291901231815394, "grad_norm": 0.5592203140258789, "learning_rate": 6.19441682907466e-05, "loss": 1.8594, "step": 7767 }, { "epoch": 0.43297475057131707, "grad_norm": 0.5492338538169861, "learning_rate": 6.193552165815525e-05, "loss": 1.707, "step": 7768 }, { "epoch": 0.43303048882448025, "grad_norm": 0.5119403600692749, "learning_rate": 6.192687464703873e-05, "loss": 1.3713, "step": 7769 }, { "epoch": 0.4330862270776434, "grad_norm": 0.6076398491859436, "learning_rate": 6.191822725767129e-05, "loss": 1.7667, "step": 7770 }, { "epoch": 0.4331419653308065, "grad_norm": 0.5796701312065125, "learning_rate": 6.190957949032716e-05, "loss": 1.688, "step": 7771 }, { "epoch": 0.4331977035839697, "grad_norm": 0.5363877415657043, "learning_rate": 6.190093134528061e-05, "loss": 1.6081, "step": 7772 }, { "epoch": 0.4332534418371328, "grad_norm": 0.5938536524772644, "learning_rate": 6.189228282280592e-05, "loss": 1.7503, "step": 7773 }, { "epoch": 0.43330918009029595, "grad_norm": 0.5643225312232971, "learning_rate": 6.188363392317734e-05, "loss": 1.7848, "step": 7774 }, { "epoch": 0.43336491834345914, "grad_norm": 0.5852196216583252, "learning_rate": 6.187498464666917e-05, "loss": 1.8112, "step": 7775 }, { "epoch": 0.43342065659662227, "grad_norm": 0.5774117112159729, "learning_rate": 6.186633499355576e-05, "loss": 1.5268, "step": 7776 }, { "epoch": 0.4334763948497854, "grad_norm": 0.5480836033821106, "learning_rate": 6.185768496411135e-05, "loss": 1.6839, "step": 7777 }, { "epoch": 0.4335321331029486, "grad_norm": 0.5210850834846497, "learning_rate": 6.184903455861032e-05, "loss": 1.592, "step": 7778 }, { "epoch": 0.4335878713561117, "grad_norm": 0.532539427280426, "learning_rate": 6.1840383777327e-05, "loss": 1.7992, "step": 7779 }, { "epoch": 0.43364360960927484, "grad_norm": 0.5546075105667114, "learning_rate": 6.183173262053575e-05, "loss": 1.76, "step": 7780 }, { "epoch": 0.43369934786243797, "grad_norm": 0.5634498000144958, "learning_rate": 6.182308108851091e-05, "loss": 1.5548, "step": 7781 }, { "epoch": 0.43375508611560115, "grad_norm": 0.5091983079910278, "learning_rate": 6.18144291815269e-05, "loss": 1.4981, "step": 7782 }, { "epoch": 0.4338108243687643, "grad_norm": 0.550807535648346, "learning_rate": 6.180577689985805e-05, "loss": 1.6661, "step": 7783 }, { "epoch": 0.4338665626219274, "grad_norm": 0.5441664457321167, "learning_rate": 6.179712424377879e-05, "loss": 1.6262, "step": 7784 }, { "epoch": 0.4339223008750906, "grad_norm": 0.620506227016449, "learning_rate": 6.178847121356353e-05, "loss": 1.9091, "step": 7785 }, { "epoch": 0.4339780391282537, "grad_norm": 0.6028100252151489, "learning_rate": 6.17798178094867e-05, "loss": 1.7357, "step": 7786 }, { "epoch": 0.43403377738141685, "grad_norm": 0.549159049987793, "learning_rate": 6.177116403182274e-05, "loss": 1.6313, "step": 7787 }, { "epoch": 0.43408951563458004, "grad_norm": 0.5400141477584839, "learning_rate": 6.176250988084608e-05, "loss": 1.605, "step": 7788 }, { "epoch": 0.43414525388774317, "grad_norm": 0.5363699793815613, "learning_rate": 6.17538553568312e-05, "loss": 1.5072, "step": 7789 }, { "epoch": 0.4342009921409063, "grad_norm": 0.5816105604171753, "learning_rate": 6.174520046005253e-05, "loss": 1.769, "step": 7790 }, { "epoch": 0.4342567303940694, "grad_norm": 0.5653383731842041, "learning_rate": 6.17365451907846e-05, "loss": 1.6427, "step": 7791 }, { "epoch": 0.4343124686472326, "grad_norm": 0.5933492183685303, "learning_rate": 6.172788954930188e-05, "loss": 1.7614, "step": 7792 }, { "epoch": 0.43436820690039574, "grad_norm": 0.5355760455131531, "learning_rate": 6.171923353587888e-05, "loss": 1.7932, "step": 7793 }, { "epoch": 0.43442394515355887, "grad_norm": 0.5630636811256409, "learning_rate": 6.171057715079012e-05, "loss": 1.5032, "step": 7794 }, { "epoch": 0.43447968340672205, "grad_norm": 0.5832585692405701, "learning_rate": 6.170192039431013e-05, "loss": 1.7822, "step": 7795 }, { "epoch": 0.4345354216598852, "grad_norm": 0.4809796214103699, "learning_rate": 6.169326326671346e-05, "loss": 1.4389, "step": 7796 }, { "epoch": 0.4345911599130483, "grad_norm": 0.5459611415863037, "learning_rate": 6.168460576827465e-05, "loss": 1.6287, "step": 7797 }, { "epoch": 0.4346468981662115, "grad_norm": 0.5732072591781616, "learning_rate": 6.167594789926827e-05, "loss": 1.9769, "step": 7798 }, { "epoch": 0.4347026364193746, "grad_norm": 0.5578893423080444, "learning_rate": 6.16672896599689e-05, "loss": 1.8077, "step": 7799 }, { "epoch": 0.43475837467253775, "grad_norm": 0.5882522463798523, "learning_rate": 6.165863105065113e-05, "loss": 1.7451, "step": 7800 }, { "epoch": 0.43481411292570094, "grad_norm": 0.6155940890312195, "learning_rate": 6.164997207158954e-05, "loss": 1.809, "step": 7801 }, { "epoch": 0.43486985117886406, "grad_norm": 0.5675914883613586, "learning_rate": 6.164131272305878e-05, "loss": 1.7839, "step": 7802 }, { "epoch": 0.4349255894320272, "grad_norm": 0.5673891305923462, "learning_rate": 6.163265300533345e-05, "loss": 1.6121, "step": 7803 }, { "epoch": 0.4349813276851903, "grad_norm": 0.5579030513763428, "learning_rate": 6.162399291868819e-05, "loss": 1.7024, "step": 7804 }, { "epoch": 0.4350370659383535, "grad_norm": 0.5674803256988525, "learning_rate": 6.161533246339764e-05, "loss": 1.702, "step": 7805 }, { "epoch": 0.43509280419151664, "grad_norm": 0.5546411275863647, "learning_rate": 6.160667163973648e-05, "loss": 1.7928, "step": 7806 }, { "epoch": 0.43514854244467976, "grad_norm": 0.6025899648666382, "learning_rate": 6.159801044797936e-05, "loss": 1.7094, "step": 7807 }, { "epoch": 0.43520428069784295, "grad_norm": 0.5264720916748047, "learning_rate": 6.158934888840095e-05, "loss": 1.4788, "step": 7808 }, { "epoch": 0.4352600189510061, "grad_norm": 0.6098587512969971, "learning_rate": 6.158068696127601e-05, "loss": 1.789, "step": 7809 }, { "epoch": 0.4353157572041692, "grad_norm": 0.5427471995353699, "learning_rate": 6.157202466687916e-05, "loss": 1.7309, "step": 7810 }, { "epoch": 0.4353714954573324, "grad_norm": 0.5572206974029541, "learning_rate": 6.156336200548517e-05, "loss": 1.7018, "step": 7811 }, { "epoch": 0.4354272337104955, "grad_norm": 0.5554936528205872, "learning_rate": 6.155469897736874e-05, "loss": 1.6621, "step": 7812 }, { "epoch": 0.43548297196365865, "grad_norm": 0.5617427229881287, "learning_rate": 6.154603558280466e-05, "loss": 1.7123, "step": 7813 }, { "epoch": 0.4355387102168218, "grad_norm": 0.572582483291626, "learning_rate": 6.153737182206762e-05, "loss": 1.7392, "step": 7814 }, { "epoch": 0.43559444846998496, "grad_norm": 0.5278533101081848, "learning_rate": 6.152870769543245e-05, "loss": 1.5766, "step": 7815 }, { "epoch": 0.4356501867231481, "grad_norm": 0.5663198232650757, "learning_rate": 6.152004320317385e-05, "loss": 1.6999, "step": 7816 }, { "epoch": 0.4357059249763112, "grad_norm": 0.5262326598167419, "learning_rate": 6.151137834556666e-05, "loss": 1.569, "step": 7817 }, { "epoch": 0.4357616632294744, "grad_norm": 0.6140465140342712, "learning_rate": 6.150271312288566e-05, "loss": 1.9939, "step": 7818 }, { "epoch": 0.43581740148263753, "grad_norm": 0.5997401475906372, "learning_rate": 6.149404753540567e-05, "loss": 1.7254, "step": 7819 }, { "epoch": 0.43587313973580066, "grad_norm": 0.6437683701515198, "learning_rate": 6.14853815834015e-05, "loss": 2.0098, "step": 7820 }, { "epoch": 0.43592887798896385, "grad_norm": 0.6912010312080383, "learning_rate": 6.1476715267148e-05, "loss": 2.1957, "step": 7821 }, { "epoch": 0.435984616242127, "grad_norm": 0.5197498202323914, "learning_rate": 6.146804858692001e-05, "loss": 1.614, "step": 7822 }, { "epoch": 0.4360403544952901, "grad_norm": 0.5308524370193481, "learning_rate": 6.145938154299237e-05, "loss": 1.5681, "step": 7823 }, { "epoch": 0.4360960927484533, "grad_norm": 0.5914180278778076, "learning_rate": 6.145071413563996e-05, "loss": 1.8961, "step": 7824 }, { "epoch": 0.4361518310016164, "grad_norm": 0.583292543888092, "learning_rate": 6.144204636513767e-05, "loss": 1.7469, "step": 7825 }, { "epoch": 0.43620756925477955, "grad_norm": 0.6572228074073792, "learning_rate": 6.143337823176038e-05, "loss": 1.8796, "step": 7826 }, { "epoch": 0.4362633075079427, "grad_norm": 0.5719166994094849, "learning_rate": 6.142470973578299e-05, "loss": 1.8995, "step": 7827 }, { "epoch": 0.43631904576110586, "grad_norm": 0.561431348323822, "learning_rate": 6.141604087748043e-05, "loss": 1.544, "step": 7828 }, { "epoch": 0.436374784014269, "grad_norm": 0.5519416928291321, "learning_rate": 6.14073716571276e-05, "loss": 1.7948, "step": 7829 }, { "epoch": 0.4364305222674321, "grad_norm": 0.5517488718032837, "learning_rate": 6.139870207499945e-05, "loss": 1.6391, "step": 7830 }, { "epoch": 0.4364862605205953, "grad_norm": 0.5172828435897827, "learning_rate": 6.139003213137092e-05, "loss": 1.7099, "step": 7831 }, { "epoch": 0.43654199877375843, "grad_norm": 0.5379384756088257, "learning_rate": 6.1381361826517e-05, "loss": 1.5748, "step": 7832 }, { "epoch": 0.43659773702692156, "grad_norm": 0.5668090581893921, "learning_rate": 6.137269116071263e-05, "loss": 1.6389, "step": 7833 }, { "epoch": 0.43665347528008475, "grad_norm": 0.5936790704727173, "learning_rate": 6.13640201342328e-05, "loss": 1.7916, "step": 7834 }, { "epoch": 0.4367092135332479, "grad_norm": 0.5564102530479431, "learning_rate": 6.135534874735253e-05, "loss": 1.6772, "step": 7835 }, { "epoch": 0.436764951786411, "grad_norm": 0.6297538876533508, "learning_rate": 6.134667700034678e-05, "loss": 1.6905, "step": 7836 }, { "epoch": 0.43682069003957413, "grad_norm": 0.5488330125808716, "learning_rate": 6.13380048934906e-05, "loss": 1.4808, "step": 7837 }, { "epoch": 0.4368764282927373, "grad_norm": 0.5490309000015259, "learning_rate": 6.132933242705899e-05, "loss": 1.4744, "step": 7838 }, { "epoch": 0.43693216654590045, "grad_norm": 0.5560508370399475, "learning_rate": 6.132065960132705e-05, "loss": 1.5957, "step": 7839 }, { "epoch": 0.4369879047990636, "grad_norm": 0.6161486506462097, "learning_rate": 6.131198641656976e-05, "loss": 1.7756, "step": 7840 }, { "epoch": 0.43704364305222676, "grad_norm": 0.5948550701141357, "learning_rate": 6.130331287306224e-05, "loss": 1.8239, "step": 7841 }, { "epoch": 0.4370993813053899, "grad_norm": 0.5820697546005249, "learning_rate": 6.129463897107951e-05, "loss": 1.5446, "step": 7842 }, { "epoch": 0.437155119558553, "grad_norm": 0.5708462595939636, "learning_rate": 6.128596471089669e-05, "loss": 1.7479, "step": 7843 }, { "epoch": 0.4372108578117162, "grad_norm": 0.543056309223175, "learning_rate": 6.127729009278889e-05, "loss": 1.5951, "step": 7844 }, { "epoch": 0.43726659606487933, "grad_norm": 0.5421169400215149, "learning_rate": 6.126861511703119e-05, "loss": 1.7609, "step": 7845 }, { "epoch": 0.43732233431804246, "grad_norm": 0.5461887121200562, "learning_rate": 6.125993978389871e-05, "loss": 1.559, "step": 7846 }, { "epoch": 0.43737807257120564, "grad_norm": 0.5687921643257141, "learning_rate": 6.12512640936666e-05, "loss": 1.8498, "step": 7847 }, { "epoch": 0.4374338108243688, "grad_norm": 0.571535050868988, "learning_rate": 6.124258804660999e-05, "loss": 1.6316, "step": 7848 }, { "epoch": 0.4374895490775319, "grad_norm": 0.5363306999206543, "learning_rate": 6.123391164300404e-05, "loss": 1.5648, "step": 7849 }, { "epoch": 0.43754528733069503, "grad_norm": 0.5810931324958801, "learning_rate": 6.12252348831239e-05, "loss": 1.6624, "step": 7850 }, { "epoch": 0.4376010255838582, "grad_norm": 0.54121994972229, "learning_rate": 6.121655776724475e-05, "loss": 1.617, "step": 7851 }, { "epoch": 0.43765676383702135, "grad_norm": 0.54410719871521, "learning_rate": 6.120788029564181e-05, "loss": 1.6805, "step": 7852 }, { "epoch": 0.4377125020901845, "grad_norm": 0.5891941785812378, "learning_rate": 6.119920246859024e-05, "loss": 1.51, "step": 7853 }, { "epoch": 0.43776824034334766, "grad_norm": 0.625268280506134, "learning_rate": 6.119052428636529e-05, "loss": 1.9405, "step": 7854 }, { "epoch": 0.4378239785965108, "grad_norm": 0.5463603138923645, "learning_rate": 6.118184574924212e-05, "loss": 1.6922, "step": 7855 }, { "epoch": 0.4378797168496739, "grad_norm": 0.6116244196891785, "learning_rate": 6.1173166857496e-05, "loss": 1.7829, "step": 7856 }, { "epoch": 0.4379354551028371, "grad_norm": 0.60081547498703, "learning_rate": 6.116448761140218e-05, "loss": 1.9078, "step": 7857 }, { "epoch": 0.43799119335600023, "grad_norm": 0.5881320238113403, "learning_rate": 6.11558080112359e-05, "loss": 1.4085, "step": 7858 }, { "epoch": 0.43804693160916336, "grad_norm": 0.5768188238143921, "learning_rate": 6.114712805727244e-05, "loss": 1.8526, "step": 7859 }, { "epoch": 0.4381026698623265, "grad_norm": 0.530643105506897, "learning_rate": 6.113844774978706e-05, "loss": 1.6052, "step": 7860 }, { "epoch": 0.4381584081154897, "grad_norm": 0.5398595929145813, "learning_rate": 6.112976708905508e-05, "loss": 1.7706, "step": 7861 }, { "epoch": 0.4382141463686528, "grad_norm": 0.5204975008964539, "learning_rate": 6.112108607535176e-05, "loss": 1.6883, "step": 7862 }, { "epoch": 0.43826988462181593, "grad_norm": 0.7956941723823547, "learning_rate": 6.111240470895245e-05, "loss": 1.4164, "step": 7863 }, { "epoch": 0.4383256228749791, "grad_norm": 0.5599929094314575, "learning_rate": 6.110372299013243e-05, "loss": 1.7575, "step": 7864 }, { "epoch": 0.43838136112814224, "grad_norm": 0.5534434914588928, "learning_rate": 6.109504091916707e-05, "loss": 1.825, "step": 7865 }, { "epoch": 0.4384370993813054, "grad_norm": 0.5528411269187927, "learning_rate": 6.108635849633169e-05, "loss": 1.5657, "step": 7866 }, { "epoch": 0.43849283763446856, "grad_norm": 0.5750871300697327, "learning_rate": 6.107767572190168e-05, "loss": 2.019, "step": 7867 }, { "epoch": 0.4385485758876317, "grad_norm": 0.5783527493476868, "learning_rate": 6.106899259615236e-05, "loss": 1.5383, "step": 7868 }, { "epoch": 0.4386043141407948, "grad_norm": 0.5577226877212524, "learning_rate": 6.106030911935913e-05, "loss": 1.8226, "step": 7869 }, { "epoch": 0.438660052393958, "grad_norm": 0.5514130592346191, "learning_rate": 6.105162529179738e-05, "loss": 1.8757, "step": 7870 }, { "epoch": 0.43871579064712113, "grad_norm": 0.5459834337234497, "learning_rate": 6.104294111374252e-05, "loss": 1.6836, "step": 7871 }, { "epoch": 0.43877152890028426, "grad_norm": 0.5836615562438965, "learning_rate": 6.103425658546995e-05, "loss": 1.7928, "step": 7872 }, { "epoch": 0.4388272671534474, "grad_norm": 0.552156925201416, "learning_rate": 6.1025571707255104e-05, "loss": 1.7313, "step": 7873 }, { "epoch": 0.43888300540661057, "grad_norm": 0.5519532561302185, "learning_rate": 6.10168864793734e-05, "loss": 1.7947, "step": 7874 }, { "epoch": 0.4389387436597737, "grad_norm": 0.5163867473602295, "learning_rate": 6.100820090210028e-05, "loss": 1.5192, "step": 7875 }, { "epoch": 0.43899448191293683, "grad_norm": 0.5566312074661255, "learning_rate": 6.099951497571123e-05, "loss": 1.5993, "step": 7876 }, { "epoch": 0.4390502201661, "grad_norm": 0.5464503765106201, "learning_rate": 6.099082870048168e-05, "loss": 1.8421, "step": 7877 }, { "epoch": 0.43910595841926314, "grad_norm": 0.5337437987327576, "learning_rate": 6.098214207668713e-05, "loss": 1.5466, "step": 7878 }, { "epoch": 0.43916169667242627, "grad_norm": 0.6034952402114868, "learning_rate": 6.097345510460307e-05, "loss": 1.8151, "step": 7879 }, { "epoch": 0.43921743492558946, "grad_norm": 0.5526003241539001, "learning_rate": 6.0964767784504995e-05, "loss": 1.6425, "step": 7880 }, { "epoch": 0.4392731731787526, "grad_norm": 0.575605571269989, "learning_rate": 6.09560801166684e-05, "loss": 1.7276, "step": 7881 }, { "epoch": 0.4393289114319157, "grad_norm": 0.6006867289543152, "learning_rate": 6.094739210136883e-05, "loss": 1.7726, "step": 7882 }, { "epoch": 0.43938464968507884, "grad_norm": 0.5347257852554321, "learning_rate": 6.093870373888181e-05, "loss": 1.6228, "step": 7883 }, { "epoch": 0.439440387938242, "grad_norm": 0.5642088651657104, "learning_rate": 6.093001502948289e-05, "loss": 1.7197, "step": 7884 }, { "epoch": 0.43949612619140516, "grad_norm": 0.5518479943275452, "learning_rate": 6.0921325973447604e-05, "loss": 1.5778, "step": 7885 }, { "epoch": 0.4395518644445683, "grad_norm": 0.6168820261955261, "learning_rate": 6.091263657105155e-05, "loss": 1.7891, "step": 7886 }, { "epoch": 0.43960760269773147, "grad_norm": 0.5440758466720581, "learning_rate": 6.090394682257029e-05, "loss": 1.5781, "step": 7887 }, { "epoch": 0.4396633409508946, "grad_norm": 0.5412326455116272, "learning_rate": 6.08952567282794e-05, "loss": 1.683, "step": 7888 }, { "epoch": 0.43971907920405773, "grad_norm": 0.563556969165802, "learning_rate": 6.0886566288454496e-05, "loss": 1.5673, "step": 7889 }, { "epoch": 0.4397748174572209, "grad_norm": 0.5224372148513794, "learning_rate": 6.0877875503371176e-05, "loss": 1.7352, "step": 7890 }, { "epoch": 0.43983055571038404, "grad_norm": 0.5953571796417236, "learning_rate": 6.086918437330508e-05, "loss": 1.7736, "step": 7891 }, { "epoch": 0.43988629396354717, "grad_norm": 0.5646018385887146, "learning_rate": 6.086049289853182e-05, "loss": 1.7542, "step": 7892 }, { "epoch": 0.43994203221671035, "grad_norm": 0.6011926531791687, "learning_rate": 6.0851801079327056e-05, "loss": 1.7245, "step": 7893 }, { "epoch": 0.4399977704698735, "grad_norm": 0.4823513627052307, "learning_rate": 6.0843108915966415e-05, "loss": 1.4047, "step": 7894 }, { "epoch": 0.4400535087230366, "grad_norm": 0.6140894889831543, "learning_rate": 6.083441640872558e-05, "loss": 2.0188, "step": 7895 }, { "epoch": 0.44010924697619974, "grad_norm": 0.5411475896835327, "learning_rate": 6.082572355788023e-05, "loss": 1.5408, "step": 7896 }, { "epoch": 0.4401649852293629, "grad_norm": 0.6488401293754578, "learning_rate": 6.081703036370606e-05, "loss": 2.0136, "step": 7897 }, { "epoch": 0.44022072348252606, "grad_norm": 0.7427087426185608, "learning_rate": 6.080833682647874e-05, "loss": 1.6615, "step": 7898 }, { "epoch": 0.4402764617356892, "grad_norm": 0.6195456385612488, "learning_rate": 6.0799642946473986e-05, "loss": 1.5859, "step": 7899 }, { "epoch": 0.44033219998885237, "grad_norm": 0.5988082885742188, "learning_rate": 6.079094872396754e-05, "loss": 1.7462, "step": 7900 }, { "epoch": 0.4403879382420155, "grad_norm": 0.6001728177070618, "learning_rate": 6.0782254159235116e-05, "loss": 1.736, "step": 7901 }, { "epoch": 0.4404436764951786, "grad_norm": 0.5472791790962219, "learning_rate": 6.0773559252552446e-05, "loss": 1.372, "step": 7902 }, { "epoch": 0.4404994147483418, "grad_norm": 0.5791669487953186, "learning_rate": 6.0764864004195286e-05, "loss": 1.7732, "step": 7903 }, { "epoch": 0.44055515300150494, "grad_norm": 0.5353814363479614, "learning_rate": 6.075616841443943e-05, "loss": 1.8002, "step": 7904 }, { "epoch": 0.44061089125466807, "grad_norm": 0.5734871029853821, "learning_rate": 6.07474724835606e-05, "loss": 1.7832, "step": 7905 }, { "epoch": 0.4406666295078312, "grad_norm": 0.6158138513565063, "learning_rate": 6.0738776211834615e-05, "loss": 1.9006, "step": 7906 }, { "epoch": 0.4407223677609944, "grad_norm": 0.5585591793060303, "learning_rate": 6.073007959953726e-05, "loss": 1.8046, "step": 7907 }, { "epoch": 0.4407781060141575, "grad_norm": 0.5921459794044495, "learning_rate": 6.0721382646944326e-05, "loss": 1.8318, "step": 7908 }, { "epoch": 0.44083384426732064, "grad_norm": 0.5314304828643799, "learning_rate": 6.0712685354331654e-05, "loss": 1.4663, "step": 7909 }, { "epoch": 0.4408895825204838, "grad_norm": 0.5642038583755493, "learning_rate": 6.0703987721975076e-05, "loss": 1.6231, "step": 7910 }, { "epoch": 0.44094532077364695, "grad_norm": 0.598506510257721, "learning_rate": 6.0695289750150394e-05, "loss": 1.6668, "step": 7911 }, { "epoch": 0.4410010590268101, "grad_norm": 0.5824127197265625, "learning_rate": 6.068659143913349e-05, "loss": 1.7711, "step": 7912 }, { "epoch": 0.44105679727997327, "grad_norm": 0.5553746223449707, "learning_rate": 6.0677892789200216e-05, "loss": 1.7025, "step": 7913 }, { "epoch": 0.4411125355331364, "grad_norm": 0.5868836641311646, "learning_rate": 6.066919380062643e-05, "loss": 1.7495, "step": 7914 }, { "epoch": 0.4411682737862995, "grad_norm": 0.5977121591567993, "learning_rate": 6.066049447368802e-05, "loss": 1.5988, "step": 7915 }, { "epoch": 0.4412240120394627, "grad_norm": 0.6062576770782471, "learning_rate": 6.065179480866089e-05, "loss": 1.7006, "step": 7916 }, { "epoch": 0.44127975029262584, "grad_norm": 0.5636418461799622, "learning_rate": 6.064309480582093e-05, "loss": 1.6275, "step": 7917 }, { "epoch": 0.44133548854578897, "grad_norm": 0.5832415223121643, "learning_rate": 6.0634394465444056e-05, "loss": 1.8278, "step": 7918 }, { "epoch": 0.4413912267989521, "grad_norm": 0.5471083521842957, "learning_rate": 6.062569378780621e-05, "loss": 1.724, "step": 7919 }, { "epoch": 0.4414469650521153, "grad_norm": 0.5676271915435791, "learning_rate": 6.061699277318328e-05, "loss": 1.706, "step": 7920 }, { "epoch": 0.4415027033052784, "grad_norm": 0.5920431613922119, "learning_rate": 6.060829142185125e-05, "loss": 1.7118, "step": 7921 }, { "epoch": 0.44155844155844154, "grad_norm": 0.6104030609130859, "learning_rate": 6.059958973408607e-05, "loss": 1.908, "step": 7922 }, { "epoch": 0.4416141798116047, "grad_norm": 0.5903329849243164, "learning_rate": 6.05908877101637e-05, "loss": 1.7077, "step": 7923 }, { "epoch": 0.44166991806476785, "grad_norm": 0.5489821434020996, "learning_rate": 6.058218535036013e-05, "loss": 1.6519, "step": 7924 }, { "epoch": 0.441725656317931, "grad_norm": 0.5121790170669556, "learning_rate": 6.057348265495133e-05, "loss": 1.4665, "step": 7925 }, { "epoch": 0.44178139457109417, "grad_norm": 0.5221953392028809, "learning_rate": 6.0564779624213316e-05, "loss": 1.6157, "step": 7926 }, { "epoch": 0.4418371328242573, "grad_norm": 0.5600380897521973, "learning_rate": 6.055607625842208e-05, "loss": 1.5828, "step": 7927 }, { "epoch": 0.4418928710774204, "grad_norm": 0.5320744514465332, "learning_rate": 6.0547372557853655e-05, "loss": 1.6772, "step": 7928 }, { "epoch": 0.44194860933058355, "grad_norm": 0.5403137803077698, "learning_rate": 6.053866852278406e-05, "loss": 1.7394, "step": 7929 }, { "epoch": 0.44200434758374674, "grad_norm": 0.591922402381897, "learning_rate": 6.052996415348936e-05, "loss": 1.8231, "step": 7930 }, { "epoch": 0.44206008583690987, "grad_norm": 0.5516440868377686, "learning_rate": 6.052125945024558e-05, "loss": 1.6415, "step": 7931 }, { "epoch": 0.442115824090073, "grad_norm": 0.5129381418228149, "learning_rate": 6.05125544133288e-05, "loss": 1.5515, "step": 7932 }, { "epoch": 0.4421715623432362, "grad_norm": 0.5778689980506897, "learning_rate": 6.050384904301508e-05, "loss": 1.7348, "step": 7933 }, { "epoch": 0.4422273005963993, "grad_norm": 0.5508379340171814, "learning_rate": 6.049514333958052e-05, "loss": 1.6601, "step": 7934 }, { "epoch": 0.44228303884956244, "grad_norm": 0.5481617450714111, "learning_rate": 6.048643730330119e-05, "loss": 1.5493, "step": 7935 }, { "epoch": 0.4423387771027256, "grad_norm": 0.5237631797790527, "learning_rate": 6.0477730934453226e-05, "loss": 1.5092, "step": 7936 }, { "epoch": 0.44239451535588875, "grad_norm": 0.5657276511192322, "learning_rate": 6.046902423331271e-05, "loss": 1.4483, "step": 7937 }, { "epoch": 0.4424502536090519, "grad_norm": 0.5502325892448425, "learning_rate": 6.046031720015579e-05, "loss": 1.6987, "step": 7938 }, { "epoch": 0.44250599186221506, "grad_norm": 0.6082862615585327, "learning_rate": 6.045160983525859e-05, "loss": 1.8988, "step": 7939 }, { "epoch": 0.4425617301153782, "grad_norm": 0.5569537878036499, "learning_rate": 6.044290213889727e-05, "loss": 1.696, "step": 7940 }, { "epoch": 0.4426174683685413, "grad_norm": 0.518162190914154, "learning_rate": 6.0434194111347985e-05, "loss": 1.5279, "step": 7941 }, { "epoch": 0.44267320662170445, "grad_norm": 0.5695126056671143, "learning_rate": 6.042548575288689e-05, "loss": 1.7109, "step": 7942 }, { "epoch": 0.44272894487486764, "grad_norm": 0.49009808897972107, "learning_rate": 6.0416777063790184e-05, "loss": 1.4709, "step": 7943 }, { "epoch": 0.44278468312803076, "grad_norm": 0.5802407264709473, "learning_rate": 6.040806804433403e-05, "loss": 1.6943, "step": 7944 }, { "epoch": 0.4428404213811939, "grad_norm": 0.5507357716560364, "learning_rate": 6.0399358694794647e-05, "loss": 1.3918, "step": 7945 }, { "epoch": 0.4428961596343571, "grad_norm": 0.5855342745780945, "learning_rate": 6.039064901544824e-05, "loss": 1.8103, "step": 7946 }, { "epoch": 0.4429518978875202, "grad_norm": 0.5658082365989685, "learning_rate": 6.038193900657102e-05, "loss": 1.7597, "step": 7947 }, { "epoch": 0.44300763614068334, "grad_norm": 0.5863122344017029, "learning_rate": 6.037322866843923e-05, "loss": 1.7671, "step": 7948 }, { "epoch": 0.4430633743938465, "grad_norm": 0.5610207915306091, "learning_rate": 6.036451800132912e-05, "loss": 1.7487, "step": 7949 }, { "epoch": 0.44311911264700965, "grad_norm": 0.5848312377929688, "learning_rate": 6.03558070055169e-05, "loss": 1.7112, "step": 7950 }, { "epoch": 0.4431748509001728, "grad_norm": 0.5728501081466675, "learning_rate": 6.0347095681278876e-05, "loss": 1.7736, "step": 7951 }, { "epoch": 0.4432305891533359, "grad_norm": 0.5987431406974792, "learning_rate": 6.033838402889131e-05, "loss": 1.7693, "step": 7952 }, { "epoch": 0.4432863274064991, "grad_norm": 0.5747002959251404, "learning_rate": 6.032967204863048e-05, "loss": 1.6216, "step": 7953 }, { "epoch": 0.4433420656596622, "grad_norm": 0.5476230382919312, "learning_rate": 6.0320959740772666e-05, "loss": 1.7631, "step": 7954 }, { "epoch": 0.44339780391282535, "grad_norm": 0.5305277109146118, "learning_rate": 6.031224710559419e-05, "loss": 1.6809, "step": 7955 }, { "epoch": 0.44345354216598853, "grad_norm": 0.5442744493484497, "learning_rate": 6.0303534143371374e-05, "loss": 1.5357, "step": 7956 }, { "epoch": 0.44350928041915166, "grad_norm": 0.5553621053695679, "learning_rate": 6.029482085438051e-05, "loss": 1.6955, "step": 7957 }, { "epoch": 0.4435650186723148, "grad_norm": 0.5430163741111755, "learning_rate": 6.028610723889797e-05, "loss": 1.762, "step": 7958 }, { "epoch": 0.443620756925478, "grad_norm": 0.5217944979667664, "learning_rate": 6.027739329720006e-05, "loss": 1.4594, "step": 7959 }, { "epoch": 0.4436764951786411, "grad_norm": 0.5763014554977417, "learning_rate": 6.026867902956317e-05, "loss": 1.7942, "step": 7960 }, { "epoch": 0.44373223343180423, "grad_norm": 0.533718466758728, "learning_rate": 6.025996443626364e-05, "loss": 1.6659, "step": 7961 }, { "epoch": 0.4437879716849674, "grad_norm": 0.5921129584312439, "learning_rate": 6.0251249517577854e-05, "loss": 1.9042, "step": 7962 }, { "epoch": 0.44384370993813055, "grad_norm": 0.5379483103752136, "learning_rate": 6.024253427378222e-05, "loss": 1.6772, "step": 7963 }, { "epoch": 0.4438994481912937, "grad_norm": 0.5350393652915955, "learning_rate": 6.0233818705153114e-05, "loss": 1.5868, "step": 7964 }, { "epoch": 0.4439551864444568, "grad_norm": 0.5462901592254639, "learning_rate": 6.022510281196695e-05, "loss": 1.6118, "step": 7965 }, { "epoch": 0.44401092469762, "grad_norm": 0.5518479943275452, "learning_rate": 6.021638659450013e-05, "loss": 1.4902, "step": 7966 }, { "epoch": 0.4440666629507831, "grad_norm": 0.5284306406974792, "learning_rate": 6.020767005302909e-05, "loss": 1.5573, "step": 7967 }, { "epoch": 0.44412240120394625, "grad_norm": 0.6189160346984863, "learning_rate": 6.0198953187830277e-05, "loss": 1.9599, "step": 7968 }, { "epoch": 0.44417813945710943, "grad_norm": 0.5723422765731812, "learning_rate": 6.019023599918014e-05, "loss": 1.7111, "step": 7969 }, { "epoch": 0.44423387771027256, "grad_norm": 0.5545480251312256, "learning_rate": 6.018151848735511e-05, "loss": 1.6214, "step": 7970 }, { "epoch": 0.4442896159634357, "grad_norm": 0.5693395733833313, "learning_rate": 6.01728006526317e-05, "loss": 1.8074, "step": 7971 }, { "epoch": 0.4443453542165989, "grad_norm": 0.5313411951065063, "learning_rate": 6.0164082495286354e-05, "loss": 1.6405, "step": 7972 }, { "epoch": 0.444401092469762, "grad_norm": 0.5680732727050781, "learning_rate": 6.015536401559556e-05, "loss": 1.4973, "step": 7973 }, { "epoch": 0.44445683072292513, "grad_norm": 0.6219733357429504, "learning_rate": 6.014664521383584e-05, "loss": 1.8733, "step": 7974 }, { "epoch": 0.44451256897608826, "grad_norm": 0.5903530716896057, "learning_rate": 6.0137926090283694e-05, "loss": 1.6334, "step": 7975 }, { "epoch": 0.44456830722925145, "grad_norm": 0.6504166722297668, "learning_rate": 6.0129206645215655e-05, "loss": 1.7995, "step": 7976 }, { "epoch": 0.4446240454824146, "grad_norm": 0.6121776103973389, "learning_rate": 6.012048687890821e-05, "loss": 1.8132, "step": 7977 }, { "epoch": 0.4446797837355777, "grad_norm": 0.6290067434310913, "learning_rate": 6.011176679163796e-05, "loss": 1.9482, "step": 7978 }, { "epoch": 0.4447355219887409, "grad_norm": 0.6563844084739685, "learning_rate": 6.010304638368139e-05, "loss": 1.8485, "step": 7979 }, { "epoch": 0.444791260241904, "grad_norm": 0.5514439940452576, "learning_rate": 6.009432565531511e-05, "loss": 1.6343, "step": 7980 }, { "epoch": 0.44484699849506715, "grad_norm": 0.5945736765861511, "learning_rate": 6.008560460681567e-05, "loss": 1.721, "step": 7981 }, { "epoch": 0.44490273674823033, "grad_norm": 0.5428782105445862, "learning_rate": 6.007688323845966e-05, "loss": 1.5152, "step": 7982 }, { "epoch": 0.44495847500139346, "grad_norm": 0.5975694060325623, "learning_rate": 6.006816155052366e-05, "loss": 1.7975, "step": 7983 }, { "epoch": 0.4450142132545566, "grad_norm": 0.5683627724647522, "learning_rate": 6.005943954328429e-05, "loss": 1.7401, "step": 7984 }, { "epoch": 0.4450699515077198, "grad_norm": 0.552085280418396, "learning_rate": 6.005071721701814e-05, "loss": 1.5525, "step": 7985 }, { "epoch": 0.4451256897608829, "grad_norm": 0.5957344770431519, "learning_rate": 6.004199457200184e-05, "loss": 1.8248, "step": 7986 }, { "epoch": 0.44518142801404603, "grad_norm": 0.5816213488578796, "learning_rate": 6.003327160851201e-05, "loss": 1.5985, "step": 7987 }, { "epoch": 0.44523716626720916, "grad_norm": 0.5090708136558533, "learning_rate": 6.002454832682532e-05, "loss": 1.4312, "step": 7988 }, { "epoch": 0.44529290452037235, "grad_norm": 0.5570594668388367, "learning_rate": 6.00158247272184e-05, "loss": 1.6288, "step": 7989 }, { "epoch": 0.4453486427735355, "grad_norm": 0.4970921576023102, "learning_rate": 6.00071008099679e-05, "loss": 1.2663, "step": 7990 }, { "epoch": 0.4454043810266986, "grad_norm": 0.5791414976119995, "learning_rate": 5.999837657535052e-05, "loss": 1.8037, "step": 7991 }, { "epoch": 0.4454601192798618, "grad_norm": 0.5636151432991028, "learning_rate": 5.998965202364294e-05, "loss": 1.6298, "step": 7992 }, { "epoch": 0.4455158575330249, "grad_norm": 0.5829344987869263, "learning_rate": 5.998092715512183e-05, "loss": 1.6349, "step": 7993 }, { "epoch": 0.44557159578618805, "grad_norm": 0.556348979473114, "learning_rate": 5.9972201970063904e-05, "loss": 1.5642, "step": 7994 }, { "epoch": 0.44562733403935123, "grad_norm": 0.5365790724754333, "learning_rate": 5.996347646874587e-05, "loss": 1.6421, "step": 7995 }, { "epoch": 0.44568307229251436, "grad_norm": 0.576501190662384, "learning_rate": 5.9954750651444455e-05, "loss": 1.6171, "step": 7996 }, { "epoch": 0.4457388105456775, "grad_norm": 0.5861379504203796, "learning_rate": 5.9946024518436406e-05, "loss": 1.6702, "step": 7997 }, { "epoch": 0.4457945487988406, "grad_norm": 0.5348252058029175, "learning_rate": 5.9937298069998424e-05, "loss": 1.4339, "step": 7998 }, { "epoch": 0.4458502870520038, "grad_norm": 0.5516197085380554, "learning_rate": 5.99285713064073e-05, "loss": 1.738, "step": 7999 }, { "epoch": 0.44590602530516693, "grad_norm": 0.58391934633255, "learning_rate": 5.991984422793977e-05, "loss": 1.4481, "step": 8000 }, { "epoch": 0.44596176355833006, "grad_norm": 0.5707566738128662, "learning_rate": 5.9911116834872624e-05, "loss": 1.7051, "step": 8001 }, { "epoch": 0.44601750181149324, "grad_norm": 0.5384584069252014, "learning_rate": 5.990238912748265e-05, "loss": 1.7542, "step": 8002 }, { "epoch": 0.4460732400646564, "grad_norm": 0.5866785645484924, "learning_rate": 5.989366110604662e-05, "loss": 1.8245, "step": 8003 }, { "epoch": 0.4461289783178195, "grad_norm": 0.5644246935844421, "learning_rate": 5.988493277084134e-05, "loss": 1.7637, "step": 8004 }, { "epoch": 0.4461847165709827, "grad_norm": 0.5331970453262329, "learning_rate": 5.9876204122143634e-05, "loss": 1.6303, "step": 8005 }, { "epoch": 0.4462404548241458, "grad_norm": 0.5923652648925781, "learning_rate": 5.98674751602303e-05, "loss": 1.8505, "step": 8006 }, { "epoch": 0.44629619307730894, "grad_norm": 0.5415480136871338, "learning_rate": 5.985874588537819e-05, "loss": 1.6483, "step": 8007 }, { "epoch": 0.44635193133047213, "grad_norm": 0.5634106397628784, "learning_rate": 5.985001629786415e-05, "loss": 1.5566, "step": 8008 }, { "epoch": 0.44640766958363526, "grad_norm": 0.5723522901535034, "learning_rate": 5.9841286397965014e-05, "loss": 1.7409, "step": 8009 }, { "epoch": 0.4464634078367984, "grad_norm": 0.5537884831428528, "learning_rate": 5.983255618595767e-05, "loss": 1.712, "step": 8010 }, { "epoch": 0.4465191460899615, "grad_norm": 0.5915796160697937, "learning_rate": 5.982382566211895e-05, "loss": 1.7699, "step": 8011 }, { "epoch": 0.4465748843431247, "grad_norm": 0.6134962439537048, "learning_rate": 5.981509482672576e-05, "loss": 1.862, "step": 8012 }, { "epoch": 0.44663062259628783, "grad_norm": 0.4997968077659607, "learning_rate": 5.980636368005499e-05, "loss": 1.5174, "step": 8013 }, { "epoch": 0.44668636084945096, "grad_norm": 0.5801420211791992, "learning_rate": 5.979763222238354e-05, "loss": 1.8425, "step": 8014 }, { "epoch": 0.44674209910261414, "grad_norm": 0.5159302949905396, "learning_rate": 5.978890045398833e-05, "loss": 1.7243, "step": 8015 }, { "epoch": 0.44679783735577727, "grad_norm": 0.59089195728302, "learning_rate": 5.978016837514625e-05, "loss": 1.8003, "step": 8016 }, { "epoch": 0.4468535756089404, "grad_norm": 0.5666080713272095, "learning_rate": 5.9771435986134274e-05, "loss": 1.648, "step": 8017 }, { "epoch": 0.4469093138621036, "grad_norm": 0.5891024470329285, "learning_rate": 5.9762703287229304e-05, "loss": 1.5867, "step": 8018 }, { "epoch": 0.4469650521152667, "grad_norm": 0.5871114730834961, "learning_rate": 5.975397027870831e-05, "loss": 1.656, "step": 8019 }, { "epoch": 0.44702079036842984, "grad_norm": 0.6023023724555969, "learning_rate": 5.974523696084825e-05, "loss": 1.6628, "step": 8020 }, { "epoch": 0.447076528621593, "grad_norm": 0.5608631372451782, "learning_rate": 5.97365033339261e-05, "loss": 1.4316, "step": 8021 }, { "epoch": 0.44713226687475616, "grad_norm": 0.5549430251121521, "learning_rate": 5.972776939821883e-05, "loss": 1.4696, "step": 8022 }, { "epoch": 0.4471880051279193, "grad_norm": 0.5799054503440857, "learning_rate": 5.971903515400342e-05, "loss": 1.7885, "step": 8023 }, { "epoch": 0.4472437433810824, "grad_norm": 0.5215498208999634, "learning_rate": 5.971030060155689e-05, "loss": 1.6956, "step": 8024 }, { "epoch": 0.4472994816342456, "grad_norm": 0.5385097861289978, "learning_rate": 5.970156574115623e-05, "loss": 1.5434, "step": 8025 }, { "epoch": 0.44735521988740873, "grad_norm": 0.5320507287979126, "learning_rate": 5.969283057307847e-05, "loss": 1.5207, "step": 8026 }, { "epoch": 0.44741095814057186, "grad_norm": 0.53661048412323, "learning_rate": 5.9684095097600645e-05, "loss": 1.6211, "step": 8027 }, { "epoch": 0.44746669639373504, "grad_norm": 0.5779610872268677, "learning_rate": 5.967535931499979e-05, "loss": 1.7282, "step": 8028 }, { "epoch": 0.44752243464689817, "grad_norm": 0.5973451137542725, "learning_rate": 5.966662322555294e-05, "loss": 1.822, "step": 8029 }, { "epoch": 0.4475781729000613, "grad_norm": 0.6070274710655212, "learning_rate": 5.965788682953717e-05, "loss": 1.6235, "step": 8030 }, { "epoch": 0.4476339111532245, "grad_norm": 0.5565271377563477, "learning_rate": 5.9649150127229534e-05, "loss": 1.8248, "step": 8031 }, { "epoch": 0.4476896494063876, "grad_norm": 0.5610112547874451, "learning_rate": 5.964041311890711e-05, "loss": 1.5738, "step": 8032 }, { "epoch": 0.44774538765955074, "grad_norm": 0.5636839270591736, "learning_rate": 5.9631675804846985e-05, "loss": 1.5644, "step": 8033 }, { "epoch": 0.44780112591271387, "grad_norm": 0.5381824970245361, "learning_rate": 5.962293818532628e-05, "loss": 1.6785, "step": 8034 }, { "epoch": 0.44785686416587706, "grad_norm": 0.5614325404167175, "learning_rate": 5.9614200260622066e-05, "loss": 1.7991, "step": 8035 }, { "epoch": 0.4479126024190402, "grad_norm": 0.527214527130127, "learning_rate": 5.960546203101148e-05, "loss": 1.6311, "step": 8036 }, { "epoch": 0.4479683406722033, "grad_norm": 0.5667834877967834, "learning_rate": 5.959672349677163e-05, "loss": 1.4416, "step": 8037 }, { "epoch": 0.4480240789253665, "grad_norm": 0.5953390002250671, "learning_rate": 5.9587984658179676e-05, "loss": 1.8168, "step": 8038 }, { "epoch": 0.4480798171785296, "grad_norm": 0.5339275598526001, "learning_rate": 5.957924551551275e-05, "loss": 1.6999, "step": 8039 }, { "epoch": 0.44813555543169276, "grad_norm": 0.5568943619728088, "learning_rate": 5.9570506069048e-05, "loss": 1.7066, "step": 8040 }, { "epoch": 0.44819129368485594, "grad_norm": 0.5787097215652466, "learning_rate": 5.95617663190626e-05, "loss": 1.6468, "step": 8041 }, { "epoch": 0.44824703193801907, "grad_norm": 0.5685398578643799, "learning_rate": 5.955302626583374e-05, "loss": 1.8804, "step": 8042 }, { "epoch": 0.4483027701911822, "grad_norm": 0.5303986668586731, "learning_rate": 5.9544285909638566e-05, "loss": 1.4389, "step": 8043 }, { "epoch": 0.4483585084443453, "grad_norm": 0.5936418771743774, "learning_rate": 5.953554525075429e-05, "loss": 1.9128, "step": 8044 }, { "epoch": 0.4484142466975085, "grad_norm": 0.5271584391593933, "learning_rate": 5.952680428945812e-05, "loss": 1.5926, "step": 8045 }, { "epoch": 0.44846998495067164, "grad_norm": 0.5615208148956299, "learning_rate": 5.951806302602725e-05, "loss": 1.6805, "step": 8046 }, { "epoch": 0.44852572320383477, "grad_norm": 0.5467960834503174, "learning_rate": 5.950932146073893e-05, "loss": 1.6863, "step": 8047 }, { "epoch": 0.44858146145699795, "grad_norm": 0.5716736912727356, "learning_rate": 5.950057959387038e-05, "loss": 1.695, "step": 8048 }, { "epoch": 0.4486371997101611, "grad_norm": 0.5174785852432251, "learning_rate": 5.9491837425698816e-05, "loss": 1.3978, "step": 8049 }, { "epoch": 0.4486929379633242, "grad_norm": 0.5112467408180237, "learning_rate": 5.948309495650153e-05, "loss": 1.3862, "step": 8050 }, { "epoch": 0.4487486762164874, "grad_norm": 0.6070237755775452, "learning_rate": 5.947435218655576e-05, "loss": 1.744, "step": 8051 }, { "epoch": 0.4488044144696505, "grad_norm": 0.5886159539222717, "learning_rate": 5.946560911613877e-05, "loss": 1.9782, "step": 8052 }, { "epoch": 0.44886015272281365, "grad_norm": 0.6077089309692383, "learning_rate": 5.945686574552785e-05, "loss": 1.6861, "step": 8053 }, { "epoch": 0.44891589097597684, "grad_norm": 0.5767019391059875, "learning_rate": 5.944812207500029e-05, "loss": 1.8577, "step": 8054 }, { "epoch": 0.44897162922913997, "grad_norm": 0.5735483765602112, "learning_rate": 5.943937810483338e-05, "loss": 1.8143, "step": 8055 }, { "epoch": 0.4490273674823031, "grad_norm": 0.5384686589241028, "learning_rate": 5.943063383530444e-05, "loss": 1.7183, "step": 8056 }, { "epoch": 0.4490831057354662, "grad_norm": 0.5415961146354675, "learning_rate": 5.942188926669077e-05, "loss": 1.5619, "step": 8057 }, { "epoch": 0.4491388439886294, "grad_norm": 0.5548281669616699, "learning_rate": 5.941314439926969e-05, "loss": 1.8049, "step": 8058 }, { "epoch": 0.44919458224179254, "grad_norm": 0.5731210112571716, "learning_rate": 5.940439923331857e-05, "loss": 1.9301, "step": 8059 }, { "epoch": 0.44925032049495567, "grad_norm": 0.5715717673301697, "learning_rate": 5.939565376911475e-05, "loss": 1.6145, "step": 8060 }, { "epoch": 0.44930605874811885, "grad_norm": 0.5775079131126404, "learning_rate": 5.938690800693556e-05, "loss": 1.7435, "step": 8061 }, { "epoch": 0.449361797001282, "grad_norm": 0.5366044044494629, "learning_rate": 5.937816194705838e-05, "loss": 1.7497, "step": 8062 }, { "epoch": 0.4494175352544451, "grad_norm": 0.5498981475830078, "learning_rate": 5.936941558976058e-05, "loss": 1.6565, "step": 8063 }, { "epoch": 0.4494732735076083, "grad_norm": 0.541826605796814, "learning_rate": 5.936066893531954e-05, "loss": 1.6147, "step": 8064 }, { "epoch": 0.4495290117607714, "grad_norm": 0.5456510186195374, "learning_rate": 5.9351921984012657e-05, "loss": 1.652, "step": 8065 }, { "epoch": 0.44958475001393455, "grad_norm": 0.5831677317619324, "learning_rate": 5.934317473611734e-05, "loss": 1.7302, "step": 8066 }, { "epoch": 0.4496404882670977, "grad_norm": 0.55061274766922, "learning_rate": 5.9334427191911e-05, "loss": 1.6976, "step": 8067 }, { "epoch": 0.44969622652026087, "grad_norm": 0.5210010409355164, "learning_rate": 5.932567935167104e-05, "loss": 1.5901, "step": 8068 }, { "epoch": 0.449751964773424, "grad_norm": 0.5638371706008911, "learning_rate": 5.931693121567492e-05, "loss": 1.7005, "step": 8069 }, { "epoch": 0.4498077030265871, "grad_norm": 0.5460227131843567, "learning_rate": 5.930818278420005e-05, "loss": 1.8827, "step": 8070 }, { "epoch": 0.4498634412797503, "grad_norm": 0.5335036516189575, "learning_rate": 5.9299434057523894e-05, "loss": 1.6689, "step": 8071 }, { "epoch": 0.44991917953291344, "grad_norm": 0.45309698581695557, "learning_rate": 5.929068503592391e-05, "loss": 1.1558, "step": 8072 }, { "epoch": 0.44997491778607657, "grad_norm": 0.5678838491439819, "learning_rate": 5.9281935719677574e-05, "loss": 1.7916, "step": 8073 }, { "epoch": 0.45003065603923975, "grad_norm": 0.6037769913673401, "learning_rate": 5.927318610906234e-05, "loss": 1.6458, "step": 8074 }, { "epoch": 0.4500863942924029, "grad_norm": 0.5376781821250916, "learning_rate": 5.9264436204355724e-05, "loss": 1.754, "step": 8075 }, { "epoch": 0.450142132545566, "grad_norm": 0.5493988394737244, "learning_rate": 5.92556860058352e-05, "loss": 1.7992, "step": 8076 }, { "epoch": 0.4501978707987292, "grad_norm": 0.5373069643974304, "learning_rate": 5.9246935513778276e-05, "loss": 1.6756, "step": 8077 }, { "epoch": 0.4502536090518923, "grad_norm": 0.5574460625648499, "learning_rate": 5.923818472846248e-05, "loss": 1.6423, "step": 8078 }, { "epoch": 0.45030934730505545, "grad_norm": 0.5568375587463379, "learning_rate": 5.922943365016531e-05, "loss": 1.7708, "step": 8079 }, { "epoch": 0.4503650855582186, "grad_norm": 0.551171064376831, "learning_rate": 5.922068227916433e-05, "loss": 1.7107, "step": 8080 }, { "epoch": 0.45042082381138177, "grad_norm": 0.5870986580848694, "learning_rate": 5.9211930615737066e-05, "loss": 1.801, "step": 8081 }, { "epoch": 0.4504765620645449, "grad_norm": 0.5700268745422363, "learning_rate": 5.920317866016108e-05, "loss": 1.6317, "step": 8082 }, { "epoch": 0.450532300317708, "grad_norm": 0.5469490885734558, "learning_rate": 5.919442641271391e-05, "loss": 1.6841, "step": 8083 }, { "epoch": 0.4505880385708712, "grad_norm": 0.5380752682685852, "learning_rate": 5.9185673873673154e-05, "loss": 1.3761, "step": 8084 }, { "epoch": 0.45064377682403434, "grad_norm": 0.6156383156776428, "learning_rate": 5.917692104331637e-05, "loss": 1.9012, "step": 8085 }, { "epoch": 0.45069951507719747, "grad_norm": 0.6044989824295044, "learning_rate": 5.916816792192116e-05, "loss": 1.8825, "step": 8086 }, { "epoch": 0.45075525333036065, "grad_norm": 0.5541858673095703, "learning_rate": 5.915941450976512e-05, "loss": 1.6097, "step": 8087 }, { "epoch": 0.4508109915835238, "grad_norm": 0.5468337535858154, "learning_rate": 5.9150660807125844e-05, "loss": 1.7299, "step": 8088 }, { "epoch": 0.4508667298366869, "grad_norm": 0.6255477070808411, "learning_rate": 5.9141906814280975e-05, "loss": 1.818, "step": 8089 }, { "epoch": 0.45092246808985004, "grad_norm": 0.5574450492858887, "learning_rate": 5.9133152531508106e-05, "loss": 1.8804, "step": 8090 }, { "epoch": 0.4509782063430132, "grad_norm": 0.5240482091903687, "learning_rate": 5.91243979590849e-05, "loss": 1.6162, "step": 8091 }, { "epoch": 0.45103394459617635, "grad_norm": 0.5322662591934204, "learning_rate": 5.911564309728899e-05, "loss": 1.7833, "step": 8092 }, { "epoch": 0.4510896828493395, "grad_norm": 0.5365003347396851, "learning_rate": 5.910688794639803e-05, "loss": 1.5982, "step": 8093 }, { "epoch": 0.45114542110250266, "grad_norm": 0.5948169827461243, "learning_rate": 5.909813250668967e-05, "loss": 1.8386, "step": 8094 }, { "epoch": 0.4512011593556658, "grad_norm": 0.5501197576522827, "learning_rate": 5.9089376778441606e-05, "loss": 1.748, "step": 8095 }, { "epoch": 0.4512568976088289, "grad_norm": 0.5238162875175476, "learning_rate": 5.908062076193149e-05, "loss": 1.4871, "step": 8096 }, { "epoch": 0.4513126358619921, "grad_norm": 0.515355110168457, "learning_rate": 5.907186445743704e-05, "loss": 1.4985, "step": 8097 }, { "epoch": 0.45136837411515524, "grad_norm": 0.5451371073722839, "learning_rate": 5.9063107865235936e-05, "loss": 1.7953, "step": 8098 }, { "epoch": 0.45142411236831836, "grad_norm": 0.5602155327796936, "learning_rate": 5.90543509856059e-05, "loss": 1.4848, "step": 8099 }, { "epoch": 0.45147985062148155, "grad_norm": 0.6136230826377869, "learning_rate": 5.904559381882463e-05, "loss": 1.8602, "step": 8100 }, { "epoch": 0.4515355888746447, "grad_norm": 0.5416921973228455, "learning_rate": 5.9036836365169865e-05, "loss": 1.7242, "step": 8101 }, { "epoch": 0.4515913271278078, "grad_norm": 0.5299700498580933, "learning_rate": 5.9028078624919344e-05, "loss": 1.4976, "step": 8102 }, { "epoch": 0.45164706538097094, "grad_norm": 0.5295999050140381, "learning_rate": 5.901932059835081e-05, "loss": 1.667, "step": 8103 }, { "epoch": 0.4517028036341341, "grad_norm": 0.5291856527328491, "learning_rate": 5.9010562285742e-05, "loss": 1.5909, "step": 8104 }, { "epoch": 0.45175854188729725, "grad_norm": 0.5456459522247314, "learning_rate": 5.9001803687370696e-05, "loss": 1.6947, "step": 8105 }, { "epoch": 0.4518142801404604, "grad_norm": 0.534061074256897, "learning_rate": 5.8993044803514674e-05, "loss": 1.4796, "step": 8106 }, { "epoch": 0.45187001839362356, "grad_norm": 0.5795206427574158, "learning_rate": 5.8984285634451695e-05, "loss": 1.8176, "step": 8107 }, { "epoch": 0.4519257566467867, "grad_norm": 0.5638490915298462, "learning_rate": 5.897552618045956e-05, "loss": 1.6067, "step": 8108 }, { "epoch": 0.4519814948999498, "grad_norm": 0.5725950002670288, "learning_rate": 5.896676644181607e-05, "loss": 1.6761, "step": 8109 }, { "epoch": 0.452037233153113, "grad_norm": 0.6189979314804077, "learning_rate": 5.8958006418799005e-05, "loss": 1.8323, "step": 8110 }, { "epoch": 0.45209297140627613, "grad_norm": 0.550565779209137, "learning_rate": 5.894924611168622e-05, "loss": 1.865, "step": 8111 }, { "epoch": 0.45214870965943926, "grad_norm": 0.563420832157135, "learning_rate": 5.894048552075554e-05, "loss": 1.8, "step": 8112 }, { "epoch": 0.4522044479126024, "grad_norm": 0.5111345052719116, "learning_rate": 5.893172464628477e-05, "loss": 1.4806, "step": 8113 }, { "epoch": 0.4522601861657656, "grad_norm": 0.566088855266571, "learning_rate": 5.8922963488551775e-05, "loss": 1.7427, "step": 8114 }, { "epoch": 0.4523159244189287, "grad_norm": 0.5696318745613098, "learning_rate": 5.89142020478344e-05, "loss": 1.8576, "step": 8115 }, { "epoch": 0.45237166267209183, "grad_norm": 0.5730637907981873, "learning_rate": 5.890544032441051e-05, "loss": 1.6966, "step": 8116 }, { "epoch": 0.452427400925255, "grad_norm": 0.5427675247192383, "learning_rate": 5.889667831855797e-05, "loss": 1.639, "step": 8117 }, { "epoch": 0.45248313917841815, "grad_norm": 0.6031304001808167, "learning_rate": 5.888791603055467e-05, "loss": 1.7707, "step": 8118 }, { "epoch": 0.4525388774315813, "grad_norm": 0.5573417544364929, "learning_rate": 5.887915346067851e-05, "loss": 1.8751, "step": 8119 }, { "epoch": 0.45259461568474446, "grad_norm": 0.5398233532905579, "learning_rate": 5.8870390609207337e-05, "loss": 1.5854, "step": 8120 }, { "epoch": 0.4526503539379076, "grad_norm": 0.554905354976654, "learning_rate": 5.886162747641912e-05, "loss": 1.6138, "step": 8121 }, { "epoch": 0.4527060921910707, "grad_norm": 0.5116898417472839, "learning_rate": 5.885286406259174e-05, "loss": 1.4997, "step": 8122 }, { "epoch": 0.4527618304442339, "grad_norm": 0.5095398426055908, "learning_rate": 5.884410036800312e-05, "loss": 1.372, "step": 8123 }, { "epoch": 0.45281756869739703, "grad_norm": 0.5345844626426697, "learning_rate": 5.883533639293119e-05, "loss": 1.7398, "step": 8124 }, { "epoch": 0.45287330695056016, "grad_norm": 0.5889625549316406, "learning_rate": 5.882657213765393e-05, "loss": 1.8826, "step": 8125 }, { "epoch": 0.4529290452037233, "grad_norm": 0.5907882452011108, "learning_rate": 5.881780760244926e-05, "loss": 1.8187, "step": 8126 }, { "epoch": 0.4529847834568865, "grad_norm": 0.5326589941978455, "learning_rate": 5.8809042787595135e-05, "loss": 1.5317, "step": 8127 }, { "epoch": 0.4530405217100496, "grad_norm": 0.6067203283309937, "learning_rate": 5.880027769336953e-05, "loss": 1.9912, "step": 8128 }, { "epoch": 0.45309625996321273, "grad_norm": 0.5273611545562744, "learning_rate": 5.879151232005044e-05, "loss": 1.7771, "step": 8129 }, { "epoch": 0.4531519982163759, "grad_norm": 0.5791671872138977, "learning_rate": 5.8782746667915824e-05, "loss": 1.9728, "step": 8130 }, { "epoch": 0.45320773646953905, "grad_norm": 0.5748934149742126, "learning_rate": 5.877398073724368e-05, "loss": 1.7932, "step": 8131 }, { "epoch": 0.4532634747227022, "grad_norm": 0.5750080943107605, "learning_rate": 5.876521452831205e-05, "loss": 1.6562, "step": 8132 }, { "epoch": 0.45331921297586536, "grad_norm": 0.5455517172813416, "learning_rate": 5.87564480413989e-05, "loss": 1.6491, "step": 8133 }, { "epoch": 0.4533749512290285, "grad_norm": 0.5786875486373901, "learning_rate": 5.8747681276782294e-05, "loss": 1.6799, "step": 8134 }, { "epoch": 0.4534306894821916, "grad_norm": 0.5193260908126831, "learning_rate": 5.8738914234740225e-05, "loss": 1.7299, "step": 8135 }, { "epoch": 0.45348642773535475, "grad_norm": 0.5477581024169922, "learning_rate": 5.8730146915550745e-05, "loss": 1.529, "step": 8136 }, { "epoch": 0.45354216598851793, "grad_norm": 0.5622334480285645, "learning_rate": 5.872137931949191e-05, "loss": 1.7301, "step": 8137 }, { "epoch": 0.45359790424168106, "grad_norm": 0.5410364866256714, "learning_rate": 5.871261144684177e-05, "loss": 1.7159, "step": 8138 }, { "epoch": 0.4536536424948442, "grad_norm": 0.5440908670425415, "learning_rate": 5.870384329787839e-05, "loss": 1.6208, "step": 8139 }, { "epoch": 0.4537093807480074, "grad_norm": 0.5730171799659729, "learning_rate": 5.8695074872879855e-05, "loss": 1.7554, "step": 8140 }, { "epoch": 0.4537651190011705, "grad_norm": 0.5274659991264343, "learning_rate": 5.868630617212424e-05, "loss": 1.6493, "step": 8141 }, { "epoch": 0.45382085725433363, "grad_norm": 0.5639094114303589, "learning_rate": 5.867753719588963e-05, "loss": 1.8717, "step": 8142 }, { "epoch": 0.4538765955074968, "grad_norm": 0.5402084589004517, "learning_rate": 5.8668767944454136e-05, "loss": 1.7959, "step": 8143 }, { "epoch": 0.45393233376065995, "grad_norm": 0.5999549627304077, "learning_rate": 5.865999841809586e-05, "loss": 1.7492, "step": 8144 }, { "epoch": 0.4539880720138231, "grad_norm": 0.5832345485687256, "learning_rate": 5.865122861709295e-05, "loss": 1.7432, "step": 8145 }, { "epoch": 0.45404381026698626, "grad_norm": 0.500333309173584, "learning_rate": 5.864245854172349e-05, "loss": 1.5536, "step": 8146 }, { "epoch": 0.4540995485201494, "grad_norm": 0.5283179879188538, "learning_rate": 5.8633688192265645e-05, "loss": 1.5528, "step": 8147 }, { "epoch": 0.4541552867733125, "grad_norm": 0.5074849128723145, "learning_rate": 5.862491756899753e-05, "loss": 1.5251, "step": 8148 }, { "epoch": 0.45421102502647565, "grad_norm": 0.5706311464309692, "learning_rate": 5.8616146672197326e-05, "loss": 1.5709, "step": 8149 }, { "epoch": 0.45426676327963883, "grad_norm": 0.570326566696167, "learning_rate": 5.8607375502143183e-05, "loss": 1.6585, "step": 8150 }, { "epoch": 0.45432250153280196, "grad_norm": 0.7040314674377441, "learning_rate": 5.859860405911328e-05, "loss": 2.0239, "step": 8151 }, { "epoch": 0.4543782397859651, "grad_norm": 0.5602174401283264, "learning_rate": 5.858983234338579e-05, "loss": 1.5565, "step": 8152 }, { "epoch": 0.4544339780391283, "grad_norm": 0.596564531326294, "learning_rate": 5.858106035523888e-05, "loss": 1.8482, "step": 8153 }, { "epoch": 0.4544897162922914, "grad_norm": 0.5571820735931396, "learning_rate": 5.85722880949508e-05, "loss": 1.6401, "step": 8154 }, { "epoch": 0.45454545454545453, "grad_norm": 0.5759769678115845, "learning_rate": 5.8563515562799695e-05, "loss": 1.8876, "step": 8155 }, { "epoch": 0.4546011927986177, "grad_norm": 0.526823103427887, "learning_rate": 5.855474275906381e-05, "loss": 1.4215, "step": 8156 }, { "epoch": 0.45465693105178084, "grad_norm": 0.5801699161529541, "learning_rate": 5.854596968402136e-05, "loss": 1.8225, "step": 8157 }, { "epoch": 0.454712669304944, "grad_norm": 0.548812747001648, "learning_rate": 5.8537196337950596e-05, "loss": 1.6582, "step": 8158 }, { "epoch": 0.4547684075581071, "grad_norm": 0.5647279024124146, "learning_rate": 5.8528422721129726e-05, "loss": 1.6121, "step": 8159 }, { "epoch": 0.4548241458112703, "grad_norm": 0.5501880645751953, "learning_rate": 5.8519648833837013e-05, "loss": 1.5704, "step": 8160 }, { "epoch": 0.4548798840644334, "grad_norm": 0.5714605450630188, "learning_rate": 5.851087467635071e-05, "loss": 1.918, "step": 8161 }, { "epoch": 0.45493562231759654, "grad_norm": 0.5872429609298706, "learning_rate": 5.8502100248949085e-05, "loss": 1.7381, "step": 8162 }, { "epoch": 0.45499136057075973, "grad_norm": 0.5113133788108826, "learning_rate": 5.8493325551910405e-05, "loss": 1.6602, "step": 8163 }, { "epoch": 0.45504709882392286, "grad_norm": 0.5724974274635315, "learning_rate": 5.848455058551298e-05, "loss": 1.7762, "step": 8164 }, { "epoch": 0.455102837077086, "grad_norm": 0.5925339460372925, "learning_rate": 5.8475775350035056e-05, "loss": 1.8456, "step": 8165 }, { "epoch": 0.45515857533024917, "grad_norm": 0.567402720451355, "learning_rate": 5.846699984575497e-05, "loss": 1.6512, "step": 8166 }, { "epoch": 0.4552143135834123, "grad_norm": 0.53789883852005, "learning_rate": 5.8458224072951005e-05, "loss": 1.675, "step": 8167 }, { "epoch": 0.45527005183657543, "grad_norm": 0.563400149345398, "learning_rate": 5.844944803190149e-05, "loss": 1.4973, "step": 8168 }, { "epoch": 0.4553257900897386, "grad_norm": 0.5786770582199097, "learning_rate": 5.844067172288474e-05, "loss": 1.6223, "step": 8169 }, { "epoch": 0.45538152834290174, "grad_norm": 0.5910102725028992, "learning_rate": 5.843189514617911e-05, "loss": 1.7822, "step": 8170 }, { "epoch": 0.45543726659606487, "grad_norm": 0.5599364638328552, "learning_rate": 5.8423118302062915e-05, "loss": 1.7511, "step": 8171 }, { "epoch": 0.455493004849228, "grad_norm": 0.5284358263015747, "learning_rate": 5.841434119081453e-05, "loss": 1.6494, "step": 8172 }, { "epoch": 0.4555487431023912, "grad_norm": 0.5970794558525085, "learning_rate": 5.840556381271229e-05, "loss": 1.6952, "step": 8173 }, { "epoch": 0.4556044813555543, "grad_norm": 0.5448065400123596, "learning_rate": 5.839678616803458e-05, "loss": 1.5907, "step": 8174 }, { "epoch": 0.45566021960871744, "grad_norm": 0.5598198771476746, "learning_rate": 5.838800825705977e-05, "loss": 1.6862, "step": 8175 }, { "epoch": 0.4557159578618806, "grad_norm": 0.5819631218910217, "learning_rate": 5.837923008006623e-05, "loss": 1.7354, "step": 8176 }, { "epoch": 0.45577169611504376, "grad_norm": 0.5947074890136719, "learning_rate": 5.837045163733239e-05, "loss": 1.7971, "step": 8177 }, { "epoch": 0.4558274343682069, "grad_norm": 0.541515588760376, "learning_rate": 5.8361672929136614e-05, "loss": 1.4939, "step": 8178 }, { "epoch": 0.45588317262137007, "grad_norm": 0.670753002166748, "learning_rate": 5.835289395575731e-05, "loss": 1.8816, "step": 8179 }, { "epoch": 0.4559389108745332, "grad_norm": 0.5665016174316406, "learning_rate": 5.8344114717472943e-05, "loss": 1.6907, "step": 8180 }, { "epoch": 0.4559946491276963, "grad_norm": 0.5885823369026184, "learning_rate": 5.833533521456188e-05, "loss": 1.6905, "step": 8181 }, { "epoch": 0.45605038738085946, "grad_norm": 0.5672965049743652, "learning_rate": 5.832655544730259e-05, "loss": 1.5996, "step": 8182 }, { "epoch": 0.45610612563402264, "grad_norm": 0.5488877296447754, "learning_rate": 5.831777541597351e-05, "loss": 1.6316, "step": 8183 }, { "epoch": 0.45616186388718577, "grad_norm": 0.541111409664154, "learning_rate": 5.8308995120853096e-05, "loss": 1.7246, "step": 8184 }, { "epoch": 0.4562176021403489, "grad_norm": 0.5794996619224548, "learning_rate": 5.830021456221979e-05, "loss": 1.8438, "step": 8185 }, { "epoch": 0.4562733403935121, "grad_norm": 0.4965246021747589, "learning_rate": 5.829143374035209e-05, "loss": 1.2569, "step": 8186 }, { "epoch": 0.4563290786466752, "grad_norm": 0.5464833974838257, "learning_rate": 5.8282652655528426e-05, "loss": 1.6355, "step": 8187 }, { "epoch": 0.45638481689983834, "grad_norm": 0.617215096950531, "learning_rate": 5.827387130802733e-05, "loss": 1.7473, "step": 8188 }, { "epoch": 0.4564405551530015, "grad_norm": 0.6064026355743408, "learning_rate": 5.826508969812726e-05, "loss": 1.817, "step": 8189 }, { "epoch": 0.45649629340616465, "grad_norm": 0.6004077792167664, "learning_rate": 5.825630782610676e-05, "loss": 1.8728, "step": 8190 }, { "epoch": 0.4565520316593278, "grad_norm": 0.6301288604736328, "learning_rate": 5.82475256922443e-05, "loss": 1.8616, "step": 8191 }, { "epoch": 0.45660776991249097, "grad_norm": 0.540440022945404, "learning_rate": 5.8238743296818396e-05, "loss": 1.7224, "step": 8192 }, { "epoch": 0.4566635081656541, "grad_norm": 0.5390138626098633, "learning_rate": 5.8229960640107617e-05, "loss": 1.556, "step": 8193 }, { "epoch": 0.4567192464188172, "grad_norm": 0.5261824131011963, "learning_rate": 5.822117772239045e-05, "loss": 1.6086, "step": 8194 }, { "epoch": 0.45677498467198036, "grad_norm": 0.543070375919342, "learning_rate": 5.821239454394547e-05, "loss": 1.5987, "step": 8195 }, { "epoch": 0.45683072292514354, "grad_norm": 0.6048296689987183, "learning_rate": 5.8203611105051204e-05, "loss": 1.7936, "step": 8196 }, { "epoch": 0.45688646117830667, "grad_norm": 0.5308238863945007, "learning_rate": 5.819482740598624e-05, "loss": 1.5304, "step": 8197 }, { "epoch": 0.4569421994314698, "grad_norm": 0.5806917548179626, "learning_rate": 5.8186043447029125e-05, "loss": 1.6869, "step": 8198 }, { "epoch": 0.456997937684633, "grad_norm": 0.5387137532234192, "learning_rate": 5.8177259228458444e-05, "loss": 1.7673, "step": 8199 }, { "epoch": 0.4570536759377961, "grad_norm": 0.5830815434455872, "learning_rate": 5.816847475055277e-05, "loss": 1.9119, "step": 8200 }, { "epoch": 0.45710941419095924, "grad_norm": 0.5564570426940918, "learning_rate": 5.8159690013590695e-05, "loss": 1.5385, "step": 8201 }, { "epoch": 0.4571651524441224, "grad_norm": 0.5688846707344055, "learning_rate": 5.815090501785083e-05, "loss": 1.5954, "step": 8202 }, { "epoch": 0.45722089069728555, "grad_norm": 0.6317092776298523, "learning_rate": 5.814211976361179e-05, "loss": 1.9886, "step": 8203 }, { "epoch": 0.4572766289504487, "grad_norm": 0.5649227499961853, "learning_rate": 5.813333425115218e-05, "loss": 1.6259, "step": 8204 }, { "epoch": 0.4573323672036118, "grad_norm": 0.5452385544776917, "learning_rate": 5.812454848075063e-05, "loss": 1.7129, "step": 8205 }, { "epoch": 0.457388105456775, "grad_norm": 0.5575756430625916, "learning_rate": 5.8115762452685774e-05, "loss": 1.7543, "step": 8206 }, { "epoch": 0.4574438437099381, "grad_norm": 0.5120208263397217, "learning_rate": 5.810697616723624e-05, "loss": 1.5619, "step": 8207 }, { "epoch": 0.45749958196310125, "grad_norm": 0.5111353397369385, "learning_rate": 5.809818962468069e-05, "loss": 1.5594, "step": 8208 }, { "epoch": 0.45755532021626444, "grad_norm": 0.5274066925048828, "learning_rate": 5.8089402825297776e-05, "loss": 1.5727, "step": 8209 }, { "epoch": 0.45761105846942757, "grad_norm": 0.531512439250946, "learning_rate": 5.80806157693662e-05, "loss": 1.6845, "step": 8210 }, { "epoch": 0.4576667967225907, "grad_norm": 0.587890088558197, "learning_rate": 5.807182845716458e-05, "loss": 1.8239, "step": 8211 }, { "epoch": 0.4577225349757539, "grad_norm": 0.543900191783905, "learning_rate": 5.8063040888971635e-05, "loss": 1.9671, "step": 8212 }, { "epoch": 0.457778273228917, "grad_norm": 0.5269332528114319, "learning_rate": 5.8054253065066024e-05, "loss": 1.5801, "step": 8213 }, { "epoch": 0.45783401148208014, "grad_norm": 0.5568074584007263, "learning_rate": 5.8045464985726474e-05, "loss": 1.5843, "step": 8214 }, { "epoch": 0.4578897497352433, "grad_norm": 0.5887969136238098, "learning_rate": 5.803667665123168e-05, "loss": 1.9532, "step": 8215 }, { "epoch": 0.45794548798840645, "grad_norm": 0.6071587204933167, "learning_rate": 5.802788806186038e-05, "loss": 1.9501, "step": 8216 }, { "epoch": 0.4580012262415696, "grad_norm": 0.5481032133102417, "learning_rate": 5.801909921789126e-05, "loss": 1.7435, "step": 8217 }, { "epoch": 0.4580569644947327, "grad_norm": 0.6313177347183228, "learning_rate": 5.801031011960306e-05, "loss": 1.928, "step": 8218 }, { "epoch": 0.4581127027478959, "grad_norm": 0.5789720416069031, "learning_rate": 5.800152076727454e-05, "loss": 1.8, "step": 8219 }, { "epoch": 0.458168441001059, "grad_norm": 0.5438299775123596, "learning_rate": 5.799273116118443e-05, "loss": 1.6805, "step": 8220 }, { "epoch": 0.45822417925422215, "grad_norm": 0.5296357870101929, "learning_rate": 5.798394130161149e-05, "loss": 1.4218, "step": 8221 }, { "epoch": 0.45827991750738534, "grad_norm": 0.6217812895774841, "learning_rate": 5.7975151188834475e-05, "loss": 1.7633, "step": 8222 }, { "epoch": 0.45833565576054847, "grad_norm": 0.6416480541229248, "learning_rate": 5.796636082313217e-05, "loss": 2.0147, "step": 8223 }, { "epoch": 0.4583913940137116, "grad_norm": 0.5263529419898987, "learning_rate": 5.795757020478334e-05, "loss": 1.5335, "step": 8224 }, { "epoch": 0.4584471322668748, "grad_norm": 0.565466046333313, "learning_rate": 5.794877933406679e-05, "loss": 1.778, "step": 8225 }, { "epoch": 0.4585028705200379, "grad_norm": 0.5382056832313538, "learning_rate": 5.79399882112613e-05, "loss": 1.678, "step": 8226 }, { "epoch": 0.45855860877320104, "grad_norm": 0.5097582340240479, "learning_rate": 5.7931196836645675e-05, "loss": 1.5224, "step": 8227 }, { "epoch": 0.45861434702636417, "grad_norm": 0.5619562268257141, "learning_rate": 5.792240521049872e-05, "loss": 1.9743, "step": 8228 }, { "epoch": 0.45867008527952735, "grad_norm": 0.57401442527771, "learning_rate": 5.791361333309926e-05, "loss": 1.6526, "step": 8229 }, { "epoch": 0.4587258235326905, "grad_norm": 0.557773232460022, "learning_rate": 5.790482120472615e-05, "loss": 1.7427, "step": 8230 }, { "epoch": 0.4587815617858536, "grad_norm": 0.5370197296142578, "learning_rate": 5.789602882565818e-05, "loss": 1.5028, "step": 8231 }, { "epoch": 0.4588373000390168, "grad_norm": 0.559916079044342, "learning_rate": 5.788723619617422e-05, "loss": 1.6115, "step": 8232 }, { "epoch": 0.4588930382921799, "grad_norm": 0.5461910367012024, "learning_rate": 5.787844331655311e-05, "loss": 1.5789, "step": 8233 }, { "epoch": 0.45894877654534305, "grad_norm": 0.5319302082061768, "learning_rate": 5.786965018707371e-05, "loss": 1.66, "step": 8234 }, { "epoch": 0.45900451479850624, "grad_norm": 0.5757958292961121, "learning_rate": 5.786085680801488e-05, "loss": 1.9192, "step": 8235 }, { "epoch": 0.45906025305166936, "grad_norm": 0.523041844367981, "learning_rate": 5.785206317965553e-05, "loss": 1.5435, "step": 8236 }, { "epoch": 0.4591159913048325, "grad_norm": 0.5196270942687988, "learning_rate": 5.7843269302274506e-05, "loss": 1.2152, "step": 8237 }, { "epoch": 0.4591717295579957, "grad_norm": 0.5284752249717712, "learning_rate": 5.7834475176150715e-05, "loss": 1.6407, "step": 8238 }, { "epoch": 0.4592274678111588, "grad_norm": 0.5639576315879822, "learning_rate": 5.782568080156303e-05, "loss": 1.8297, "step": 8239 }, { "epoch": 0.45928320606432194, "grad_norm": 0.5723278522491455, "learning_rate": 5.781688617879039e-05, "loss": 1.7981, "step": 8240 }, { "epoch": 0.45933894431748507, "grad_norm": 0.5638182759284973, "learning_rate": 5.780809130811169e-05, "loss": 1.6244, "step": 8241 }, { "epoch": 0.45939468257064825, "grad_norm": 0.5704604983329773, "learning_rate": 5.779929618980586e-05, "loss": 1.6348, "step": 8242 }, { "epoch": 0.4594504208238114, "grad_norm": 0.5768876671791077, "learning_rate": 5.779050082415184e-05, "loss": 1.7342, "step": 8243 }, { "epoch": 0.4595061590769745, "grad_norm": 0.5308094620704651, "learning_rate": 5.778170521142854e-05, "loss": 1.6838, "step": 8244 }, { "epoch": 0.4595618973301377, "grad_norm": 0.6009156703948975, "learning_rate": 5.777290935191493e-05, "loss": 1.72, "step": 8245 }, { "epoch": 0.4596176355833008, "grad_norm": 0.5695474743843079, "learning_rate": 5.776411324588995e-05, "loss": 1.6783, "step": 8246 }, { "epoch": 0.45967337383646395, "grad_norm": 0.5541953444480896, "learning_rate": 5.775531689363256e-05, "loss": 1.5248, "step": 8247 }, { "epoch": 0.45972911208962713, "grad_norm": 0.5543676614761353, "learning_rate": 5.7746520295421736e-05, "loss": 1.5673, "step": 8248 }, { "epoch": 0.45978485034279026, "grad_norm": 0.6300926804542542, "learning_rate": 5.773772345153648e-05, "loss": 1.9275, "step": 8249 }, { "epoch": 0.4598405885959534, "grad_norm": 0.580083429813385, "learning_rate": 5.772892636225572e-05, "loss": 1.583, "step": 8250 }, { "epoch": 0.4598963268491165, "grad_norm": 0.6072207689285278, "learning_rate": 5.7720129027858496e-05, "loss": 1.6752, "step": 8251 }, { "epoch": 0.4599520651022797, "grad_norm": 0.575436532497406, "learning_rate": 5.771133144862377e-05, "loss": 1.5191, "step": 8252 }, { "epoch": 0.46000780335544283, "grad_norm": 0.5946778655052185, "learning_rate": 5.770253362483059e-05, "loss": 1.7338, "step": 8253 }, { "epoch": 0.46006354160860596, "grad_norm": 0.5782346129417419, "learning_rate": 5.769373555675794e-05, "loss": 1.9825, "step": 8254 }, { "epoch": 0.46011927986176915, "grad_norm": 0.6065311431884766, "learning_rate": 5.7684937244684856e-05, "loss": 1.8879, "step": 8255 }, { "epoch": 0.4601750181149323, "grad_norm": 0.5789337158203125, "learning_rate": 5.767613868889038e-05, "loss": 1.5408, "step": 8256 }, { "epoch": 0.4602307563680954, "grad_norm": 0.5640459060668945, "learning_rate": 5.766733988965354e-05, "loss": 1.7434, "step": 8257 }, { "epoch": 0.4602864946212586, "grad_norm": 0.5351431965827942, "learning_rate": 5.765854084725337e-05, "loss": 1.7586, "step": 8258 }, { "epoch": 0.4603422328744217, "grad_norm": 0.6039308905601501, "learning_rate": 5.764974156196895e-05, "loss": 1.8, "step": 8259 }, { "epoch": 0.46039797112758485, "grad_norm": 0.5545447468757629, "learning_rate": 5.764094203407933e-05, "loss": 1.5867, "step": 8260 }, { "epoch": 0.46045370938074803, "grad_norm": 0.5933241248130798, "learning_rate": 5.763214226386355e-05, "loss": 1.8117, "step": 8261 }, { "epoch": 0.46050944763391116, "grad_norm": 0.6593655943870544, "learning_rate": 5.7623342251600745e-05, "loss": 1.6466, "step": 8262 }, { "epoch": 0.4605651858870743, "grad_norm": 0.5840887427330017, "learning_rate": 5.761454199756996e-05, "loss": 1.6135, "step": 8263 }, { "epoch": 0.4606209241402374, "grad_norm": 0.5381019711494446, "learning_rate": 5.7605741502050314e-05, "loss": 1.6211, "step": 8264 }, { "epoch": 0.4606766623934006, "grad_norm": 0.6085990071296692, "learning_rate": 5.759694076532087e-05, "loss": 1.795, "step": 8265 }, { "epoch": 0.46073240064656373, "grad_norm": 0.5574647784233093, "learning_rate": 5.758813978766077e-05, "loss": 1.4925, "step": 8266 }, { "epoch": 0.46078813889972686, "grad_norm": 0.6263840794563293, "learning_rate": 5.75793385693491e-05, "loss": 1.8677, "step": 8267 }, { "epoch": 0.46084387715289005, "grad_norm": 0.543647289276123, "learning_rate": 5.7570537110665026e-05, "loss": 1.7692, "step": 8268 }, { "epoch": 0.4608996154060532, "grad_norm": 0.6330240368843079, "learning_rate": 5.7561735411887644e-05, "loss": 1.8521, "step": 8269 }, { "epoch": 0.4609553536592163, "grad_norm": 0.5961319208145142, "learning_rate": 5.75529334732961e-05, "loss": 1.8511, "step": 8270 }, { "epoch": 0.4610110919123795, "grad_norm": 0.5653590559959412, "learning_rate": 5.754413129516956e-05, "loss": 1.6472, "step": 8271 }, { "epoch": 0.4610668301655426, "grad_norm": 0.5134671330451965, "learning_rate": 5.753532887778714e-05, "loss": 1.5722, "step": 8272 }, { "epoch": 0.46112256841870575, "grad_norm": 0.5468015074729919, "learning_rate": 5.7526526221428036e-05, "loss": 1.6829, "step": 8273 }, { "epoch": 0.4611783066718689, "grad_norm": 0.5542712211608887, "learning_rate": 5.751772332637137e-05, "loss": 1.6583, "step": 8274 }, { "epoch": 0.46123404492503206, "grad_norm": 0.554300844669342, "learning_rate": 5.75089201928964e-05, "loss": 1.7805, "step": 8275 }, { "epoch": 0.4612897831781952, "grad_norm": 0.5648434162139893, "learning_rate": 5.750011682128222e-05, "loss": 1.8315, "step": 8276 }, { "epoch": 0.4613455214313583, "grad_norm": 0.5622681975364685, "learning_rate": 5.7491313211808095e-05, "loss": 1.6431, "step": 8277 }, { "epoch": 0.4614012596845215, "grad_norm": 0.5813915133476257, "learning_rate": 5.748250936475318e-05, "loss": 1.9023, "step": 8278 }, { "epoch": 0.46145699793768463, "grad_norm": 0.5567924380302429, "learning_rate": 5.747370528039668e-05, "loss": 1.7468, "step": 8279 }, { "epoch": 0.46151273619084776, "grad_norm": 0.5861298441886902, "learning_rate": 5.7464900959017844e-05, "loss": 1.7059, "step": 8280 }, { "epoch": 0.46156847444401095, "grad_norm": 0.642804741859436, "learning_rate": 5.745609640089585e-05, "loss": 1.8385, "step": 8281 }, { "epoch": 0.4616242126971741, "grad_norm": 0.5455397963523865, "learning_rate": 5.744729160630998e-05, "loss": 1.5585, "step": 8282 }, { "epoch": 0.4616799509503372, "grad_norm": 0.5456379055976868, "learning_rate": 5.743848657553943e-05, "loss": 1.6787, "step": 8283 }, { "epoch": 0.4617356892035004, "grad_norm": 0.6248784065246582, "learning_rate": 5.742968130886346e-05, "loss": 1.9457, "step": 8284 }, { "epoch": 0.4617914274566635, "grad_norm": 0.5508323311805725, "learning_rate": 5.74208758065613e-05, "loss": 1.7643, "step": 8285 }, { "epoch": 0.46184716570982665, "grad_norm": 0.5070561170578003, "learning_rate": 5.741207006891224e-05, "loss": 1.414, "step": 8286 }, { "epoch": 0.4619029039629898, "grad_norm": 0.5954271554946899, "learning_rate": 5.740326409619552e-05, "loss": 1.7004, "step": 8287 }, { "epoch": 0.46195864221615296, "grad_norm": 0.5585724115371704, "learning_rate": 5.739445788869043e-05, "loss": 1.7653, "step": 8288 }, { "epoch": 0.4620143804693161, "grad_norm": 0.5526925325393677, "learning_rate": 5.738565144667626e-05, "loss": 1.7572, "step": 8289 }, { "epoch": 0.4620701187224792, "grad_norm": 0.5708301663398743, "learning_rate": 5.737684477043228e-05, "loss": 1.8134, "step": 8290 }, { "epoch": 0.4621258569756424, "grad_norm": 0.5142967104911804, "learning_rate": 5.736803786023779e-05, "loss": 1.4841, "step": 8291 }, { "epoch": 0.46218159522880553, "grad_norm": 0.6403586864471436, "learning_rate": 5.7359230716372105e-05, "loss": 1.9146, "step": 8292 }, { "epoch": 0.46223733348196866, "grad_norm": 0.5327916145324707, "learning_rate": 5.735042333911452e-05, "loss": 1.6559, "step": 8293 }, { "epoch": 0.46229307173513184, "grad_norm": 0.5524441599845886, "learning_rate": 5.734161572874437e-05, "loss": 1.6659, "step": 8294 }, { "epoch": 0.462348809988295, "grad_norm": 0.5722818970680237, "learning_rate": 5.7332807885540976e-05, "loss": 1.7702, "step": 8295 }, { "epoch": 0.4624045482414581, "grad_norm": 0.5551111698150635, "learning_rate": 5.7323999809783656e-05, "loss": 1.6766, "step": 8296 }, { "epoch": 0.46246028649462123, "grad_norm": 0.5412301421165466, "learning_rate": 5.731519150175179e-05, "loss": 1.6475, "step": 8297 }, { "epoch": 0.4625160247477844, "grad_norm": 0.5476828813552856, "learning_rate": 5.730638296172467e-05, "loss": 1.643, "step": 8298 }, { "epoch": 0.46257176300094754, "grad_norm": 0.5418581366539001, "learning_rate": 5.7297574189981705e-05, "loss": 1.5904, "step": 8299 }, { "epoch": 0.4626275012541107, "grad_norm": 0.5094223022460938, "learning_rate": 5.7288765186802204e-05, "loss": 1.6782, "step": 8300 }, { "epoch": 0.46268323950727386, "grad_norm": 0.5535764694213867, "learning_rate": 5.72799559524656e-05, "loss": 1.7858, "step": 8301 }, { "epoch": 0.462738977760437, "grad_norm": 0.5554370284080505, "learning_rate": 5.7271146487251224e-05, "loss": 1.757, "step": 8302 }, { "epoch": 0.4627947160136001, "grad_norm": 0.5177475810050964, "learning_rate": 5.726233679143849e-05, "loss": 1.7816, "step": 8303 }, { "epoch": 0.4628504542667633, "grad_norm": 0.5340207815170288, "learning_rate": 5.725352686530676e-05, "loss": 1.742, "step": 8304 }, { "epoch": 0.46290619251992643, "grad_norm": 0.5540534257888794, "learning_rate": 5.724471670913545e-05, "loss": 1.7751, "step": 8305 }, { "epoch": 0.46296193077308956, "grad_norm": 0.539763331413269, "learning_rate": 5.7235906323203956e-05, "loss": 1.6988, "step": 8306 }, { "epoch": 0.46301766902625274, "grad_norm": 0.5649262070655823, "learning_rate": 5.7227095707791714e-05, "loss": 1.6722, "step": 8307 }, { "epoch": 0.46307340727941587, "grad_norm": 0.583903968334198, "learning_rate": 5.721828486317814e-05, "loss": 1.8056, "step": 8308 }, { "epoch": 0.463129145532579, "grad_norm": 0.5246012210845947, "learning_rate": 5.7209473789642644e-05, "loss": 1.4819, "step": 8309 }, { "epoch": 0.46318488378574213, "grad_norm": 0.5652540922164917, "learning_rate": 5.720066248746468e-05, "loss": 1.7022, "step": 8310 }, { "epoch": 0.4632406220389053, "grad_norm": 0.5494220852851868, "learning_rate": 5.7191850956923675e-05, "loss": 1.5258, "step": 8311 }, { "epoch": 0.46329636029206844, "grad_norm": 0.5923638343811035, "learning_rate": 5.7183039198299105e-05, "loss": 1.7439, "step": 8312 }, { "epoch": 0.46335209854523157, "grad_norm": 0.6051487922668457, "learning_rate": 5.717422721187039e-05, "loss": 1.8911, "step": 8313 }, { "epoch": 0.46340783679839476, "grad_norm": 0.5064337253570557, "learning_rate": 5.7165414997917045e-05, "loss": 1.6547, "step": 8314 }, { "epoch": 0.4634635750515579, "grad_norm": 0.6165828704833984, "learning_rate": 5.715660255671848e-05, "loss": 1.8988, "step": 8315 }, { "epoch": 0.463519313304721, "grad_norm": 0.5490414500236511, "learning_rate": 5.714778988855422e-05, "loss": 1.8075, "step": 8316 }, { "epoch": 0.4635750515578842, "grad_norm": 0.5493695139884949, "learning_rate": 5.713897699370376e-05, "loss": 1.6288, "step": 8317 }, { "epoch": 0.46363078981104733, "grad_norm": 0.5596882700920105, "learning_rate": 5.713016387244656e-05, "loss": 1.6575, "step": 8318 }, { "epoch": 0.46368652806421046, "grad_norm": 0.562776505947113, "learning_rate": 5.7121350525062126e-05, "loss": 1.7129, "step": 8319 }, { "epoch": 0.4637422663173736, "grad_norm": 0.6399055123329163, "learning_rate": 5.7112536951829975e-05, "loss": 1.7888, "step": 8320 }, { "epoch": 0.46379800457053677, "grad_norm": 0.5227872729301453, "learning_rate": 5.710372315302963e-05, "loss": 1.6324, "step": 8321 }, { "epoch": 0.4638537428236999, "grad_norm": 0.5664421319961548, "learning_rate": 5.70949091289406e-05, "loss": 1.5484, "step": 8322 }, { "epoch": 0.46390948107686303, "grad_norm": 0.5465877652168274, "learning_rate": 5.708609487984242e-05, "loss": 1.5863, "step": 8323 }, { "epoch": 0.4639652193300262, "grad_norm": 0.562119722366333, "learning_rate": 5.707728040601462e-05, "loss": 1.7411, "step": 8324 }, { "epoch": 0.46402095758318934, "grad_norm": 0.569681704044342, "learning_rate": 5.706846570773676e-05, "loss": 1.6488, "step": 8325 }, { "epoch": 0.46407669583635247, "grad_norm": 0.6219793558120728, "learning_rate": 5.7059650785288354e-05, "loss": 1.7995, "step": 8326 }, { "epoch": 0.46413243408951566, "grad_norm": 0.5750408172607422, "learning_rate": 5.705083563894902e-05, "loss": 1.8457, "step": 8327 }, { "epoch": 0.4641881723426788, "grad_norm": 0.5338056683540344, "learning_rate": 5.7042020268998265e-05, "loss": 1.665, "step": 8328 }, { "epoch": 0.4642439105958419, "grad_norm": 0.5091413259506226, "learning_rate": 5.703320467571569e-05, "loss": 1.5915, "step": 8329 }, { "epoch": 0.4642996488490051, "grad_norm": 0.567847490310669, "learning_rate": 5.7024388859380875e-05, "loss": 1.6417, "step": 8330 }, { "epoch": 0.4643553871021682, "grad_norm": 0.591010332107544, "learning_rate": 5.701557282027339e-05, "loss": 1.8457, "step": 8331 }, { "epoch": 0.46441112535533136, "grad_norm": 0.5327983498573303, "learning_rate": 5.700675655867285e-05, "loss": 1.6806, "step": 8332 }, { "epoch": 0.4644668636084945, "grad_norm": 0.5359470844268799, "learning_rate": 5.6997940074858835e-05, "loss": 1.5137, "step": 8333 }, { "epoch": 0.46452260186165767, "grad_norm": 0.5727723240852356, "learning_rate": 5.698912336911097e-05, "loss": 1.737, "step": 8334 }, { "epoch": 0.4645783401148208, "grad_norm": 0.5366725325584412, "learning_rate": 5.6980306441708854e-05, "loss": 1.5039, "step": 8335 }, { "epoch": 0.4646340783679839, "grad_norm": 0.5799429416656494, "learning_rate": 5.6971489292932126e-05, "loss": 1.7687, "step": 8336 }, { "epoch": 0.4646898166211471, "grad_norm": 0.6180622577667236, "learning_rate": 5.69626719230604e-05, "loss": 1.8375, "step": 8337 }, { "epoch": 0.46474555487431024, "grad_norm": 0.5698204636573792, "learning_rate": 5.6953854332373314e-05, "loss": 1.6076, "step": 8338 }, { "epoch": 0.46480129312747337, "grad_norm": 0.5486071109771729, "learning_rate": 5.6945036521150495e-05, "loss": 1.75, "step": 8339 }, { "epoch": 0.46485703138063655, "grad_norm": 0.5504134893417358, "learning_rate": 5.693621848967163e-05, "loss": 1.753, "step": 8340 }, { "epoch": 0.4649127696337997, "grad_norm": 0.5678994059562683, "learning_rate": 5.6927400238216354e-05, "loss": 1.845, "step": 8341 }, { "epoch": 0.4649685078869628, "grad_norm": 0.5259969234466553, "learning_rate": 5.6918581767064325e-05, "loss": 1.5699, "step": 8342 }, { "epoch": 0.46502424614012594, "grad_norm": 0.5243310928344727, "learning_rate": 5.690976307649523e-05, "loss": 1.5899, "step": 8343 }, { "epoch": 0.4650799843932891, "grad_norm": 0.5647771954536438, "learning_rate": 5.6900944166788725e-05, "loss": 1.7661, "step": 8344 }, { "epoch": 0.46513572264645225, "grad_norm": 0.6884542107582092, "learning_rate": 5.689212503822452e-05, "loss": 1.5225, "step": 8345 }, { "epoch": 0.4651914608996154, "grad_norm": 0.5403727889060974, "learning_rate": 5.688330569108228e-05, "loss": 1.5896, "step": 8346 }, { "epoch": 0.46524719915277857, "grad_norm": 0.5732728838920593, "learning_rate": 5.6874486125641726e-05, "loss": 1.5632, "step": 8347 }, { "epoch": 0.4653029374059417, "grad_norm": 0.5338377356529236, "learning_rate": 5.686566634218254e-05, "loss": 1.679, "step": 8348 }, { "epoch": 0.4653586756591048, "grad_norm": 0.6053128242492676, "learning_rate": 5.685684634098447e-05, "loss": 2.0888, "step": 8349 }, { "epoch": 0.465414413912268, "grad_norm": 0.5830248594284058, "learning_rate": 5.684802612232719e-05, "loss": 1.7972, "step": 8350 }, { "epoch": 0.46547015216543114, "grad_norm": 0.6264218688011169, "learning_rate": 5.683920568649047e-05, "loss": 1.8225, "step": 8351 }, { "epoch": 0.46552589041859427, "grad_norm": 0.6199706196784973, "learning_rate": 5.6830385033753995e-05, "loss": 1.6771, "step": 8352 }, { "epoch": 0.46558162867175745, "grad_norm": 0.5402054190635681, "learning_rate": 5.682156416439755e-05, "loss": 1.3349, "step": 8353 }, { "epoch": 0.4656373669249206, "grad_norm": 0.5562443733215332, "learning_rate": 5.681274307870085e-05, "loss": 1.606, "step": 8354 }, { "epoch": 0.4656931051780837, "grad_norm": 0.6087068915367126, "learning_rate": 5.680392177694366e-05, "loss": 1.7091, "step": 8355 }, { "epoch": 0.46574884343124684, "grad_norm": 0.5770891904830933, "learning_rate": 5.679510025940575e-05, "loss": 1.7989, "step": 8356 }, { "epoch": 0.46580458168441, "grad_norm": 0.5513335466384888, "learning_rate": 5.6786278526366875e-05, "loss": 1.5115, "step": 8357 }, { "epoch": 0.46586031993757315, "grad_norm": 0.5334859490394592, "learning_rate": 5.677745657810681e-05, "loss": 1.5391, "step": 8358 }, { "epoch": 0.4659160581907363, "grad_norm": 0.51854008436203, "learning_rate": 5.6768634414905344e-05, "loss": 1.4878, "step": 8359 }, { "epoch": 0.46597179644389947, "grad_norm": 0.5759007930755615, "learning_rate": 5.675981203704226e-05, "loss": 1.7812, "step": 8360 }, { "epoch": 0.4660275346970626, "grad_norm": 0.5255948305130005, "learning_rate": 5.675098944479733e-05, "loss": 1.6782, "step": 8361 }, { "epoch": 0.4660832729502257, "grad_norm": 0.5190218091011047, "learning_rate": 5.67421666384504e-05, "loss": 1.4408, "step": 8362 }, { "epoch": 0.4661390112033889, "grad_norm": 0.5538722276687622, "learning_rate": 5.673334361828124e-05, "loss": 1.6993, "step": 8363 }, { "epoch": 0.46619474945655204, "grad_norm": 0.5251713991165161, "learning_rate": 5.672452038456969e-05, "loss": 1.5929, "step": 8364 }, { "epoch": 0.46625048770971517, "grad_norm": 0.5203914642333984, "learning_rate": 5.671569693759554e-05, "loss": 1.5579, "step": 8365 }, { "epoch": 0.4663062259628783, "grad_norm": 0.4919300675392151, "learning_rate": 5.670687327763866e-05, "loss": 1.5625, "step": 8366 }, { "epoch": 0.4663619642160415, "grad_norm": 0.5500087141990662, "learning_rate": 5.6698049404978845e-05, "loss": 1.6695, "step": 8367 }, { "epoch": 0.4664177024692046, "grad_norm": 0.5846395492553711, "learning_rate": 5.6689225319895966e-05, "loss": 1.884, "step": 8368 }, { "epoch": 0.46647344072236774, "grad_norm": 0.5971377491950989, "learning_rate": 5.668040102266987e-05, "loss": 1.9091, "step": 8369 }, { "epoch": 0.4665291789755309, "grad_norm": 0.5873506665229797, "learning_rate": 5.6671576513580385e-05, "loss": 1.7085, "step": 8370 }, { "epoch": 0.46658491722869405, "grad_norm": 0.551792323589325, "learning_rate": 5.66627517929074e-05, "loss": 1.5626, "step": 8371 }, { "epoch": 0.4666406554818572, "grad_norm": 0.5586331486701965, "learning_rate": 5.665392686093076e-05, "loss": 1.7621, "step": 8372 }, { "epoch": 0.46669639373502037, "grad_norm": 0.6477528810501099, "learning_rate": 5.664510171793038e-05, "loss": 1.9983, "step": 8373 }, { "epoch": 0.4667521319881835, "grad_norm": 0.5568731427192688, "learning_rate": 5.6636276364186105e-05, "loss": 1.5046, "step": 8374 }, { "epoch": 0.4668078702413466, "grad_norm": 0.5492534637451172, "learning_rate": 5.6627450799977844e-05, "loss": 1.6931, "step": 8375 }, { "epoch": 0.4668636084945098, "grad_norm": 0.5230808854103088, "learning_rate": 5.661862502558547e-05, "loss": 1.5232, "step": 8376 }, { "epoch": 0.46691934674767294, "grad_norm": 0.5762078762054443, "learning_rate": 5.660979904128891e-05, "loss": 1.8327, "step": 8377 }, { "epoch": 0.46697508500083607, "grad_norm": 0.5496635437011719, "learning_rate": 5.660097284736805e-05, "loss": 1.5354, "step": 8378 }, { "epoch": 0.4670308232539992, "grad_norm": 0.5177884101867676, "learning_rate": 5.6592146444102826e-05, "loss": 1.4303, "step": 8379 }, { "epoch": 0.4670865615071624, "grad_norm": 0.6022128462791443, "learning_rate": 5.658331983177315e-05, "loss": 1.9321, "step": 8380 }, { "epoch": 0.4671422997603255, "grad_norm": 0.5913931131362915, "learning_rate": 5.657449301065895e-05, "loss": 1.9125, "step": 8381 }, { "epoch": 0.46719803801348864, "grad_norm": 0.4976262152194977, "learning_rate": 5.656566598104017e-05, "loss": 1.6072, "step": 8382 }, { "epoch": 0.4672537762666518, "grad_norm": 0.5472914576530457, "learning_rate": 5.655683874319675e-05, "loss": 1.719, "step": 8383 }, { "epoch": 0.46730951451981495, "grad_norm": 0.5451732277870178, "learning_rate": 5.6548011297408634e-05, "loss": 1.6492, "step": 8384 }, { "epoch": 0.4673652527729781, "grad_norm": 0.5876046419143677, "learning_rate": 5.653918364395575e-05, "loss": 1.7208, "step": 8385 }, { "epoch": 0.46742099102614126, "grad_norm": 0.5409192442893982, "learning_rate": 5.653035578311812e-05, "loss": 1.6186, "step": 8386 }, { "epoch": 0.4674767292793044, "grad_norm": 0.5066797733306885, "learning_rate": 5.652152771517566e-05, "loss": 1.2929, "step": 8387 }, { "epoch": 0.4675324675324675, "grad_norm": 0.5531768202781677, "learning_rate": 5.651269944040838e-05, "loss": 1.7447, "step": 8388 }, { "epoch": 0.46758820578563065, "grad_norm": 0.5745431780815125, "learning_rate": 5.650387095909623e-05, "loss": 1.7896, "step": 8389 }, { "epoch": 0.46764394403879384, "grad_norm": 0.5450076460838318, "learning_rate": 5.649504227151922e-05, "loss": 1.5537, "step": 8390 }, { "epoch": 0.46769968229195696, "grad_norm": 0.5614714622497559, "learning_rate": 5.648621337795733e-05, "loss": 1.5894, "step": 8391 }, { "epoch": 0.4677554205451201, "grad_norm": 0.6122470498085022, "learning_rate": 5.647738427869058e-05, "loss": 1.8336, "step": 8392 }, { "epoch": 0.4678111587982833, "grad_norm": 0.598466157913208, "learning_rate": 5.6468554973998955e-05, "loss": 1.799, "step": 8393 }, { "epoch": 0.4678668970514464, "grad_norm": 0.5752211213111877, "learning_rate": 5.645972546416248e-05, "loss": 1.7678, "step": 8394 }, { "epoch": 0.46792263530460954, "grad_norm": 0.5438199043273926, "learning_rate": 5.6450895749461194e-05, "loss": 1.6982, "step": 8395 }, { "epoch": 0.4679783735577727, "grad_norm": 0.5414747595787048, "learning_rate": 5.64420658301751e-05, "loss": 1.5794, "step": 8396 }, { "epoch": 0.46803411181093585, "grad_norm": 0.5446813702583313, "learning_rate": 5.643323570658424e-05, "loss": 1.4545, "step": 8397 }, { "epoch": 0.468089850064099, "grad_norm": 0.5998760461807251, "learning_rate": 5.642440537896863e-05, "loss": 1.6886, "step": 8398 }, { "epoch": 0.46814558831726216, "grad_norm": 0.5757097005844116, "learning_rate": 5.6415574847608365e-05, "loss": 1.6932, "step": 8399 }, { "epoch": 0.4682013265704253, "grad_norm": 0.5681119561195374, "learning_rate": 5.640674411278345e-05, "loss": 1.6357, "step": 8400 }, { "epoch": 0.4682570648235884, "grad_norm": 0.5782068371772766, "learning_rate": 5.6397913174773986e-05, "loss": 1.4748, "step": 8401 }, { "epoch": 0.46831280307675155, "grad_norm": 0.5838581323623657, "learning_rate": 5.638908203386001e-05, "loss": 1.6619, "step": 8402 }, { "epoch": 0.46836854132991473, "grad_norm": 0.5535818934440613, "learning_rate": 5.638025069032159e-05, "loss": 1.7486, "step": 8403 }, { "epoch": 0.46842427958307786, "grad_norm": 0.5350418090820312, "learning_rate": 5.637141914443883e-05, "loss": 1.6243, "step": 8404 }, { "epoch": 0.468480017836241, "grad_norm": 0.5376988053321838, "learning_rate": 5.6362587396491805e-05, "loss": 1.6984, "step": 8405 }, { "epoch": 0.4685357560894042, "grad_norm": 0.593912661075592, "learning_rate": 5.63537554467606e-05, "loss": 1.6001, "step": 8406 }, { "epoch": 0.4685914943425673, "grad_norm": 0.5185176730155945, "learning_rate": 5.634492329552531e-05, "loss": 1.4702, "step": 8407 }, { "epoch": 0.46864723259573043, "grad_norm": 0.5814734101295471, "learning_rate": 5.6336090943066063e-05, "loss": 1.8799, "step": 8408 }, { "epoch": 0.4687029708488936, "grad_norm": 0.5562795400619507, "learning_rate": 5.632725838966294e-05, "loss": 1.7107, "step": 8409 }, { "epoch": 0.46875870910205675, "grad_norm": 0.5342075824737549, "learning_rate": 5.631842563559608e-05, "loss": 1.6502, "step": 8410 }, { "epoch": 0.4688144473552199, "grad_norm": 0.5376294255256653, "learning_rate": 5.630959268114558e-05, "loss": 1.6374, "step": 8411 }, { "epoch": 0.46887018560838306, "grad_norm": 0.5461024641990662, "learning_rate": 5.630075952659162e-05, "loss": 1.7209, "step": 8412 }, { "epoch": 0.4689259238615462, "grad_norm": 0.5888074040412903, "learning_rate": 5.629192617221427e-05, "loss": 1.7923, "step": 8413 }, { "epoch": 0.4689816621147093, "grad_norm": 0.5504298210144043, "learning_rate": 5.6283092618293734e-05, "loss": 1.6201, "step": 8414 }, { "epoch": 0.46903740036787245, "grad_norm": 0.5408875942230225, "learning_rate": 5.627425886511012e-05, "loss": 1.5646, "step": 8415 }, { "epoch": 0.46909313862103563, "grad_norm": 0.5847890377044678, "learning_rate": 5.626542491294359e-05, "loss": 1.7076, "step": 8416 }, { "epoch": 0.46914887687419876, "grad_norm": 0.5354915261268616, "learning_rate": 5.6256590762074315e-05, "loss": 1.5801, "step": 8417 }, { "epoch": 0.4692046151273619, "grad_norm": 0.5805383324623108, "learning_rate": 5.624775641278247e-05, "loss": 1.8075, "step": 8418 }, { "epoch": 0.4692603533805251, "grad_norm": 0.5791111588478088, "learning_rate": 5.6238921865348204e-05, "loss": 1.8437, "step": 8419 }, { "epoch": 0.4693160916336882, "grad_norm": 0.5863295793533325, "learning_rate": 5.623008712005172e-05, "loss": 1.7371, "step": 8420 }, { "epoch": 0.46937182988685133, "grad_norm": 0.5539514422416687, "learning_rate": 5.62212521771732e-05, "loss": 1.646, "step": 8421 }, { "epoch": 0.4694275681400145, "grad_norm": 0.5049216151237488, "learning_rate": 5.6212417036992826e-05, "loss": 1.447, "step": 8422 }, { "epoch": 0.46948330639317765, "grad_norm": 0.5240146517753601, "learning_rate": 5.620358169979082e-05, "loss": 1.729, "step": 8423 }, { "epoch": 0.4695390446463408, "grad_norm": 0.5284691452980042, "learning_rate": 5.619474616584734e-05, "loss": 1.5096, "step": 8424 }, { "epoch": 0.4695947828995039, "grad_norm": 0.5499683618545532, "learning_rate": 5.618591043544266e-05, "loss": 1.5803, "step": 8425 }, { "epoch": 0.4696505211526671, "grad_norm": 0.588737964630127, "learning_rate": 5.617707450885695e-05, "loss": 1.6776, "step": 8426 }, { "epoch": 0.4697062594058302, "grad_norm": 0.5827232599258423, "learning_rate": 5.6168238386370466e-05, "loss": 1.6402, "step": 8427 }, { "epoch": 0.46976199765899335, "grad_norm": 0.5729832649230957, "learning_rate": 5.615940206826341e-05, "loss": 1.7642, "step": 8428 }, { "epoch": 0.46981773591215653, "grad_norm": 0.5644805431365967, "learning_rate": 5.6150565554816035e-05, "loss": 1.7081, "step": 8429 }, { "epoch": 0.46987347416531966, "grad_norm": 0.5413994193077087, "learning_rate": 5.6141728846308586e-05, "loss": 1.7756, "step": 8430 }, { "epoch": 0.4699292124184828, "grad_norm": 0.5305155515670776, "learning_rate": 5.6132891943021304e-05, "loss": 1.5193, "step": 8431 }, { "epoch": 0.469984950671646, "grad_norm": 0.5325213074684143, "learning_rate": 5.612405484523444e-05, "loss": 1.5169, "step": 8432 }, { "epoch": 0.4700406889248091, "grad_norm": 0.5783179998397827, "learning_rate": 5.6115217553228274e-05, "loss": 1.6159, "step": 8433 }, { "epoch": 0.47009642717797223, "grad_norm": 0.5537718534469604, "learning_rate": 5.610638006728306e-05, "loss": 1.6027, "step": 8434 }, { "epoch": 0.4701521654311354, "grad_norm": 0.6395325660705566, "learning_rate": 5.609754238767907e-05, "loss": 1.3854, "step": 8435 }, { "epoch": 0.47020790368429854, "grad_norm": 0.5301234126091003, "learning_rate": 5.608870451469659e-05, "loss": 1.6888, "step": 8436 }, { "epoch": 0.4702636419374617, "grad_norm": 0.5246771574020386, "learning_rate": 5.607986644861588e-05, "loss": 1.5963, "step": 8437 }, { "epoch": 0.4703193801906248, "grad_norm": 0.5331987738609314, "learning_rate": 5.607102818971729e-05, "loss": 1.7791, "step": 8438 }, { "epoch": 0.470375118443788, "grad_norm": 0.5587426424026489, "learning_rate": 5.6062189738281056e-05, "loss": 1.744, "step": 8439 }, { "epoch": 0.4704308566969511, "grad_norm": 0.5236651301383972, "learning_rate": 5.6053351094587526e-05, "loss": 1.4963, "step": 8440 }, { "epoch": 0.47048659495011425, "grad_norm": 0.5496351718902588, "learning_rate": 5.604451225891698e-05, "loss": 1.491, "step": 8441 }, { "epoch": 0.47054233320327743, "grad_norm": 0.5666020512580872, "learning_rate": 5.603567323154975e-05, "loss": 1.6241, "step": 8442 }, { "epoch": 0.47059807145644056, "grad_norm": 0.5503633618354797, "learning_rate": 5.602683401276615e-05, "loss": 1.6522, "step": 8443 }, { "epoch": 0.4706538097096037, "grad_norm": 0.5833953022956848, "learning_rate": 5.601799460284654e-05, "loss": 1.7361, "step": 8444 }, { "epoch": 0.47070954796276687, "grad_norm": 0.5664584636688232, "learning_rate": 5.60091550020712e-05, "loss": 1.6558, "step": 8445 }, { "epoch": 0.47076528621593, "grad_norm": 0.5645166635513306, "learning_rate": 5.60003152107205e-05, "loss": 1.7492, "step": 8446 }, { "epoch": 0.47082102446909313, "grad_norm": 0.5689491629600525, "learning_rate": 5.599147522907481e-05, "loss": 1.6956, "step": 8447 }, { "epoch": 0.47087676272225626, "grad_norm": 0.6192054152488708, "learning_rate": 5.598263505741443e-05, "loss": 1.5153, "step": 8448 }, { "epoch": 0.47093250097541944, "grad_norm": 0.5669271945953369, "learning_rate": 5.597379469601978e-05, "loss": 1.5719, "step": 8449 }, { "epoch": 0.4709882392285826, "grad_norm": 0.5729002952575684, "learning_rate": 5.5964954145171145e-05, "loss": 1.7169, "step": 8450 }, { "epoch": 0.4710439774817457, "grad_norm": 0.532015323638916, "learning_rate": 5.595611340514898e-05, "loss": 1.6197, "step": 8451 }, { "epoch": 0.4710997157349089, "grad_norm": 0.5148784518241882, "learning_rate": 5.594727247623361e-05, "loss": 1.611, "step": 8452 }, { "epoch": 0.471155453988072, "grad_norm": 0.5674019455909729, "learning_rate": 5.593843135870545e-05, "loss": 1.6694, "step": 8453 }, { "epoch": 0.47121119224123514, "grad_norm": 0.5392388701438904, "learning_rate": 5.592959005284485e-05, "loss": 1.5342, "step": 8454 }, { "epoch": 0.47126693049439833, "grad_norm": 0.5939937829971313, "learning_rate": 5.592074855893223e-05, "loss": 1.7698, "step": 8455 }, { "epoch": 0.47132266874756146, "grad_norm": 0.603952169418335, "learning_rate": 5.591190687724799e-05, "loss": 1.885, "step": 8456 }, { "epoch": 0.4713784070007246, "grad_norm": 0.5169516801834106, "learning_rate": 5.590306500807253e-05, "loss": 1.4436, "step": 8457 }, { "epoch": 0.47143414525388777, "grad_norm": 0.5573791265487671, "learning_rate": 5.589422295168626e-05, "loss": 1.6708, "step": 8458 }, { "epoch": 0.4714898835070509, "grad_norm": 0.5594834685325623, "learning_rate": 5.5885380708369606e-05, "loss": 1.6496, "step": 8459 }, { "epoch": 0.47154562176021403, "grad_norm": 0.5771753787994385, "learning_rate": 5.5876538278403e-05, "loss": 1.7612, "step": 8460 }, { "epoch": 0.47160136001337716, "grad_norm": 0.5862414240837097, "learning_rate": 5.586769566206686e-05, "loss": 1.9365, "step": 8461 }, { "epoch": 0.47165709826654034, "grad_norm": 0.5807836055755615, "learning_rate": 5.585885285964163e-05, "loss": 1.623, "step": 8462 }, { "epoch": 0.47171283651970347, "grad_norm": 0.5933867692947388, "learning_rate": 5.5850009871407716e-05, "loss": 1.8284, "step": 8463 }, { "epoch": 0.4717685747728666, "grad_norm": 0.5377753973007202, "learning_rate": 5.584116669764563e-05, "loss": 1.462, "step": 8464 }, { "epoch": 0.4718243130260298, "grad_norm": 0.5384745597839355, "learning_rate": 5.583232333863577e-05, "loss": 1.5878, "step": 8465 }, { "epoch": 0.4718800512791929, "grad_norm": 0.5296236872673035, "learning_rate": 5.582347979465864e-05, "loss": 1.6045, "step": 8466 }, { "epoch": 0.47193578953235604, "grad_norm": 0.6247029304504395, "learning_rate": 5.581463606599467e-05, "loss": 1.6802, "step": 8467 }, { "epoch": 0.4719915277855192, "grad_norm": 0.5652837157249451, "learning_rate": 5.580579215292435e-05, "loss": 1.6555, "step": 8468 }, { "epoch": 0.47204726603868236, "grad_norm": 0.5700575709342957, "learning_rate": 5.5796948055728147e-05, "loss": 1.8245, "step": 8469 }, { "epoch": 0.4721030042918455, "grad_norm": 0.5366250276565552, "learning_rate": 5.578810377468656e-05, "loss": 1.8156, "step": 8470 }, { "epoch": 0.4721587425450086, "grad_norm": 0.5650043487548828, "learning_rate": 5.577925931008007e-05, "loss": 1.6757, "step": 8471 }, { "epoch": 0.4722144807981718, "grad_norm": 0.5967742204666138, "learning_rate": 5.577041466218915e-05, "loss": 1.939, "step": 8472 }, { "epoch": 0.4722702190513349, "grad_norm": 0.5320480465888977, "learning_rate": 5.576156983129435e-05, "loss": 1.5016, "step": 8473 }, { "epoch": 0.47232595730449806, "grad_norm": 0.5365233421325684, "learning_rate": 5.5752724817676125e-05, "loss": 1.5794, "step": 8474 }, { "epoch": 0.47238169555766124, "grad_norm": 0.5704277753829956, "learning_rate": 5.5743879621615026e-05, "loss": 1.5467, "step": 8475 }, { "epoch": 0.47243743381082437, "grad_norm": 0.5679128170013428, "learning_rate": 5.5735034243391537e-05, "loss": 1.6893, "step": 8476 }, { "epoch": 0.4724931720639875, "grad_norm": 0.5593464970588684, "learning_rate": 5.572618868328621e-05, "loss": 1.6293, "step": 8477 }, { "epoch": 0.4725489103171507, "grad_norm": 0.527761697769165, "learning_rate": 5.5717342941579555e-05, "loss": 1.6616, "step": 8478 }, { "epoch": 0.4726046485703138, "grad_norm": 0.5714175701141357, "learning_rate": 5.570849701855213e-05, "loss": 1.7797, "step": 8479 }, { "epoch": 0.47266038682347694, "grad_norm": 0.5801485180854797, "learning_rate": 5.569965091448446e-05, "loss": 1.6934, "step": 8480 }, { "epoch": 0.4727161250766401, "grad_norm": 0.6128066778182983, "learning_rate": 5.5690804629657076e-05, "loss": 1.8593, "step": 8481 }, { "epoch": 0.47277186332980325, "grad_norm": 0.6358544230461121, "learning_rate": 5.568195816435057e-05, "loss": 1.8292, "step": 8482 }, { "epoch": 0.4728276015829664, "grad_norm": 0.5209305882453918, "learning_rate": 5.567311151884547e-05, "loss": 1.6183, "step": 8483 }, { "epoch": 0.4728833398361295, "grad_norm": 0.5640316605567932, "learning_rate": 5.566426469342235e-05, "loss": 1.7618, "step": 8484 }, { "epoch": 0.4729390780892927, "grad_norm": 0.5284755825996399, "learning_rate": 5.565541768836178e-05, "loss": 1.6473, "step": 8485 }, { "epoch": 0.4729948163424558, "grad_norm": 0.5737931728363037, "learning_rate": 5.564657050394434e-05, "loss": 1.9419, "step": 8486 }, { "epoch": 0.47305055459561896, "grad_norm": 0.5647780299186707, "learning_rate": 5.563772314045059e-05, "loss": 1.6413, "step": 8487 }, { "epoch": 0.47310629284878214, "grad_norm": 0.5379336476325989, "learning_rate": 5.562887559816116e-05, "loss": 1.5344, "step": 8488 }, { "epoch": 0.47316203110194527, "grad_norm": 0.5728521943092346, "learning_rate": 5.562002787735657e-05, "loss": 1.6937, "step": 8489 }, { "epoch": 0.4732177693551084, "grad_norm": 0.5722839832305908, "learning_rate": 5.561117997831751e-05, "loss": 1.6869, "step": 8490 }, { "epoch": 0.4732735076082716, "grad_norm": 0.5436987280845642, "learning_rate": 5.56023319013245e-05, "loss": 1.3939, "step": 8491 }, { "epoch": 0.4733292458614347, "grad_norm": 0.5408251285552979, "learning_rate": 5.559348364665822e-05, "loss": 1.5309, "step": 8492 }, { "epoch": 0.47338498411459784, "grad_norm": 0.5417353510856628, "learning_rate": 5.5584635214599225e-05, "loss": 1.5592, "step": 8493 }, { "epoch": 0.47344072236776097, "grad_norm": 0.5821628570556641, "learning_rate": 5.557578660542816e-05, "loss": 1.5603, "step": 8494 }, { "epoch": 0.47349646062092415, "grad_norm": 0.5318421721458435, "learning_rate": 5.5566937819425656e-05, "loss": 1.5251, "step": 8495 }, { "epoch": 0.4735521988740873, "grad_norm": 0.5154527425765991, "learning_rate": 5.5558088856872346e-05, "loss": 1.572, "step": 8496 }, { "epoch": 0.4736079371272504, "grad_norm": 0.5686662197113037, "learning_rate": 5.554923971804887e-05, "loss": 1.5153, "step": 8497 }, { "epoch": 0.4736636753804136, "grad_norm": 0.5712747573852539, "learning_rate": 5.554039040323586e-05, "loss": 1.7534, "step": 8498 }, { "epoch": 0.4737194136335767, "grad_norm": 0.5434257388114929, "learning_rate": 5.5531540912713974e-05, "loss": 1.6791, "step": 8499 }, { "epoch": 0.47377515188673985, "grad_norm": 0.5522347092628479, "learning_rate": 5.552269124676386e-05, "loss": 1.7779, "step": 8500 }, { "epoch": 0.47383089013990304, "grad_norm": 0.5155788064002991, "learning_rate": 5.551384140566618e-05, "loss": 1.4377, "step": 8501 }, { "epoch": 0.47388662839306617, "grad_norm": 0.5739377737045288, "learning_rate": 5.550499138970158e-05, "loss": 1.8262, "step": 8502 }, { "epoch": 0.4739423666462293, "grad_norm": 0.5527716875076294, "learning_rate": 5.5496141199150766e-05, "loss": 1.3705, "step": 8503 }, { "epoch": 0.4739981048993925, "grad_norm": 0.5810341238975525, "learning_rate": 5.548729083429439e-05, "loss": 1.7927, "step": 8504 }, { "epoch": 0.4740538431525556, "grad_norm": 0.5541203618049622, "learning_rate": 5.547844029541316e-05, "loss": 1.7237, "step": 8505 }, { "epoch": 0.47410958140571874, "grad_norm": 0.5816789865493774, "learning_rate": 5.546958958278773e-05, "loss": 1.6761, "step": 8506 }, { "epoch": 0.47416531965888187, "grad_norm": 0.5344805121421814, "learning_rate": 5.546073869669881e-05, "loss": 1.7347, "step": 8507 }, { "epoch": 0.47422105791204505, "grad_norm": 0.5249469876289368, "learning_rate": 5.5451887637427104e-05, "loss": 1.5048, "step": 8508 }, { "epoch": 0.4742767961652082, "grad_norm": 0.5707089900970459, "learning_rate": 5.544303640525328e-05, "loss": 1.811, "step": 8509 }, { "epoch": 0.4743325344183713, "grad_norm": 0.5320430397987366, "learning_rate": 5.5434185000458114e-05, "loss": 1.7104, "step": 8510 }, { "epoch": 0.4743882726715345, "grad_norm": 0.5608380436897278, "learning_rate": 5.5425333423322255e-05, "loss": 1.7893, "step": 8511 }, { "epoch": 0.4744440109246976, "grad_norm": 0.5271068811416626, "learning_rate": 5.5416481674126474e-05, "loss": 1.7735, "step": 8512 }, { "epoch": 0.47449974917786075, "grad_norm": 0.5395051836967468, "learning_rate": 5.540762975315147e-05, "loss": 1.7249, "step": 8513 }, { "epoch": 0.47455548743102394, "grad_norm": 0.5892390012741089, "learning_rate": 5.539877766067798e-05, "loss": 1.7148, "step": 8514 }, { "epoch": 0.47461122568418707, "grad_norm": 0.5333415269851685, "learning_rate": 5.538992539698672e-05, "loss": 1.6184, "step": 8515 }, { "epoch": 0.4746669639373502, "grad_norm": 0.6480614542961121, "learning_rate": 5.538107296235847e-05, "loss": 1.6898, "step": 8516 }, { "epoch": 0.4747227021905133, "grad_norm": 0.5696564316749573, "learning_rate": 5.5372220357073955e-05, "loss": 1.7039, "step": 8517 }, { "epoch": 0.4747784404436765, "grad_norm": 0.5047008991241455, "learning_rate": 5.536336758141394e-05, "loss": 1.5221, "step": 8518 }, { "epoch": 0.47483417869683964, "grad_norm": 0.6112247705459595, "learning_rate": 5.535451463565916e-05, "loss": 1.7282, "step": 8519 }, { "epoch": 0.47488991695000277, "grad_norm": 0.5554122924804688, "learning_rate": 5.5345661520090394e-05, "loss": 1.6662, "step": 8520 }, { "epoch": 0.47494565520316595, "grad_norm": 0.5461030602455139, "learning_rate": 5.533680823498844e-05, "loss": 1.6679, "step": 8521 }, { "epoch": 0.4750013934563291, "grad_norm": 0.5860038995742798, "learning_rate": 5.5327954780634004e-05, "loss": 1.769, "step": 8522 }, { "epoch": 0.4750571317094922, "grad_norm": 0.6236945390701294, "learning_rate": 5.531910115730794e-05, "loss": 1.9089, "step": 8523 }, { "epoch": 0.4751128699626554, "grad_norm": 0.545220673084259, "learning_rate": 5.531024736529099e-05, "loss": 1.7743, "step": 8524 }, { "epoch": 0.4751686082158185, "grad_norm": 0.6534609198570251, "learning_rate": 5.5301393404863954e-05, "loss": 1.9673, "step": 8525 }, { "epoch": 0.47522434646898165, "grad_norm": 0.5649281740188599, "learning_rate": 5.529253927630762e-05, "loss": 1.6666, "step": 8526 }, { "epoch": 0.47528008472214484, "grad_norm": 0.5315033197402954, "learning_rate": 5.5283684979902815e-05, "loss": 1.678, "step": 8527 }, { "epoch": 0.47533582297530796, "grad_norm": 0.5951296091079712, "learning_rate": 5.5274830515930306e-05, "loss": 1.6429, "step": 8528 }, { "epoch": 0.4753915612284711, "grad_norm": 0.5288706421852112, "learning_rate": 5.526597588467095e-05, "loss": 1.65, "step": 8529 }, { "epoch": 0.4754472994816342, "grad_norm": 0.5894261002540588, "learning_rate": 5.525712108640553e-05, "loss": 1.6486, "step": 8530 }, { "epoch": 0.4755030377347974, "grad_norm": 0.5475479960441589, "learning_rate": 5.524826612141488e-05, "loss": 1.5981, "step": 8531 }, { "epoch": 0.47555877598796054, "grad_norm": 0.5496692657470703, "learning_rate": 5.523941098997983e-05, "loss": 1.6958, "step": 8532 }, { "epoch": 0.47561451424112366, "grad_norm": 0.6038063168525696, "learning_rate": 5.5230555692381214e-05, "loss": 1.7152, "step": 8533 }, { "epoch": 0.47567025249428685, "grad_norm": 0.5410369038581848, "learning_rate": 5.5221700228899866e-05, "loss": 1.5163, "step": 8534 }, { "epoch": 0.47572599074745, "grad_norm": 0.5673332214355469, "learning_rate": 5.521284459981662e-05, "loss": 1.6854, "step": 8535 }, { "epoch": 0.4757817290006131, "grad_norm": 0.5714686512947083, "learning_rate": 5.520398880541235e-05, "loss": 1.6205, "step": 8536 }, { "epoch": 0.4758374672537763, "grad_norm": 0.6370970606803894, "learning_rate": 5.519513284596789e-05, "loss": 1.8303, "step": 8537 }, { "epoch": 0.4758932055069394, "grad_norm": 0.5482840538024902, "learning_rate": 5.518627672176412e-05, "loss": 1.5506, "step": 8538 }, { "epoch": 0.47594894376010255, "grad_norm": 0.5282999277114868, "learning_rate": 5.5177420433081874e-05, "loss": 1.2786, "step": 8539 }, { "epoch": 0.4760046820132657, "grad_norm": 0.5575840473175049, "learning_rate": 5.516856398020205e-05, "loss": 1.5573, "step": 8540 }, { "epoch": 0.47606042026642886, "grad_norm": 0.5926665663719177, "learning_rate": 5.5159707363405485e-05, "loss": 1.7721, "step": 8541 }, { "epoch": 0.476116158519592, "grad_norm": 0.5172202587127686, "learning_rate": 5.515085058297313e-05, "loss": 1.4076, "step": 8542 }, { "epoch": 0.4761718967727551, "grad_norm": 0.581986665725708, "learning_rate": 5.514199363918578e-05, "loss": 1.7104, "step": 8543 }, { "epoch": 0.4762276350259183, "grad_norm": 0.5978564023971558, "learning_rate": 5.51331365323244e-05, "loss": 1.8326, "step": 8544 }, { "epoch": 0.47628337327908143, "grad_norm": 0.5649850368499756, "learning_rate": 5.5124279262669856e-05, "loss": 1.6206, "step": 8545 }, { "epoch": 0.47633911153224456, "grad_norm": 0.6205348372459412, "learning_rate": 5.511542183050305e-05, "loss": 1.7466, "step": 8546 }, { "epoch": 0.47639484978540775, "grad_norm": 0.5095716714859009, "learning_rate": 5.5106564236104884e-05, "loss": 1.5614, "step": 8547 }, { "epoch": 0.4764505880385709, "grad_norm": 0.5600999593734741, "learning_rate": 5.509770647975626e-05, "loss": 1.825, "step": 8548 }, { "epoch": 0.476506326291734, "grad_norm": 0.5659551620483398, "learning_rate": 5.508884856173813e-05, "loss": 1.8289, "step": 8549 }, { "epoch": 0.4765620645448972, "grad_norm": 0.524356484413147, "learning_rate": 5.507999048233138e-05, "loss": 1.591, "step": 8550 }, { "epoch": 0.4766178027980603, "grad_norm": 0.5709447860717773, "learning_rate": 5.507113224181696e-05, "loss": 1.6152, "step": 8551 }, { "epoch": 0.47667354105122345, "grad_norm": 0.5852453112602234, "learning_rate": 5.506227384047579e-05, "loss": 1.7522, "step": 8552 }, { "epoch": 0.4767292793043866, "grad_norm": 0.6322617530822754, "learning_rate": 5.50534152785888e-05, "loss": 1.8002, "step": 8553 }, { "epoch": 0.47678501755754976, "grad_norm": 0.6037564277648926, "learning_rate": 5.504455655643694e-05, "loss": 1.7472, "step": 8554 }, { "epoch": 0.4768407558107129, "grad_norm": 0.6172270774841309, "learning_rate": 5.503569767430118e-05, "loss": 1.7638, "step": 8555 }, { "epoch": 0.476896494063876, "grad_norm": 0.5917114615440369, "learning_rate": 5.502683863246243e-05, "loss": 1.7726, "step": 8556 }, { "epoch": 0.4769522323170392, "grad_norm": 0.5618294477462769, "learning_rate": 5.5017979431201675e-05, "loss": 1.5519, "step": 8557 }, { "epoch": 0.47700797057020233, "grad_norm": 0.5710815191268921, "learning_rate": 5.500912007079987e-05, "loss": 1.6896, "step": 8558 }, { "epoch": 0.47706370882336546, "grad_norm": 0.5609897971153259, "learning_rate": 5.5000260551537975e-05, "loss": 1.7455, "step": 8559 }, { "epoch": 0.47711944707652865, "grad_norm": 0.5565608739852905, "learning_rate": 5.499140087369697e-05, "loss": 1.5399, "step": 8560 }, { "epoch": 0.4771751853296918, "grad_norm": 0.5751162767410278, "learning_rate": 5.4982541037557823e-05, "loss": 1.5373, "step": 8561 }, { "epoch": 0.4772309235828549, "grad_norm": 0.5089201927185059, "learning_rate": 5.4973681043401534e-05, "loss": 1.2027, "step": 8562 }, { "epoch": 0.47728666183601803, "grad_norm": 0.5925856232643127, "learning_rate": 5.496482089150908e-05, "loss": 1.9377, "step": 8563 }, { "epoch": 0.4773424000891812, "grad_norm": 0.5660269260406494, "learning_rate": 5.495596058216147e-05, "loss": 1.4814, "step": 8564 }, { "epoch": 0.47739813834234435, "grad_norm": 0.5554754734039307, "learning_rate": 5.494710011563966e-05, "loss": 1.6303, "step": 8565 }, { "epoch": 0.4774538765955075, "grad_norm": 0.6004930138587952, "learning_rate": 5.49382394922247e-05, "loss": 1.6204, "step": 8566 }, { "epoch": 0.47750961484867066, "grad_norm": 0.5308135747909546, "learning_rate": 5.4929378712197556e-05, "loss": 1.5949, "step": 8567 }, { "epoch": 0.4775653531018338, "grad_norm": 0.5763102769851685, "learning_rate": 5.4920517775839276e-05, "loss": 1.7625, "step": 8568 }, { "epoch": 0.4776210913549969, "grad_norm": 0.572308361530304, "learning_rate": 5.491165668343085e-05, "loss": 1.7809, "step": 8569 }, { "epoch": 0.4776768296081601, "grad_norm": 0.6404359340667725, "learning_rate": 5.4902795435253306e-05, "loss": 2.0053, "step": 8570 }, { "epoch": 0.47773256786132323, "grad_norm": 0.5613745450973511, "learning_rate": 5.489393403158769e-05, "loss": 1.8136, "step": 8571 }, { "epoch": 0.47778830611448636, "grad_norm": 0.5631322860717773, "learning_rate": 5.488507247271502e-05, "loss": 1.9469, "step": 8572 }, { "epoch": 0.47784404436764955, "grad_norm": 0.5425231456756592, "learning_rate": 5.487621075891632e-05, "loss": 1.7089, "step": 8573 }, { "epoch": 0.4778997826208127, "grad_norm": 0.6085340976715088, "learning_rate": 5.4867348890472646e-05, "loss": 1.8108, "step": 8574 }, { "epoch": 0.4779555208739758, "grad_norm": 0.5472151637077332, "learning_rate": 5.485848686766506e-05, "loss": 1.5179, "step": 8575 }, { "epoch": 0.47801125912713893, "grad_norm": 0.5451512336730957, "learning_rate": 5.484962469077458e-05, "loss": 1.6112, "step": 8576 }, { "epoch": 0.4780669973803021, "grad_norm": 0.5663710236549377, "learning_rate": 5.4840762360082286e-05, "loss": 1.6932, "step": 8577 }, { "epoch": 0.47812273563346525, "grad_norm": 0.5614507794380188, "learning_rate": 5.483189987586924e-05, "loss": 1.7001, "step": 8578 }, { "epoch": 0.4781784738866284, "grad_norm": 0.5428431034088135, "learning_rate": 5.4823037238416506e-05, "loss": 1.7767, "step": 8579 }, { "epoch": 0.47823421213979156, "grad_norm": 0.5602681636810303, "learning_rate": 5.481417444800512e-05, "loss": 1.6749, "step": 8580 }, { "epoch": 0.4782899503929547, "grad_norm": 0.5648148655891418, "learning_rate": 5.480531150491622e-05, "loss": 1.723, "step": 8581 }, { "epoch": 0.4783456886461178, "grad_norm": 0.5764549970626831, "learning_rate": 5.4796448409430845e-05, "loss": 1.8049, "step": 8582 }, { "epoch": 0.478401426899281, "grad_norm": 0.5871893167495728, "learning_rate": 5.478758516183009e-05, "loss": 1.979, "step": 8583 }, { "epoch": 0.47845716515244413, "grad_norm": 0.5481773018836975, "learning_rate": 5.477872176239506e-05, "loss": 1.738, "step": 8584 }, { "epoch": 0.47851290340560726, "grad_norm": 0.5214368104934692, "learning_rate": 5.4769858211406824e-05, "loss": 1.5133, "step": 8585 }, { "epoch": 0.4785686416587704, "grad_norm": 0.5468040704727173, "learning_rate": 5.4760994509146514e-05, "loss": 1.6054, "step": 8586 }, { "epoch": 0.4786243799119336, "grad_norm": 0.5729833841323853, "learning_rate": 5.475213065589518e-05, "loss": 1.4712, "step": 8587 }, { "epoch": 0.4786801181650967, "grad_norm": 0.558814525604248, "learning_rate": 5.4743266651934e-05, "loss": 1.4907, "step": 8588 }, { "epoch": 0.47873585641825983, "grad_norm": 0.5633212924003601, "learning_rate": 5.4734402497544044e-05, "loss": 1.4832, "step": 8589 }, { "epoch": 0.478791594671423, "grad_norm": 0.6136720180511475, "learning_rate": 5.472553819300645e-05, "loss": 1.6588, "step": 8590 }, { "epoch": 0.47884733292458614, "grad_norm": 0.537601113319397, "learning_rate": 5.471667373860234e-05, "loss": 1.6905, "step": 8591 }, { "epoch": 0.4789030711777493, "grad_norm": 0.5937305688858032, "learning_rate": 5.4707809134612844e-05, "loss": 1.7177, "step": 8592 }, { "epoch": 0.47895880943091246, "grad_norm": 0.6321950554847717, "learning_rate": 5.469894438131906e-05, "loss": 1.8388, "step": 8593 }, { "epoch": 0.4790145476840756, "grad_norm": 0.5728781223297119, "learning_rate": 5.469007947900219e-05, "loss": 1.9354, "step": 8594 }, { "epoch": 0.4790702859372387, "grad_norm": 0.5851932764053345, "learning_rate": 5.468121442794333e-05, "loss": 1.6465, "step": 8595 }, { "epoch": 0.4791260241904019, "grad_norm": 0.5869148969650269, "learning_rate": 5.467234922842363e-05, "loss": 1.8636, "step": 8596 }, { "epoch": 0.47918176244356503, "grad_norm": 0.5678532719612122, "learning_rate": 5.4663483880724275e-05, "loss": 1.7346, "step": 8597 }, { "epoch": 0.47923750069672816, "grad_norm": 0.5783692598342896, "learning_rate": 5.46546183851264e-05, "loss": 1.8068, "step": 8598 }, { "epoch": 0.4792932389498913, "grad_norm": 0.5361393690109253, "learning_rate": 5.464575274191116e-05, "loss": 1.4534, "step": 8599 }, { "epoch": 0.47934897720305447, "grad_norm": 0.5204313397407532, "learning_rate": 5.4636886951359726e-05, "loss": 1.5212, "step": 8600 }, { "epoch": 0.4794047154562176, "grad_norm": 0.5215826630592346, "learning_rate": 5.4628021013753284e-05, "loss": 1.6756, "step": 8601 }, { "epoch": 0.47946045370938073, "grad_norm": 0.5335747599601746, "learning_rate": 5.461915492937299e-05, "loss": 1.7895, "step": 8602 }, { "epoch": 0.4795161919625439, "grad_norm": 0.5702705979347229, "learning_rate": 5.461028869850004e-05, "loss": 1.7024, "step": 8603 }, { "epoch": 0.47957193021570704, "grad_norm": 0.5771311521530151, "learning_rate": 5.4601422321415606e-05, "loss": 1.7879, "step": 8604 }, { "epoch": 0.47962766846887017, "grad_norm": 0.5826980471611023, "learning_rate": 5.459255579840089e-05, "loss": 1.6198, "step": 8605 }, { "epoch": 0.47968340672203336, "grad_norm": 0.5219647288322449, "learning_rate": 5.458368912973707e-05, "loss": 1.6159, "step": 8606 }, { "epoch": 0.4797391449751965, "grad_norm": 0.5676286220550537, "learning_rate": 5.4574822315705366e-05, "loss": 1.6843, "step": 8607 }, { "epoch": 0.4797948832283596, "grad_norm": 0.5792801380157471, "learning_rate": 5.456595535658696e-05, "loss": 1.8092, "step": 8608 }, { "epoch": 0.47985062148152274, "grad_norm": 0.5464149713516235, "learning_rate": 5.455708825266308e-05, "loss": 1.7726, "step": 8609 }, { "epoch": 0.47990635973468593, "grad_norm": 0.597957968711853, "learning_rate": 5.4548221004214936e-05, "loss": 1.7107, "step": 8610 }, { "epoch": 0.47996209798784906, "grad_norm": 0.5609841346740723, "learning_rate": 5.453935361152374e-05, "loss": 1.5578, "step": 8611 }, { "epoch": 0.4800178362410122, "grad_norm": 0.5753505229949951, "learning_rate": 5.45304860748707e-05, "loss": 1.8959, "step": 8612 }, { "epoch": 0.48007357449417537, "grad_norm": 0.5798444747924805, "learning_rate": 5.4521618394537056e-05, "loss": 1.9346, "step": 8613 }, { "epoch": 0.4801293127473385, "grad_norm": 0.536660373210907, "learning_rate": 5.451275057080405e-05, "loss": 1.6191, "step": 8614 }, { "epoch": 0.48018505100050163, "grad_norm": 0.5759127736091614, "learning_rate": 5.4503882603952905e-05, "loss": 1.6555, "step": 8615 }, { "epoch": 0.4802407892536648, "grad_norm": 0.5895690321922302, "learning_rate": 5.449501449426487e-05, "loss": 1.7481, "step": 8616 }, { "epoch": 0.48029652750682794, "grad_norm": 0.5727548003196716, "learning_rate": 5.448614624202117e-05, "loss": 1.7338, "step": 8617 }, { "epoch": 0.48035226575999107, "grad_norm": 0.5720645189285278, "learning_rate": 5.447727784750308e-05, "loss": 1.7127, "step": 8618 }, { "epoch": 0.48040800401315426, "grad_norm": 0.5797655582427979, "learning_rate": 5.446840931099182e-05, "loss": 1.733, "step": 8619 }, { "epoch": 0.4804637422663174, "grad_norm": 0.5146819949150085, "learning_rate": 5.445954063276869e-05, "loss": 1.5931, "step": 8620 }, { "epoch": 0.4805194805194805, "grad_norm": 0.5465497970581055, "learning_rate": 5.445067181311492e-05, "loss": 1.6994, "step": 8621 }, { "epoch": 0.48057521877264364, "grad_norm": 0.5129651427268982, "learning_rate": 5.4441802852311795e-05, "loss": 1.5357, "step": 8622 }, { "epoch": 0.4806309570258068, "grad_norm": 0.5457690954208374, "learning_rate": 5.443293375064058e-05, "loss": 1.5543, "step": 8623 }, { "epoch": 0.48068669527896996, "grad_norm": 0.5993552207946777, "learning_rate": 5.4424064508382556e-05, "loss": 1.902, "step": 8624 }, { "epoch": 0.4807424335321331, "grad_norm": 0.5725103616714478, "learning_rate": 5.4415195125819e-05, "loss": 1.7444, "step": 8625 }, { "epoch": 0.48079817178529627, "grad_norm": 0.5666811466217041, "learning_rate": 5.440632560323118e-05, "loss": 1.6553, "step": 8626 }, { "epoch": 0.4808539100384594, "grad_norm": 0.5566148161888123, "learning_rate": 5.439745594090042e-05, "loss": 1.3808, "step": 8627 }, { "epoch": 0.4809096482916225, "grad_norm": 0.5133042335510254, "learning_rate": 5.438858613910799e-05, "loss": 1.5705, "step": 8628 }, { "epoch": 0.4809653865447857, "grad_norm": 0.6130719780921936, "learning_rate": 5.43797161981352e-05, "loss": 1.9702, "step": 8629 }, { "epoch": 0.48102112479794884, "grad_norm": 0.5869434475898743, "learning_rate": 5.4370846118263354e-05, "loss": 1.8149, "step": 8630 }, { "epoch": 0.48107686305111197, "grad_norm": 0.5676392316818237, "learning_rate": 5.436197589977374e-05, "loss": 1.5798, "step": 8631 }, { "epoch": 0.4811326013042751, "grad_norm": 0.5470464825630188, "learning_rate": 5.435310554294769e-05, "loss": 1.6549, "step": 8632 }, { "epoch": 0.4811883395574383, "grad_norm": 0.5741833448410034, "learning_rate": 5.434423504806651e-05, "loss": 1.7124, "step": 8633 }, { "epoch": 0.4812440778106014, "grad_norm": 0.5436912178993225, "learning_rate": 5.433536441541152e-05, "loss": 1.568, "step": 8634 }, { "epoch": 0.48129981606376454, "grad_norm": 0.5380058884620667, "learning_rate": 5.432649364526403e-05, "loss": 1.4785, "step": 8635 }, { "epoch": 0.4813555543169277, "grad_norm": 0.5699672102928162, "learning_rate": 5.4317622737905413e-05, "loss": 1.4929, "step": 8636 }, { "epoch": 0.48141129257009085, "grad_norm": 0.565059244632721, "learning_rate": 5.4308751693616975e-05, "loss": 1.7861, "step": 8637 }, { "epoch": 0.481467030823254, "grad_norm": 0.5427149534225464, "learning_rate": 5.429988051268006e-05, "loss": 1.6655, "step": 8638 }, { "epoch": 0.48152276907641717, "grad_norm": 0.5943994522094727, "learning_rate": 5.429100919537597e-05, "loss": 1.8461, "step": 8639 }, { "epoch": 0.4815785073295803, "grad_norm": 0.5920754671096802, "learning_rate": 5.4282137741986125e-05, "loss": 1.9077, "step": 8640 }, { "epoch": 0.4816342455827434, "grad_norm": 0.5471158623695374, "learning_rate": 5.427326615279182e-05, "loss": 1.6468, "step": 8641 }, { "epoch": 0.4816899838359066, "grad_norm": 0.5595037341117859, "learning_rate": 5.426439442807444e-05, "loss": 1.7315, "step": 8642 }, { "epoch": 0.48174572208906974, "grad_norm": 0.5808396935462952, "learning_rate": 5.4255522568115314e-05, "loss": 1.8597, "step": 8643 }, { "epoch": 0.48180146034223287, "grad_norm": 0.5106577277183533, "learning_rate": 5.424665057319584e-05, "loss": 1.4579, "step": 8644 }, { "epoch": 0.481857198595396, "grad_norm": 0.5588060617446899, "learning_rate": 5.4237778443597366e-05, "loss": 1.7045, "step": 8645 }, { "epoch": 0.4819129368485592, "grad_norm": 0.5763769149780273, "learning_rate": 5.4228906179601256e-05, "loss": 1.7194, "step": 8646 }, { "epoch": 0.4819686751017223, "grad_norm": 0.5877617597579956, "learning_rate": 5.42200337814889e-05, "loss": 1.8115, "step": 8647 }, { "epoch": 0.48202441335488544, "grad_norm": 0.588557779788971, "learning_rate": 5.421116124954169e-05, "loss": 1.7122, "step": 8648 }, { "epoch": 0.4820801516080486, "grad_norm": 0.5687382221221924, "learning_rate": 5.4202288584040996e-05, "loss": 1.6734, "step": 8649 }, { "epoch": 0.48213588986121175, "grad_norm": 0.5797961950302124, "learning_rate": 5.4193415785268195e-05, "loss": 1.9098, "step": 8650 }, { "epoch": 0.4821916281143749, "grad_norm": 0.5459732413291931, "learning_rate": 5.418454285350472e-05, "loss": 1.5751, "step": 8651 }, { "epoch": 0.48224736636753807, "grad_norm": 0.6237668991088867, "learning_rate": 5.4175669789031904e-05, "loss": 1.9574, "step": 8652 }, { "epoch": 0.4823031046207012, "grad_norm": 0.5237795114517212, "learning_rate": 5.4166796592131216e-05, "loss": 1.6274, "step": 8653 }, { "epoch": 0.4823588428738643, "grad_norm": 0.8351784348487854, "learning_rate": 5.415792326308403e-05, "loss": 1.6101, "step": 8654 }, { "epoch": 0.48241458112702745, "grad_norm": 0.553855836391449, "learning_rate": 5.414904980217177e-05, "loss": 1.7006, "step": 8655 }, { "epoch": 0.48247031938019064, "grad_norm": 0.5128687620162964, "learning_rate": 5.414017620967582e-05, "loss": 1.5782, "step": 8656 }, { "epoch": 0.48252605763335377, "grad_norm": 0.5743347406387329, "learning_rate": 5.4131302485877635e-05, "loss": 1.8762, "step": 8657 }, { "epoch": 0.4825817958865169, "grad_norm": 0.5579991936683655, "learning_rate": 5.412242863105862e-05, "loss": 1.6882, "step": 8658 }, { "epoch": 0.4826375341396801, "grad_norm": 0.5496572256088257, "learning_rate": 5.41135546455002e-05, "loss": 1.6909, "step": 8659 }, { "epoch": 0.4826932723928432, "grad_norm": 0.5845061540603638, "learning_rate": 5.410468052948381e-05, "loss": 1.8966, "step": 8660 }, { "epoch": 0.48274901064600634, "grad_norm": 0.5628004670143127, "learning_rate": 5.409580628329088e-05, "loss": 1.6114, "step": 8661 }, { "epoch": 0.4828047488991695, "grad_norm": 0.52235347032547, "learning_rate": 5.408693190720288e-05, "loss": 1.4296, "step": 8662 }, { "epoch": 0.48286048715233265, "grad_norm": 0.5655858516693115, "learning_rate": 5.40780574015012e-05, "loss": 1.7761, "step": 8663 }, { "epoch": 0.4829162254054958, "grad_norm": 0.5697308778762817, "learning_rate": 5.406918276646733e-05, "loss": 1.7426, "step": 8664 }, { "epoch": 0.48297196365865896, "grad_norm": 0.5626512169837952, "learning_rate": 5.40603080023827e-05, "loss": 1.5949, "step": 8665 }, { "epoch": 0.4830277019118221, "grad_norm": 0.6178479194641113, "learning_rate": 5.405143310952878e-05, "loss": 1.9571, "step": 8666 }, { "epoch": 0.4830834401649852, "grad_norm": 0.6123231053352356, "learning_rate": 5.4042558088187014e-05, "loss": 1.9154, "step": 8667 }, { "epoch": 0.48313917841814835, "grad_norm": 0.5526097416877747, "learning_rate": 5.40336829386389e-05, "loss": 1.5508, "step": 8668 }, { "epoch": 0.48319491667131154, "grad_norm": 0.5456022620201111, "learning_rate": 5.4024807661165855e-05, "loss": 1.5887, "step": 8669 }, { "epoch": 0.48325065492447467, "grad_norm": 0.49078524112701416, "learning_rate": 5.4015932256049386e-05, "loss": 1.5876, "step": 8670 }, { "epoch": 0.4833063931776378, "grad_norm": 0.5714897513389587, "learning_rate": 5.4007056723570956e-05, "loss": 1.8633, "step": 8671 }, { "epoch": 0.483362131430801, "grad_norm": 0.6069988012313843, "learning_rate": 5.399818106401206e-05, "loss": 1.7922, "step": 8672 }, { "epoch": 0.4834178696839641, "grad_norm": 0.5466931462287903, "learning_rate": 5.3989305277654156e-05, "loss": 1.7496, "step": 8673 }, { "epoch": 0.48347360793712724, "grad_norm": 0.562350869178772, "learning_rate": 5.398042936477875e-05, "loss": 1.6191, "step": 8674 }, { "epoch": 0.4835293461902904, "grad_norm": 0.5562702417373657, "learning_rate": 5.397155332566736e-05, "loss": 1.8695, "step": 8675 }, { "epoch": 0.48358508444345355, "grad_norm": 0.598784863948822, "learning_rate": 5.3962677160601426e-05, "loss": 1.5275, "step": 8676 }, { "epoch": 0.4836408226966167, "grad_norm": 0.5225400924682617, "learning_rate": 5.395380086986249e-05, "loss": 1.4847, "step": 8677 }, { "epoch": 0.4836965609497798, "grad_norm": 0.58516925573349, "learning_rate": 5.3944924453732014e-05, "loss": 1.652, "step": 8678 }, { "epoch": 0.483752299202943, "grad_norm": 0.5312181115150452, "learning_rate": 5.3936047912491574e-05, "loss": 1.356, "step": 8679 }, { "epoch": 0.4838080374561061, "grad_norm": 0.5645095109939575, "learning_rate": 5.3927171246422615e-05, "loss": 1.7965, "step": 8680 }, { "epoch": 0.48386377570926925, "grad_norm": 0.5576086044311523, "learning_rate": 5.39182944558067e-05, "loss": 1.6595, "step": 8681 }, { "epoch": 0.48391951396243243, "grad_norm": 0.5667631030082703, "learning_rate": 5.390941754092532e-05, "loss": 1.6973, "step": 8682 }, { "epoch": 0.48397525221559556, "grad_norm": 0.5693982243537903, "learning_rate": 5.3900540502060015e-05, "loss": 1.6383, "step": 8683 }, { "epoch": 0.4840309904687587, "grad_norm": 0.5972820520401001, "learning_rate": 5.3891663339492306e-05, "loss": 1.73, "step": 8684 }, { "epoch": 0.4840867287219219, "grad_norm": 0.5453163385391235, "learning_rate": 5.388278605350372e-05, "loss": 1.5295, "step": 8685 }, { "epoch": 0.484142466975085, "grad_norm": 0.5659864544868469, "learning_rate": 5.38739086443758e-05, "loss": 1.6765, "step": 8686 }, { "epoch": 0.48419820522824814, "grad_norm": 0.5438006520271301, "learning_rate": 5.386503111239008e-05, "loss": 1.5357, "step": 8687 }, { "epoch": 0.4842539434814113, "grad_norm": 0.5650402903556824, "learning_rate": 5.385615345782813e-05, "loss": 1.7396, "step": 8688 }, { "epoch": 0.48430968173457445, "grad_norm": 0.5356137156486511, "learning_rate": 5.3847275680971454e-05, "loss": 1.7116, "step": 8689 }, { "epoch": 0.4843654199877376, "grad_norm": 0.5687363743782043, "learning_rate": 5.383839778210163e-05, "loss": 1.6747, "step": 8690 }, { "epoch": 0.4844211582409007, "grad_norm": 0.5704367756843567, "learning_rate": 5.38295197615002e-05, "loss": 1.5563, "step": 8691 }, { "epoch": 0.4844768964940639, "grad_norm": 0.6154001355171204, "learning_rate": 5.382064161944874e-05, "loss": 2.1129, "step": 8692 }, { "epoch": 0.484532634747227, "grad_norm": 0.5885458588600159, "learning_rate": 5.3811763356228804e-05, "loss": 1.6652, "step": 8693 }, { "epoch": 0.48458837300039015, "grad_norm": 0.5427495837211609, "learning_rate": 5.3802884972121955e-05, "loss": 1.7085, "step": 8694 }, { "epoch": 0.48464411125355333, "grad_norm": 0.5415340065956116, "learning_rate": 5.379400646740977e-05, "loss": 1.7126, "step": 8695 }, { "epoch": 0.48469984950671646, "grad_norm": 0.50815749168396, "learning_rate": 5.3785127842373814e-05, "loss": 1.7257, "step": 8696 }, { "epoch": 0.4847555877598796, "grad_norm": 0.5710844397544861, "learning_rate": 5.3776249097295696e-05, "loss": 1.6778, "step": 8697 }, { "epoch": 0.4848113260130428, "grad_norm": 0.5827280282974243, "learning_rate": 5.376737023245695e-05, "loss": 1.717, "step": 8698 }, { "epoch": 0.4848670642662059, "grad_norm": 0.6222889423370361, "learning_rate": 5.375849124813919e-05, "loss": 1.9998, "step": 8699 }, { "epoch": 0.48492280251936903, "grad_norm": 0.5893861651420593, "learning_rate": 5.3749612144623995e-05, "loss": 1.9211, "step": 8700 }, { "epoch": 0.48497854077253216, "grad_norm": 0.5538213849067688, "learning_rate": 5.374073292219297e-05, "loss": 1.7934, "step": 8701 }, { "epoch": 0.48503427902569535, "grad_norm": 0.5892875790596008, "learning_rate": 5.3731853581127714e-05, "loss": 1.8932, "step": 8702 }, { "epoch": 0.4850900172788585, "grad_norm": 0.5553523302078247, "learning_rate": 5.3722974121709815e-05, "loss": 1.7465, "step": 8703 }, { "epoch": 0.4851457555320216, "grad_norm": 0.57076096534729, "learning_rate": 5.371409454422087e-05, "loss": 1.7025, "step": 8704 }, { "epoch": 0.4852014937851848, "grad_norm": 0.5483660101890564, "learning_rate": 5.370521484894252e-05, "loss": 1.6435, "step": 8705 }, { "epoch": 0.4852572320383479, "grad_norm": 0.5742903351783752, "learning_rate": 5.3696335036156345e-05, "loss": 1.7067, "step": 8706 }, { "epoch": 0.48531297029151105, "grad_norm": 0.5819395184516907, "learning_rate": 5.368745510614399e-05, "loss": 1.6528, "step": 8707 }, { "epoch": 0.48536870854467423, "grad_norm": 0.5477610230445862, "learning_rate": 5.367857505918704e-05, "loss": 1.8253, "step": 8708 }, { "epoch": 0.48542444679783736, "grad_norm": 0.6026375889778137, "learning_rate": 5.3669694895567145e-05, "loss": 1.8483, "step": 8709 }, { "epoch": 0.4854801850510005, "grad_norm": 0.49743878841400146, "learning_rate": 5.366081461556593e-05, "loss": 1.4705, "step": 8710 }, { "epoch": 0.4855359233041637, "grad_norm": 0.5510653853416443, "learning_rate": 5.365193421946502e-05, "loss": 1.4843, "step": 8711 }, { "epoch": 0.4855916615573268, "grad_norm": 0.5583814978599548, "learning_rate": 5.3643053707546034e-05, "loss": 1.6045, "step": 8712 }, { "epoch": 0.48564739981048993, "grad_norm": 0.5511784553527832, "learning_rate": 5.363417308009062e-05, "loss": 1.7184, "step": 8713 }, { "epoch": 0.48570313806365306, "grad_norm": 0.5590716600418091, "learning_rate": 5.362529233738045e-05, "loss": 1.6326, "step": 8714 }, { "epoch": 0.48575887631681625, "grad_norm": 0.564095139503479, "learning_rate": 5.361641147969713e-05, "loss": 1.6036, "step": 8715 }, { "epoch": 0.4858146145699794, "grad_norm": 0.6147303581237793, "learning_rate": 5.3607530507322334e-05, "loss": 1.8542, "step": 8716 }, { "epoch": 0.4858703528231425, "grad_norm": 0.556438684463501, "learning_rate": 5.3598649420537675e-05, "loss": 1.6413, "step": 8717 }, { "epoch": 0.4859260910763057, "grad_norm": 0.5851439237594604, "learning_rate": 5.358976821962487e-05, "loss": 1.7414, "step": 8718 }, { "epoch": 0.4859818293294688, "grad_norm": 0.5886179804801941, "learning_rate": 5.358088690486553e-05, "loss": 1.623, "step": 8719 }, { "epoch": 0.48603756758263195, "grad_norm": 0.5328960418701172, "learning_rate": 5.357200547654134e-05, "loss": 1.4861, "step": 8720 }, { "epoch": 0.48609330583579513, "grad_norm": 0.5452643036842346, "learning_rate": 5.356312393493396e-05, "loss": 1.763, "step": 8721 }, { "epoch": 0.48614904408895826, "grad_norm": 0.5395748019218445, "learning_rate": 5.3554242280325064e-05, "loss": 1.4284, "step": 8722 }, { "epoch": 0.4862047823421214, "grad_norm": 0.6557826399803162, "learning_rate": 5.354536051299634e-05, "loss": 1.8725, "step": 8723 }, { "epoch": 0.4862605205952845, "grad_norm": 0.5590106248855591, "learning_rate": 5.353647863322943e-05, "loss": 1.6673, "step": 8724 }, { "epoch": 0.4863162588484477, "grad_norm": 0.560207188129425, "learning_rate": 5.3527596641306034e-05, "loss": 1.7026, "step": 8725 }, { "epoch": 0.48637199710161083, "grad_norm": 0.54021817445755, "learning_rate": 5.3518714537507855e-05, "loss": 1.3786, "step": 8726 }, { "epoch": 0.48642773535477396, "grad_norm": 0.5303489565849304, "learning_rate": 5.350983232211657e-05, "loss": 1.5461, "step": 8727 }, { "epoch": 0.48648347360793714, "grad_norm": 0.5234289169311523, "learning_rate": 5.350094999541385e-05, "loss": 1.8215, "step": 8728 }, { "epoch": 0.4865392118611003, "grad_norm": 0.6171209216117859, "learning_rate": 5.349206755768142e-05, "loss": 1.6419, "step": 8729 }, { "epoch": 0.4865949501142634, "grad_norm": 0.5630922317504883, "learning_rate": 5.3483185009200955e-05, "loss": 1.7303, "step": 8730 }, { "epoch": 0.4866506883674266, "grad_norm": 0.5881733298301697, "learning_rate": 5.347430235025419e-05, "loss": 1.8506, "step": 8731 }, { "epoch": 0.4867064266205897, "grad_norm": 0.5110684633255005, "learning_rate": 5.34654195811228e-05, "loss": 1.4549, "step": 8732 }, { "epoch": 0.48676216487375285, "grad_norm": 0.5621329545974731, "learning_rate": 5.345653670208851e-05, "loss": 1.6001, "step": 8733 }, { "epoch": 0.48681790312691603, "grad_norm": 0.5230090022087097, "learning_rate": 5.344765371343302e-05, "loss": 1.7102, "step": 8734 }, { "epoch": 0.48687364138007916, "grad_norm": 0.5325090289115906, "learning_rate": 5.343877061543806e-05, "loss": 1.5661, "step": 8735 }, { "epoch": 0.4869293796332423, "grad_norm": 0.5863301753997803, "learning_rate": 5.342988740838535e-05, "loss": 1.9036, "step": 8736 }, { "epoch": 0.4869851178864054, "grad_norm": 0.5872917175292969, "learning_rate": 5.342100409255659e-05, "loss": 1.8516, "step": 8737 }, { "epoch": 0.4870408561395686, "grad_norm": 0.5677287578582764, "learning_rate": 5.341212066823355e-05, "loss": 1.5462, "step": 8738 }, { "epoch": 0.48709659439273173, "grad_norm": 0.5717810392379761, "learning_rate": 5.340323713569792e-05, "loss": 1.7118, "step": 8739 }, { "epoch": 0.48715233264589486, "grad_norm": 0.5940883159637451, "learning_rate": 5.339435349523148e-05, "loss": 1.8225, "step": 8740 }, { "epoch": 0.48720807089905804, "grad_norm": 0.6162937879562378, "learning_rate": 5.33854697471159e-05, "loss": 1.9512, "step": 8741 }, { "epoch": 0.4872638091522212, "grad_norm": 0.5418954491615295, "learning_rate": 5.337658589163299e-05, "loss": 1.6836, "step": 8742 }, { "epoch": 0.4873195474053843, "grad_norm": 0.5783557295799255, "learning_rate": 5.3367701929064426e-05, "loss": 1.709, "step": 8743 }, { "epoch": 0.4873752856585475, "grad_norm": 0.5385530591011047, "learning_rate": 5.3358817859692025e-05, "loss": 1.5885, "step": 8744 }, { "epoch": 0.4874310239117106, "grad_norm": 0.5666008591651917, "learning_rate": 5.334993368379748e-05, "loss": 1.6946, "step": 8745 }, { "epoch": 0.48748676216487374, "grad_norm": 0.549767255783081, "learning_rate": 5.3341049401662594e-05, "loss": 1.5776, "step": 8746 }, { "epoch": 0.4875425004180369, "grad_norm": 0.5610424280166626, "learning_rate": 5.333216501356909e-05, "loss": 1.6057, "step": 8747 }, { "epoch": 0.48759823867120006, "grad_norm": 0.5643283724784851, "learning_rate": 5.332328051979873e-05, "loss": 1.7629, "step": 8748 }, { "epoch": 0.4876539769243632, "grad_norm": 0.5474547743797302, "learning_rate": 5.3314395920633306e-05, "loss": 1.7972, "step": 8749 }, { "epoch": 0.4877097151775263, "grad_norm": 0.56900554895401, "learning_rate": 5.330551121635454e-05, "loss": 1.7521, "step": 8750 }, { "epoch": 0.4877654534306895, "grad_norm": 0.6560434103012085, "learning_rate": 5.329662640724426e-05, "loss": 1.4613, "step": 8751 }, { "epoch": 0.48782119168385263, "grad_norm": 0.5190215110778809, "learning_rate": 5.32877414935842e-05, "loss": 1.4367, "step": 8752 }, { "epoch": 0.48787692993701576, "grad_norm": 0.5503537058830261, "learning_rate": 5.3278856475656144e-05, "loss": 1.649, "step": 8753 }, { "epoch": 0.48793266819017894, "grad_norm": 0.5634624361991882, "learning_rate": 5.326997135374189e-05, "loss": 1.9406, "step": 8754 }, { "epoch": 0.48798840644334207, "grad_norm": 0.5632345676422119, "learning_rate": 5.3261086128123206e-05, "loss": 1.6661, "step": 8755 }, { "epoch": 0.4880441446965052, "grad_norm": 0.6362982392311096, "learning_rate": 5.3252200799081875e-05, "loss": 1.9258, "step": 8756 }, { "epoch": 0.4880998829496684, "grad_norm": 0.5737461447715759, "learning_rate": 5.3243315366899694e-05, "loss": 1.6868, "step": 8757 }, { "epoch": 0.4881556212028315, "grad_norm": 0.5335796475410461, "learning_rate": 5.3234429831858466e-05, "loss": 1.4586, "step": 8758 }, { "epoch": 0.48821135945599464, "grad_norm": 0.5574231743812561, "learning_rate": 5.3225544194239984e-05, "loss": 1.6262, "step": 8759 }, { "epoch": 0.48826709770915777, "grad_norm": 0.5251532196998596, "learning_rate": 5.3216658454326043e-05, "loss": 1.5789, "step": 8760 }, { "epoch": 0.48832283596232096, "grad_norm": 0.5983790159225464, "learning_rate": 5.3207772612398444e-05, "loss": 1.8751, "step": 8761 }, { "epoch": 0.4883785742154841, "grad_norm": 0.5940685272216797, "learning_rate": 5.319888666873902e-05, "loss": 1.5181, "step": 8762 }, { "epoch": 0.4884343124686472, "grad_norm": 0.5403158664703369, "learning_rate": 5.319000062362953e-05, "loss": 1.6698, "step": 8763 }, { "epoch": 0.4884900507218104, "grad_norm": 0.5441331267356873, "learning_rate": 5.318111447735186e-05, "loss": 1.6822, "step": 8764 }, { "epoch": 0.4885457889749735, "grad_norm": 0.6151909232139587, "learning_rate": 5.317222823018775e-05, "loss": 1.8201, "step": 8765 }, { "epoch": 0.48860152722813666, "grad_norm": 0.5616387724876404, "learning_rate": 5.316334188241908e-05, "loss": 1.705, "step": 8766 }, { "epoch": 0.48865726548129984, "grad_norm": 0.570561408996582, "learning_rate": 5.3154455434327634e-05, "loss": 1.7352, "step": 8767 }, { "epoch": 0.48871300373446297, "grad_norm": 0.5549841523170471, "learning_rate": 5.314556888619527e-05, "loss": 1.7109, "step": 8768 }, { "epoch": 0.4887687419876261, "grad_norm": 0.6028071045875549, "learning_rate": 5.313668223830378e-05, "loss": 1.7114, "step": 8769 }, { "epoch": 0.4888244802407892, "grad_norm": 0.563991129398346, "learning_rate": 5.312779549093503e-05, "loss": 1.5484, "step": 8770 }, { "epoch": 0.4888802184939524, "grad_norm": 0.5773816108703613, "learning_rate": 5.3118908644370834e-05, "loss": 1.7072, "step": 8771 }, { "epoch": 0.48893595674711554, "grad_norm": 0.5592569708824158, "learning_rate": 5.3110021698893053e-05, "loss": 1.7843, "step": 8772 }, { "epoch": 0.48899169500027867, "grad_norm": 0.5349111557006836, "learning_rate": 5.310113465478351e-05, "loss": 1.5887, "step": 8773 }, { "epoch": 0.48904743325344185, "grad_norm": 0.5708144903182983, "learning_rate": 5.309224751232406e-05, "loss": 1.5671, "step": 8774 }, { "epoch": 0.489103171506605, "grad_norm": 0.5695350766181946, "learning_rate": 5.308336027179655e-05, "loss": 1.8061, "step": 8775 }, { "epoch": 0.4891589097597681, "grad_norm": 0.5757440328598022, "learning_rate": 5.307447293348281e-05, "loss": 1.7021, "step": 8776 }, { "epoch": 0.4892146480129313, "grad_norm": 0.5219387412071228, "learning_rate": 5.306558549766473e-05, "loss": 1.5089, "step": 8777 }, { "epoch": 0.4892703862660944, "grad_norm": 0.5836179256439209, "learning_rate": 5.305669796462415e-05, "loss": 1.764, "step": 8778 }, { "epoch": 0.48932612451925755, "grad_norm": 0.5617983341217041, "learning_rate": 5.3047810334642935e-05, "loss": 1.751, "step": 8779 }, { "epoch": 0.48938186277242074, "grad_norm": 0.5990623831748962, "learning_rate": 5.303892260800294e-05, "loss": 1.7939, "step": 8780 }, { "epoch": 0.48943760102558387, "grad_norm": 0.5625554323196411, "learning_rate": 5.303003478498605e-05, "loss": 1.8436, "step": 8781 }, { "epoch": 0.489493339278747, "grad_norm": 0.6201027631759644, "learning_rate": 5.3021146865874117e-05, "loss": 1.7894, "step": 8782 }, { "epoch": 0.4895490775319101, "grad_norm": 0.5482053160667419, "learning_rate": 5.301225885094902e-05, "loss": 1.7486, "step": 8783 }, { "epoch": 0.4896048157850733, "grad_norm": 0.5940152406692505, "learning_rate": 5.300337074049262e-05, "loss": 1.7971, "step": 8784 }, { "epoch": 0.48966055403823644, "grad_norm": 0.49621883034706116, "learning_rate": 5.299448253478683e-05, "loss": 1.6085, "step": 8785 }, { "epoch": 0.48971629229139957, "grad_norm": 0.5509806275367737, "learning_rate": 5.29855942341135e-05, "loss": 1.8445, "step": 8786 }, { "epoch": 0.48977203054456275, "grad_norm": 0.5669719576835632, "learning_rate": 5.297670583875454e-05, "loss": 1.7854, "step": 8787 }, { "epoch": 0.4898277687977259, "grad_norm": 0.5512406826019287, "learning_rate": 5.296781734899182e-05, "loss": 1.4982, "step": 8788 }, { "epoch": 0.489883507050889, "grad_norm": 0.56741863489151, "learning_rate": 5.295892876510723e-05, "loss": 1.7415, "step": 8789 }, { "epoch": 0.4899392453040522, "grad_norm": 0.5425149202346802, "learning_rate": 5.295004008738268e-05, "loss": 1.5488, "step": 8790 }, { "epoch": 0.4899949835572153, "grad_norm": 0.5617731213569641, "learning_rate": 5.294115131610006e-05, "loss": 1.7582, "step": 8791 }, { "epoch": 0.49005072181037845, "grad_norm": 0.5693073868751526, "learning_rate": 5.293226245154127e-05, "loss": 1.5738, "step": 8792 }, { "epoch": 0.4901064600635416, "grad_norm": 0.6429868340492249, "learning_rate": 5.292337349398821e-05, "loss": 1.7709, "step": 8793 }, { "epoch": 0.49016219831670477, "grad_norm": 0.568608283996582, "learning_rate": 5.291448444372279e-05, "loss": 1.5022, "step": 8794 }, { "epoch": 0.4902179365698679, "grad_norm": 0.5543949604034424, "learning_rate": 5.29055953010269e-05, "loss": 1.7136, "step": 8795 }, { "epoch": 0.490273674823031, "grad_norm": 0.5077717900276184, "learning_rate": 5.289670606618248e-05, "loss": 1.5791, "step": 8796 }, { "epoch": 0.4903294130761942, "grad_norm": 0.5588290691375732, "learning_rate": 5.288781673947143e-05, "loss": 1.7905, "step": 8797 }, { "epoch": 0.49038515132935734, "grad_norm": 0.5637931823730469, "learning_rate": 5.2878927321175676e-05, "loss": 1.7184, "step": 8798 }, { "epoch": 0.49044088958252047, "grad_norm": 0.5664627552032471, "learning_rate": 5.2870037811577125e-05, "loss": 1.5013, "step": 8799 }, { "epoch": 0.49049662783568365, "grad_norm": 0.5796491503715515, "learning_rate": 5.28611482109577e-05, "loss": 1.7939, "step": 8800 }, { "epoch": 0.4905523660888468, "grad_norm": 0.556143045425415, "learning_rate": 5.2852258519599365e-05, "loss": 1.5717, "step": 8801 }, { "epoch": 0.4906081043420099, "grad_norm": 0.5120705366134644, "learning_rate": 5.284336873778398e-05, "loss": 1.5725, "step": 8802 }, { "epoch": 0.4906638425951731, "grad_norm": 0.5616738200187683, "learning_rate": 5.2834478865793545e-05, "loss": 1.5918, "step": 8803 }, { "epoch": 0.4907195808483362, "grad_norm": 0.5868408679962158, "learning_rate": 5.282558890390995e-05, "loss": 1.7262, "step": 8804 }, { "epoch": 0.49077531910149935, "grad_norm": 0.5609720945358276, "learning_rate": 5.281669885241517e-05, "loss": 1.6374, "step": 8805 }, { "epoch": 0.4908310573546625, "grad_norm": 0.5879573225975037, "learning_rate": 5.280780871159111e-05, "loss": 1.7363, "step": 8806 }, { "epoch": 0.49088679560782567, "grad_norm": 0.5944104790687561, "learning_rate": 5.279891848171974e-05, "loss": 1.8078, "step": 8807 }, { "epoch": 0.4909425338609888, "grad_norm": 0.5318206548690796, "learning_rate": 5.2790028163082985e-05, "loss": 1.5397, "step": 8808 }, { "epoch": 0.4909982721141519, "grad_norm": 0.542536199092865, "learning_rate": 5.2781137755962794e-05, "loss": 1.6362, "step": 8809 }, { "epoch": 0.4910540103673151, "grad_norm": 0.5784698128700256, "learning_rate": 5.2772247260641136e-05, "loss": 1.765, "step": 8810 }, { "epoch": 0.49110974862047824, "grad_norm": 0.5454279184341431, "learning_rate": 5.276335667739998e-05, "loss": 1.7014, "step": 8811 }, { "epoch": 0.49116548687364137, "grad_norm": 0.519689679145813, "learning_rate": 5.275446600652123e-05, "loss": 1.7533, "step": 8812 }, { "epoch": 0.49122122512680455, "grad_norm": 0.7089325785636902, "learning_rate": 5.2745575248286895e-05, "loss": 2.1051, "step": 8813 }, { "epoch": 0.4912769633799677, "grad_norm": 0.5588321089744568, "learning_rate": 5.273668440297892e-05, "loss": 1.6069, "step": 8814 }, { "epoch": 0.4913327016331308, "grad_norm": 0.5273601412773132, "learning_rate": 5.272779347087925e-05, "loss": 1.4399, "step": 8815 }, { "epoch": 0.49138843988629394, "grad_norm": 0.5443345904350281, "learning_rate": 5.27189024522699e-05, "loss": 1.5401, "step": 8816 }, { "epoch": 0.4914441781394571, "grad_norm": 0.5727609395980835, "learning_rate": 5.271001134743281e-05, "loss": 1.6588, "step": 8817 }, { "epoch": 0.49149991639262025, "grad_norm": 0.5712710618972778, "learning_rate": 5.270112015664997e-05, "loss": 1.7393, "step": 8818 }, { "epoch": 0.4915556546457834, "grad_norm": 0.5474506616592407, "learning_rate": 5.2692228880203333e-05, "loss": 1.6144, "step": 8819 }, { "epoch": 0.49161139289894656, "grad_norm": 0.5622429251670837, "learning_rate": 5.2683337518374906e-05, "loss": 1.6107, "step": 8820 }, { "epoch": 0.4916671311521097, "grad_norm": 0.5528522729873657, "learning_rate": 5.267444607144665e-05, "loss": 1.5545, "step": 8821 }, { "epoch": 0.4917228694052728, "grad_norm": 0.5275382995605469, "learning_rate": 5.2665554539700554e-05, "loss": 1.6128, "step": 8822 }, { "epoch": 0.491778607658436, "grad_norm": 0.6423818469047546, "learning_rate": 5.265666292341861e-05, "loss": 2.064, "step": 8823 }, { "epoch": 0.49183434591159914, "grad_norm": 0.5372768640518188, "learning_rate": 5.26477712228828e-05, "loss": 1.2805, "step": 8824 }, { "epoch": 0.49189008416476226, "grad_norm": 0.600679337978363, "learning_rate": 5.2638879438375144e-05, "loss": 1.8211, "step": 8825 }, { "epoch": 0.49194582241792545, "grad_norm": 0.5628047585487366, "learning_rate": 5.2629987570177606e-05, "loss": 1.6321, "step": 8826 }, { "epoch": 0.4920015606710886, "grad_norm": 0.600486695766449, "learning_rate": 5.262109561857221e-05, "loss": 1.782, "step": 8827 }, { "epoch": 0.4920572989242517, "grad_norm": 0.5375781655311584, "learning_rate": 5.261220358384091e-05, "loss": 1.5132, "step": 8828 }, { "epoch": 0.49211303717741484, "grad_norm": 0.5441939830780029, "learning_rate": 5.260331146626578e-05, "loss": 1.4457, "step": 8829 }, { "epoch": 0.492168775430578, "grad_norm": 0.5390109419822693, "learning_rate": 5.259441926612877e-05, "loss": 1.6268, "step": 8830 }, { "epoch": 0.49222451368374115, "grad_norm": 0.5406618714332581, "learning_rate": 5.2585526983711916e-05, "loss": 1.5747, "step": 8831 }, { "epoch": 0.4922802519369043, "grad_norm": 0.5526447296142578, "learning_rate": 5.2576634619297216e-05, "loss": 1.6989, "step": 8832 }, { "epoch": 0.49233599019006746, "grad_norm": 0.5135407447814941, "learning_rate": 5.256774217316669e-05, "loss": 1.4546, "step": 8833 }, { "epoch": 0.4923917284432306, "grad_norm": 0.5286427736282349, "learning_rate": 5.255884964560235e-05, "loss": 1.6071, "step": 8834 }, { "epoch": 0.4924474666963937, "grad_norm": 0.5706698894500732, "learning_rate": 5.254995703688621e-05, "loss": 1.7096, "step": 8835 }, { "epoch": 0.4925032049495569, "grad_norm": 0.5597012042999268, "learning_rate": 5.2541064347300306e-05, "loss": 1.6175, "step": 8836 }, { "epoch": 0.49255894320272003, "grad_norm": 0.4902280271053314, "learning_rate": 5.253217157712666e-05, "loss": 1.2836, "step": 8837 }, { "epoch": 0.49261468145588316, "grad_norm": 0.598961591720581, "learning_rate": 5.2523278726647304e-05, "loss": 1.7038, "step": 8838 }, { "epoch": 0.4926704197090463, "grad_norm": 1.2628682851791382, "learning_rate": 5.251438579614425e-05, "loss": 1.8079, "step": 8839 }, { "epoch": 0.4927261579622095, "grad_norm": 0.5793728232383728, "learning_rate": 5.250549278589955e-05, "loss": 1.8102, "step": 8840 }, { "epoch": 0.4927818962153726, "grad_norm": 0.5742671489715576, "learning_rate": 5.249659969619519e-05, "loss": 1.6611, "step": 8841 }, { "epoch": 0.49283763446853573, "grad_norm": 0.5438802242279053, "learning_rate": 5.248770652731327e-05, "loss": 1.5826, "step": 8842 }, { "epoch": 0.4928933727216989, "grad_norm": 0.553573727607727, "learning_rate": 5.247881327953581e-05, "loss": 1.5787, "step": 8843 }, { "epoch": 0.49294911097486205, "grad_norm": 0.5531934499740601, "learning_rate": 5.246991995314484e-05, "loss": 1.7769, "step": 8844 }, { "epoch": 0.4930048492280252, "grad_norm": 0.5669671893119812, "learning_rate": 5.24610265484224e-05, "loss": 1.6973, "step": 8845 }, { "epoch": 0.49306058748118836, "grad_norm": 0.5406858921051025, "learning_rate": 5.2452133065650565e-05, "loss": 1.4484, "step": 8846 }, { "epoch": 0.4931163257343515, "grad_norm": 0.6136825084686279, "learning_rate": 5.2443239505111354e-05, "loss": 1.7145, "step": 8847 }, { "epoch": 0.4931720639875146, "grad_norm": 0.5375277400016785, "learning_rate": 5.243434586708682e-05, "loss": 1.5229, "step": 8848 }, { "epoch": 0.4932278022406778, "grad_norm": 0.5452854633331299, "learning_rate": 5.2425452151859045e-05, "loss": 1.4448, "step": 8849 }, { "epoch": 0.49328354049384093, "grad_norm": 0.5728045701980591, "learning_rate": 5.241655835971006e-05, "loss": 1.8291, "step": 8850 }, { "epoch": 0.49333927874700406, "grad_norm": 0.5290676951408386, "learning_rate": 5.240766449092194e-05, "loss": 1.53, "step": 8851 }, { "epoch": 0.4933950170001672, "grad_norm": 0.6011704206466675, "learning_rate": 5.239877054577673e-05, "loss": 1.7215, "step": 8852 }, { "epoch": 0.4934507552533304, "grad_norm": 0.5930907130241394, "learning_rate": 5.2389876524556526e-05, "loss": 1.8231, "step": 8853 }, { "epoch": 0.4935064935064935, "grad_norm": 0.5788987874984741, "learning_rate": 5.2380982427543346e-05, "loss": 1.7529, "step": 8854 }, { "epoch": 0.49356223175965663, "grad_norm": 0.5591574311256409, "learning_rate": 5.23720882550193e-05, "loss": 1.5894, "step": 8855 }, { "epoch": 0.4936179700128198, "grad_norm": 0.6035146117210388, "learning_rate": 5.2363194007266435e-05, "loss": 1.811, "step": 8856 }, { "epoch": 0.49367370826598295, "grad_norm": 0.5160028338432312, "learning_rate": 5.2354299684566856e-05, "loss": 1.6787, "step": 8857 }, { "epoch": 0.4937294465191461, "grad_norm": 0.5431737899780273, "learning_rate": 5.2345405287202596e-05, "loss": 1.4917, "step": 8858 }, { "epoch": 0.49378518477230926, "grad_norm": 0.5381173491477966, "learning_rate": 5.233651081545577e-05, "loss": 1.6775, "step": 8859 }, { "epoch": 0.4938409230254724, "grad_norm": 0.6041108965873718, "learning_rate": 5.232761626960844e-05, "loss": 1.6414, "step": 8860 }, { "epoch": 0.4938966612786355, "grad_norm": 0.6218950152397156, "learning_rate": 5.231872164994268e-05, "loss": 1.6513, "step": 8861 }, { "epoch": 0.49395239953179865, "grad_norm": 0.5222500562667847, "learning_rate": 5.230982695674059e-05, "loss": 1.7083, "step": 8862 }, { "epoch": 0.49400813778496183, "grad_norm": 0.5420836806297302, "learning_rate": 5.230093219028427e-05, "loss": 1.5971, "step": 8863 }, { "epoch": 0.49406387603812496, "grad_norm": 0.5384796857833862, "learning_rate": 5.229203735085579e-05, "loss": 1.5896, "step": 8864 }, { "epoch": 0.4941196142912881, "grad_norm": 0.6375717520713806, "learning_rate": 5.2283142438737245e-05, "loss": 1.8503, "step": 8865 }, { "epoch": 0.4941753525444513, "grad_norm": 0.5303763151168823, "learning_rate": 5.227424745421074e-05, "loss": 1.6416, "step": 8866 }, { "epoch": 0.4942310907976144, "grad_norm": 0.5153331756591797, "learning_rate": 5.2265352397558354e-05, "loss": 1.3659, "step": 8867 }, { "epoch": 0.49428682905077753, "grad_norm": 0.5397130846977234, "learning_rate": 5.225645726906222e-05, "loss": 1.5523, "step": 8868 }, { "epoch": 0.4943425673039407, "grad_norm": 0.5596987009048462, "learning_rate": 5.224756206900439e-05, "loss": 1.7921, "step": 8869 }, { "epoch": 0.49439830555710385, "grad_norm": 0.5709193348884583, "learning_rate": 5.2238666797667026e-05, "loss": 1.6013, "step": 8870 }, { "epoch": 0.494454043810267, "grad_norm": 0.5561599731445312, "learning_rate": 5.2229771455332176e-05, "loss": 1.4794, "step": 8871 }, { "epoch": 0.49450978206343016, "grad_norm": 0.5445564985275269, "learning_rate": 5.2220876042281995e-05, "loss": 1.5029, "step": 8872 }, { "epoch": 0.4945655203165933, "grad_norm": 0.5647691488265991, "learning_rate": 5.2211980558798565e-05, "loss": 1.7888, "step": 8873 }, { "epoch": 0.4946212585697564, "grad_norm": 0.5487396717071533, "learning_rate": 5.220308500516401e-05, "loss": 1.6931, "step": 8874 }, { "epoch": 0.49467699682291955, "grad_norm": 0.5969203114509583, "learning_rate": 5.219418938166044e-05, "loss": 1.6718, "step": 8875 }, { "epoch": 0.49473273507608273, "grad_norm": 0.564508855342865, "learning_rate": 5.218529368856997e-05, "loss": 1.6968, "step": 8876 }, { "epoch": 0.49478847332924586, "grad_norm": 0.5070094466209412, "learning_rate": 5.217639792617475e-05, "loss": 1.5859, "step": 8877 }, { "epoch": 0.494844211582409, "grad_norm": 0.5474216341972351, "learning_rate": 5.216750209475685e-05, "loss": 1.7858, "step": 8878 }, { "epoch": 0.4948999498355722, "grad_norm": 0.4998477101325989, "learning_rate": 5.2158606194598436e-05, "loss": 1.4827, "step": 8879 }, { "epoch": 0.4949556880887353, "grad_norm": 0.5660443305969238, "learning_rate": 5.214971022598162e-05, "loss": 1.7799, "step": 8880 }, { "epoch": 0.49501142634189843, "grad_norm": 0.5911859273910522, "learning_rate": 5.2140814189188514e-05, "loss": 1.6708, "step": 8881 }, { "epoch": 0.4950671645950616, "grad_norm": 0.5817141532897949, "learning_rate": 5.213191808450127e-05, "loss": 1.6558, "step": 8882 }, { "epoch": 0.49512290284822474, "grad_norm": 0.5510105490684509, "learning_rate": 5.212302191220203e-05, "loss": 1.5644, "step": 8883 }, { "epoch": 0.4951786411013879, "grad_norm": 0.6024221181869507, "learning_rate": 5.21141256725729e-05, "loss": 1.7236, "step": 8884 }, { "epoch": 0.495234379354551, "grad_norm": 0.5197804570198059, "learning_rate": 5.210522936589604e-05, "loss": 1.5429, "step": 8885 }, { "epoch": 0.4952901176077142, "grad_norm": 0.5537724494934082, "learning_rate": 5.209633299245357e-05, "loss": 1.7254, "step": 8886 }, { "epoch": 0.4953458558608773, "grad_norm": 0.5095260739326477, "learning_rate": 5.208743655252763e-05, "loss": 1.4012, "step": 8887 }, { "epoch": 0.49540159411404044, "grad_norm": 0.5599790811538696, "learning_rate": 5.207854004640038e-05, "loss": 1.7249, "step": 8888 }, { "epoch": 0.49545733236720363, "grad_norm": 0.555938184261322, "learning_rate": 5.206964347435396e-05, "loss": 1.6312, "step": 8889 }, { "epoch": 0.49551307062036676, "grad_norm": 0.5438600182533264, "learning_rate": 5.206074683667053e-05, "loss": 1.7241, "step": 8890 }, { "epoch": 0.4955688088735299, "grad_norm": 0.5477585792541504, "learning_rate": 5.2051850133632206e-05, "loss": 1.6946, "step": 8891 }, { "epoch": 0.49562454712669307, "grad_norm": 0.5788122415542603, "learning_rate": 5.204295336552117e-05, "loss": 1.503, "step": 8892 }, { "epoch": 0.4956802853798562, "grad_norm": 0.5613676309585571, "learning_rate": 5.203405653261956e-05, "loss": 1.5574, "step": 8893 }, { "epoch": 0.49573602363301933, "grad_norm": 0.5826630592346191, "learning_rate": 5.202515963520953e-05, "loss": 1.85, "step": 8894 }, { "epoch": 0.4957917618861825, "grad_norm": 0.5635188817977905, "learning_rate": 5.2016262673573246e-05, "loss": 1.3931, "step": 8895 }, { "epoch": 0.49584750013934564, "grad_norm": 0.5745763182640076, "learning_rate": 5.200736564799288e-05, "loss": 1.7307, "step": 8896 }, { "epoch": 0.49590323839250877, "grad_norm": 0.5301480889320374, "learning_rate": 5.199846855875057e-05, "loss": 1.4952, "step": 8897 }, { "epoch": 0.4959589766456719, "grad_norm": 0.561489999294281, "learning_rate": 5.19895714061285e-05, "loss": 1.5023, "step": 8898 }, { "epoch": 0.4960147148988351, "grad_norm": 0.5963059663772583, "learning_rate": 5.198067419040881e-05, "loss": 1.7862, "step": 8899 }, { "epoch": 0.4960704531519982, "grad_norm": 0.5533133149147034, "learning_rate": 5.197177691187368e-05, "loss": 1.6099, "step": 8900 }, { "epoch": 0.49612619140516134, "grad_norm": 0.5286788940429688, "learning_rate": 5.196287957080529e-05, "loss": 1.5929, "step": 8901 }, { "epoch": 0.4961819296583245, "grad_norm": 0.5352204442024231, "learning_rate": 5.195398216748579e-05, "loss": 1.5723, "step": 8902 }, { "epoch": 0.49623766791148766, "grad_norm": 0.5606736540794373, "learning_rate": 5.194508470219739e-05, "loss": 1.6633, "step": 8903 }, { "epoch": 0.4962934061646508, "grad_norm": 0.5791866779327393, "learning_rate": 5.193618717522224e-05, "loss": 1.6933, "step": 8904 }, { "epoch": 0.49634914441781397, "grad_norm": 0.5928483009338379, "learning_rate": 5.192728958684252e-05, "loss": 1.8085, "step": 8905 }, { "epoch": 0.4964048826709771, "grad_norm": 0.545987606048584, "learning_rate": 5.1918391937340405e-05, "loss": 1.6682, "step": 8906 }, { "epoch": 0.49646062092414023, "grad_norm": 0.5828558206558228, "learning_rate": 5.190949422699808e-05, "loss": 1.7887, "step": 8907 }, { "epoch": 0.49651635917730336, "grad_norm": 0.5636189579963684, "learning_rate": 5.1900596456097736e-05, "loss": 1.6192, "step": 8908 }, { "epoch": 0.49657209743046654, "grad_norm": 0.5548069477081299, "learning_rate": 5.189169862492156e-05, "loss": 1.482, "step": 8909 }, { "epoch": 0.49662783568362967, "grad_norm": 0.5686978697776794, "learning_rate": 5.188280073375173e-05, "loss": 1.5428, "step": 8910 }, { "epoch": 0.4966835739367928, "grad_norm": 0.5715393424034119, "learning_rate": 5.187390278287043e-05, "loss": 1.751, "step": 8911 }, { "epoch": 0.496739312189956, "grad_norm": 0.5473306775093079, "learning_rate": 5.1865004772559876e-05, "loss": 1.6317, "step": 8912 }, { "epoch": 0.4967950504431191, "grad_norm": 0.5280557870864868, "learning_rate": 5.1856106703102225e-05, "loss": 1.382, "step": 8913 }, { "epoch": 0.49685078869628224, "grad_norm": 0.566477358341217, "learning_rate": 5.18472085747797e-05, "loss": 1.6059, "step": 8914 }, { "epoch": 0.4969065269494454, "grad_norm": 0.618401288986206, "learning_rate": 5.183831038787449e-05, "loss": 1.7905, "step": 8915 }, { "epoch": 0.49696226520260856, "grad_norm": 0.555980384349823, "learning_rate": 5.18294121426688e-05, "loss": 1.7827, "step": 8916 }, { "epoch": 0.4970180034557717, "grad_norm": 0.5835009813308716, "learning_rate": 5.1820513839444804e-05, "loss": 1.5225, "step": 8917 }, { "epoch": 0.49707374170893487, "grad_norm": 0.5366058945655823, "learning_rate": 5.181161547848474e-05, "loss": 1.584, "step": 8918 }, { "epoch": 0.497129479962098, "grad_norm": 0.5382677316665649, "learning_rate": 5.1802717060070795e-05, "loss": 1.7048, "step": 8919 }, { "epoch": 0.4971852182152611, "grad_norm": 0.5656511783599854, "learning_rate": 5.1793818584485166e-05, "loss": 1.7254, "step": 8920 }, { "epoch": 0.49724095646842426, "grad_norm": 0.4968765377998352, "learning_rate": 5.178492005201007e-05, "loss": 1.4276, "step": 8921 }, { "epoch": 0.49729669472158744, "grad_norm": 0.599624514579773, "learning_rate": 5.177602146292773e-05, "loss": 1.7886, "step": 8922 }, { "epoch": 0.49735243297475057, "grad_norm": 0.5555099844932556, "learning_rate": 5.176712281752033e-05, "loss": 1.5135, "step": 8923 }, { "epoch": 0.4974081712279137, "grad_norm": 0.5166276693344116, "learning_rate": 5.17582241160701e-05, "loss": 1.284, "step": 8924 }, { "epoch": 0.4974639094810769, "grad_norm": 0.5706877708435059, "learning_rate": 5.1749325358859255e-05, "loss": 1.5666, "step": 8925 }, { "epoch": 0.49751964773424, "grad_norm": 0.6055343747138977, "learning_rate": 5.1740426546170003e-05, "loss": 1.7793, "step": 8926 }, { "epoch": 0.49757538598740314, "grad_norm": 0.551367998123169, "learning_rate": 5.1731527678284575e-05, "loss": 1.7579, "step": 8927 }, { "epoch": 0.4976311242405663, "grad_norm": 0.6338830590248108, "learning_rate": 5.172262875548518e-05, "loss": 1.691, "step": 8928 }, { "epoch": 0.49768686249372945, "grad_norm": 0.5556480884552002, "learning_rate": 5.171372977805405e-05, "loss": 1.5507, "step": 8929 }, { "epoch": 0.4977426007468926, "grad_norm": 0.5841500163078308, "learning_rate": 5.17048307462734e-05, "loss": 1.8044, "step": 8930 }, { "epoch": 0.4977983390000557, "grad_norm": 0.5762627124786377, "learning_rate": 5.169593166042547e-05, "loss": 1.6068, "step": 8931 }, { "epoch": 0.4978540772532189, "grad_norm": 0.5406793355941772, "learning_rate": 5.1687032520792464e-05, "loss": 1.6587, "step": 8932 }, { "epoch": 0.497909815506382, "grad_norm": 0.5948076248168945, "learning_rate": 5.1678133327656616e-05, "loss": 1.7269, "step": 8933 }, { "epoch": 0.49796555375954515, "grad_norm": 0.5559920072555542, "learning_rate": 5.166923408130016e-05, "loss": 1.7147, "step": 8934 }, { "epoch": 0.49802129201270834, "grad_norm": 0.5676483511924744, "learning_rate": 5.166033478200536e-05, "loss": 1.5815, "step": 8935 }, { "epoch": 0.49807703026587147, "grad_norm": 0.5557644367218018, "learning_rate": 5.1651435430054396e-05, "loss": 1.7004, "step": 8936 }, { "epoch": 0.4981327685190346, "grad_norm": 0.5279107093811035, "learning_rate": 5.164253602572954e-05, "loss": 1.5522, "step": 8937 }, { "epoch": 0.4981885067721978, "grad_norm": 0.5402976870536804, "learning_rate": 5.1633636569313014e-05, "loss": 1.6626, "step": 8938 }, { "epoch": 0.4982442450253609, "grad_norm": 0.5484632849693298, "learning_rate": 5.1624737061087056e-05, "loss": 1.5598, "step": 8939 }, { "epoch": 0.49829998327852404, "grad_norm": 0.5460349321365356, "learning_rate": 5.161583750133392e-05, "loss": 1.6661, "step": 8940 }, { "epoch": 0.4983557215316872, "grad_norm": 0.5012972950935364, "learning_rate": 5.160693789033583e-05, "loss": 1.3436, "step": 8941 }, { "epoch": 0.49841145978485035, "grad_norm": 0.5560734272003174, "learning_rate": 5.159803822837506e-05, "loss": 1.5994, "step": 8942 }, { "epoch": 0.4984671980380135, "grad_norm": 0.5721739530563354, "learning_rate": 5.1589138515733805e-05, "loss": 1.8826, "step": 8943 }, { "epoch": 0.4985229362911766, "grad_norm": 0.548629105091095, "learning_rate": 5.158023875269436e-05, "loss": 1.465, "step": 8944 }, { "epoch": 0.4985786745443398, "grad_norm": 0.5386154651641846, "learning_rate": 5.157133893953895e-05, "loss": 1.624, "step": 8945 }, { "epoch": 0.4986344127975029, "grad_norm": 0.6287878155708313, "learning_rate": 5.156243907654983e-05, "loss": 1.6433, "step": 8946 }, { "epoch": 0.49869015105066605, "grad_norm": 0.6134181022644043, "learning_rate": 5.155353916400925e-05, "loss": 1.7598, "step": 8947 }, { "epoch": 0.49874588930382924, "grad_norm": 0.5654070377349854, "learning_rate": 5.154463920219947e-05, "loss": 1.7002, "step": 8948 }, { "epoch": 0.49880162755699237, "grad_norm": 0.5511396527290344, "learning_rate": 5.153573919140274e-05, "loss": 1.5513, "step": 8949 }, { "epoch": 0.4988573658101555, "grad_norm": 0.5892798900604248, "learning_rate": 5.1526839131901315e-05, "loss": 1.8855, "step": 8950 }, { "epoch": 0.4989131040633187, "grad_norm": 0.6024952530860901, "learning_rate": 5.151793902397747e-05, "loss": 1.591, "step": 8951 }, { "epoch": 0.4989688423164818, "grad_norm": 0.545107901096344, "learning_rate": 5.150903886791343e-05, "loss": 1.54, "step": 8952 }, { "epoch": 0.49902458056964494, "grad_norm": 0.5680729746818542, "learning_rate": 5.150013866399147e-05, "loss": 1.417, "step": 8953 }, { "epoch": 0.49908031882280807, "grad_norm": 0.5475823879241943, "learning_rate": 5.149123841249387e-05, "loss": 1.5283, "step": 8954 }, { "epoch": 0.49913605707597125, "grad_norm": 0.6003718376159668, "learning_rate": 5.148233811370289e-05, "loss": 1.9128, "step": 8955 }, { "epoch": 0.4991917953291344, "grad_norm": 0.5217127203941345, "learning_rate": 5.1473437767900766e-05, "loss": 1.5466, "step": 8956 }, { "epoch": 0.4992475335822975, "grad_norm": 0.5930051803588867, "learning_rate": 5.1464537375369816e-05, "loss": 1.7227, "step": 8957 }, { "epoch": 0.4993032718354607, "grad_norm": 0.5506693124771118, "learning_rate": 5.145563693639226e-05, "loss": 1.5488, "step": 8958 }, { "epoch": 0.4993590100886238, "grad_norm": 0.5341318845748901, "learning_rate": 5.144673645125039e-05, "loss": 1.6493, "step": 8959 }, { "epoch": 0.49941474834178695, "grad_norm": 0.5735641717910767, "learning_rate": 5.143783592022646e-05, "loss": 1.6502, "step": 8960 }, { "epoch": 0.49947048659495014, "grad_norm": 0.5525271892547607, "learning_rate": 5.142893534360278e-05, "loss": 1.389, "step": 8961 }, { "epoch": 0.49952622484811326, "grad_norm": 0.6138321161270142, "learning_rate": 5.1420034721661594e-05, "loss": 1.882, "step": 8962 }, { "epoch": 0.4995819631012764, "grad_norm": 0.5286270380020142, "learning_rate": 5.1411134054685185e-05, "loss": 1.6304, "step": 8963 }, { "epoch": 0.4996377013544396, "grad_norm": 0.5324103832244873, "learning_rate": 5.140223334295584e-05, "loss": 1.7474, "step": 8964 }, { "epoch": 0.4996934396076027, "grad_norm": 0.598732590675354, "learning_rate": 5.139333258675582e-05, "loss": 1.7623, "step": 8965 }, { "epoch": 0.49974917786076584, "grad_norm": 0.5680933594703674, "learning_rate": 5.138443178636742e-05, "loss": 1.5633, "step": 8966 }, { "epoch": 0.49980491611392897, "grad_norm": 0.5769996047019958, "learning_rate": 5.13755309420729e-05, "loss": 1.6215, "step": 8967 }, { "epoch": 0.49986065436709215, "grad_norm": 0.5486459732055664, "learning_rate": 5.1366630054154576e-05, "loss": 1.6782, "step": 8968 }, { "epoch": 0.4999163926202553, "grad_norm": 0.6276679635047913, "learning_rate": 5.1357729122894706e-05, "loss": 1.7972, "step": 8969 }, { "epoch": 0.4999721308734184, "grad_norm": 0.5534047484397888, "learning_rate": 5.134882814857559e-05, "loss": 1.5217, "step": 8970 }, { "epoch": 0.5000278691265816, "grad_norm": 0.7427502274513245, "learning_rate": 5.1339927131479503e-05, "loss": 1.7474, "step": 8971 }, { "epoch": 0.5000836073797447, "grad_norm": 0.5830016136169434, "learning_rate": 5.133102607188874e-05, "loss": 1.7703, "step": 8972 }, { "epoch": 0.5001393456329079, "grad_norm": 0.5821530818939209, "learning_rate": 5.132212497008559e-05, "loss": 1.6809, "step": 8973 }, { "epoch": 0.500195083886071, "grad_norm": 0.5597349405288696, "learning_rate": 5.1313223826352365e-05, "loss": 1.6982, "step": 8974 }, { "epoch": 0.5002508221392341, "grad_norm": 0.5627524256706238, "learning_rate": 5.1304322640971315e-05, "loss": 1.5646, "step": 8975 }, { "epoch": 0.5003065603923973, "grad_norm": 0.568310558795929, "learning_rate": 5.1295421414224754e-05, "loss": 1.6019, "step": 8976 }, { "epoch": 0.5003622986455605, "grad_norm": 0.5768476128578186, "learning_rate": 5.128652014639499e-05, "loss": 1.6455, "step": 8977 }, { "epoch": 0.5004180368987236, "grad_norm": 0.5494751930236816, "learning_rate": 5.1277618837764294e-05, "loss": 1.5586, "step": 8978 }, { "epoch": 0.5004737751518867, "grad_norm": 0.5893326997756958, "learning_rate": 5.126871748861499e-05, "loss": 1.8271, "step": 8979 }, { "epoch": 0.5005295134050499, "grad_norm": 0.5742121934890747, "learning_rate": 5.125981609922935e-05, "loss": 1.7673, "step": 8980 }, { "epoch": 0.500585251658213, "grad_norm": 0.5225714445114136, "learning_rate": 5.1250914669889714e-05, "loss": 1.5127, "step": 8981 }, { "epoch": 0.5006409899113762, "grad_norm": 0.5902960300445557, "learning_rate": 5.124201320087833e-05, "loss": 1.7471, "step": 8982 }, { "epoch": 0.5006967281645394, "grad_norm": 0.5950215458869934, "learning_rate": 5.1233111692477555e-05, "loss": 1.6188, "step": 8983 }, { "epoch": 0.5007524664177024, "grad_norm": 0.5525108575820923, "learning_rate": 5.122421014496965e-05, "loss": 1.6802, "step": 8984 }, { "epoch": 0.5008082046708656, "grad_norm": 0.5543337464332581, "learning_rate": 5.1215308558636944e-05, "loss": 1.5793, "step": 8985 }, { "epoch": 0.5008639429240288, "grad_norm": 0.5265454053878784, "learning_rate": 5.1206406933761716e-05, "loss": 1.3947, "step": 8986 }, { "epoch": 0.5009196811771919, "grad_norm": 0.6150608658790588, "learning_rate": 5.119750527062632e-05, "loss": 1.9244, "step": 8987 }, { "epoch": 0.5009754194303551, "grad_norm": 0.5269333124160767, "learning_rate": 5.1188603569513025e-05, "loss": 1.6002, "step": 8988 }, { "epoch": 0.5010311576835182, "grad_norm": 0.6029527187347412, "learning_rate": 5.117970183070416e-05, "loss": 1.8124, "step": 8989 }, { "epoch": 0.5010868959366813, "grad_norm": 0.5682185292243958, "learning_rate": 5.1170800054482035e-05, "loss": 1.6561, "step": 8990 }, { "epoch": 0.5011426341898445, "grad_norm": 0.5897371172904968, "learning_rate": 5.116189824112896e-05, "loss": 1.7734, "step": 8991 }, { "epoch": 0.5011983724430077, "grad_norm": 0.5152097940444946, "learning_rate": 5.115299639092723e-05, "loss": 1.4226, "step": 8992 }, { "epoch": 0.5012541106961708, "grad_norm": 0.546345591545105, "learning_rate": 5.114409450415919e-05, "loss": 1.4967, "step": 8993 }, { "epoch": 0.501309848949334, "grad_norm": 0.5303710103034973, "learning_rate": 5.113519258110715e-05, "loss": 1.6527, "step": 8994 }, { "epoch": 0.501365587202497, "grad_norm": 0.5513923764228821, "learning_rate": 5.1126290622053405e-05, "loss": 1.7632, "step": 8995 }, { "epoch": 0.5014213254556602, "grad_norm": 0.5321218371391296, "learning_rate": 5.1117388627280305e-05, "loss": 1.5339, "step": 8996 }, { "epoch": 0.5014770637088234, "grad_norm": 0.5597907900810242, "learning_rate": 5.1108486597070125e-05, "loss": 1.6767, "step": 8997 }, { "epoch": 0.5015328019619865, "grad_norm": 0.5612991452217102, "learning_rate": 5.109958453170524e-05, "loss": 1.7141, "step": 8998 }, { "epoch": 0.5015885402151496, "grad_norm": 0.549898087978363, "learning_rate": 5.109068243146793e-05, "loss": 1.393, "step": 8999 }, { "epoch": 0.5016442784683128, "grad_norm": 0.5984362959861755, "learning_rate": 5.1081780296640535e-05, "loss": 1.8804, "step": 9000 }, { "epoch": 0.5017000167214759, "grad_norm": 0.567398190498352, "learning_rate": 5.107287812750538e-05, "loss": 1.6947, "step": 9001 }, { "epoch": 0.5017557549746391, "grad_norm": 0.5649966597557068, "learning_rate": 5.106397592434478e-05, "loss": 1.6008, "step": 9002 }, { "epoch": 0.5018114932278023, "grad_norm": 0.5383644700050354, "learning_rate": 5.105507368744108e-05, "loss": 1.6802, "step": 9003 }, { "epoch": 0.5018672314809653, "grad_norm": 0.5765425562858582, "learning_rate": 5.1046171417076584e-05, "loss": 1.653, "step": 9004 }, { "epoch": 0.5019229697341285, "grad_norm": 0.5408610105514526, "learning_rate": 5.103726911353363e-05, "loss": 1.7098, "step": 9005 }, { "epoch": 0.5019787079872917, "grad_norm": 0.5842016339302063, "learning_rate": 5.1028366777094536e-05, "loss": 1.9008, "step": 9006 }, { "epoch": 0.5020344462404548, "grad_norm": 0.5333168506622314, "learning_rate": 5.101946440804166e-05, "loss": 1.4168, "step": 9007 }, { "epoch": 0.502090184493618, "grad_norm": 0.5522457957267761, "learning_rate": 5.101056200665731e-05, "loss": 1.7717, "step": 9008 }, { "epoch": 0.5021459227467812, "grad_norm": 0.5111657381057739, "learning_rate": 5.100165957322384e-05, "loss": 1.4358, "step": 9009 }, { "epoch": 0.5022016609999442, "grad_norm": 0.5850957632064819, "learning_rate": 5.099275710802355e-05, "loss": 1.7202, "step": 9010 }, { "epoch": 0.5022573992531074, "grad_norm": 0.5885518789291382, "learning_rate": 5.09838546113388e-05, "loss": 1.8804, "step": 9011 }, { "epoch": 0.5023131375062706, "grad_norm": 0.5729745626449585, "learning_rate": 5.097495208345191e-05, "loss": 1.7409, "step": 9012 }, { "epoch": 0.5023688757594337, "grad_norm": 0.5428875088691711, "learning_rate": 5.096604952464524e-05, "loss": 1.6353, "step": 9013 }, { "epoch": 0.5024246140125969, "grad_norm": 0.598082423210144, "learning_rate": 5.095714693520111e-05, "loss": 1.8436, "step": 9014 }, { "epoch": 0.50248035226576, "grad_norm": 0.5146722197532654, "learning_rate": 5.094824431540184e-05, "loss": 1.5617, "step": 9015 }, { "epoch": 0.5025360905189231, "grad_norm": 0.5724582076072693, "learning_rate": 5.093934166552981e-05, "loss": 1.7339, "step": 9016 }, { "epoch": 0.5025918287720863, "grad_norm": 0.5695306658744812, "learning_rate": 5.0930438985867326e-05, "loss": 1.892, "step": 9017 }, { "epoch": 0.5026475670252494, "grad_norm": 0.5484499931335449, "learning_rate": 5.0921536276696745e-05, "loss": 1.6645, "step": 9018 }, { "epoch": 0.5027033052784126, "grad_norm": 0.561751127243042, "learning_rate": 5.09126335383004e-05, "loss": 1.8816, "step": 9019 }, { "epoch": 0.5027590435315757, "grad_norm": 0.5813974142074585, "learning_rate": 5.090373077096067e-05, "loss": 1.8134, "step": 9020 }, { "epoch": 0.5028147817847388, "grad_norm": 0.5623780488967896, "learning_rate": 5.089482797495984e-05, "loss": 1.625, "step": 9021 }, { "epoch": 0.502870520037902, "grad_norm": 0.5759438872337341, "learning_rate": 5.0885925150580295e-05, "loss": 1.8258, "step": 9022 }, { "epoch": 0.5029262582910652, "grad_norm": 0.5717414617538452, "learning_rate": 5.0877022298104356e-05, "loss": 1.5994, "step": 9023 }, { "epoch": 0.5029819965442283, "grad_norm": 0.5251317620277405, "learning_rate": 5.08681194178144e-05, "loss": 1.4209, "step": 9024 }, { "epoch": 0.5030377347973914, "grad_norm": 0.628030002117157, "learning_rate": 5.0859216509992743e-05, "loss": 2.1234, "step": 9025 }, { "epoch": 0.5030934730505546, "grad_norm": 0.6082812547683716, "learning_rate": 5.085031357492177e-05, "loss": 1.6014, "step": 9026 }, { "epoch": 0.5031492113037177, "grad_norm": 0.5856479406356812, "learning_rate": 5.0841410612883786e-05, "loss": 1.6218, "step": 9027 }, { "epoch": 0.5032049495568809, "grad_norm": 0.5050733089447021, "learning_rate": 5.083250762416116e-05, "loss": 1.4808, "step": 9028 }, { "epoch": 0.5032606878100441, "grad_norm": 0.5920116901397705, "learning_rate": 5.082360460903627e-05, "loss": 1.7044, "step": 9029 }, { "epoch": 0.5033164260632071, "grad_norm": 0.588408350944519, "learning_rate": 5.0814701567791436e-05, "loss": 1.923, "step": 9030 }, { "epoch": 0.5033721643163703, "grad_norm": 0.5859766602516174, "learning_rate": 5.0805798500709e-05, "loss": 1.888, "step": 9031 }, { "epoch": 0.5034279025695335, "grad_norm": 0.5343489646911621, "learning_rate": 5.0796895408071344e-05, "loss": 1.7227, "step": 9032 }, { "epoch": 0.5034836408226966, "grad_norm": 0.574579656124115, "learning_rate": 5.0787992290160827e-05, "loss": 1.7073, "step": 9033 }, { "epoch": 0.5035393790758598, "grad_norm": 0.5644822716712952, "learning_rate": 5.0779089147259774e-05, "loss": 1.6084, "step": 9034 }, { "epoch": 0.503595117329023, "grad_norm": 0.5493994355201721, "learning_rate": 5.077018597965056e-05, "loss": 1.6793, "step": 9035 }, { "epoch": 0.503650855582186, "grad_norm": 0.5413119196891785, "learning_rate": 5.076128278761554e-05, "loss": 1.6211, "step": 9036 }, { "epoch": 0.5037065938353492, "grad_norm": 0.5473475456237793, "learning_rate": 5.075237957143706e-05, "loss": 1.5416, "step": 9037 }, { "epoch": 0.5037623320885124, "grad_norm": 0.5547932982444763, "learning_rate": 5.0743476331397474e-05, "loss": 1.7137, "step": 9038 }, { "epoch": 0.5038180703416755, "grad_norm": 0.5651285648345947, "learning_rate": 5.073457306777919e-05, "loss": 1.2725, "step": 9039 }, { "epoch": 0.5038738085948387, "grad_norm": 0.559619128704071, "learning_rate": 5.0725669780864505e-05, "loss": 1.7186, "step": 9040 }, { "epoch": 0.5039295468480017, "grad_norm": 0.5500231385231018, "learning_rate": 5.0716766470935806e-05, "loss": 1.5712, "step": 9041 }, { "epoch": 0.5039852851011649, "grad_norm": 0.5345457792282104, "learning_rate": 5.070786313827547e-05, "loss": 1.4333, "step": 9042 }, { "epoch": 0.5040410233543281, "grad_norm": 0.5673493146896362, "learning_rate": 5.069895978316582e-05, "loss": 1.7388, "step": 9043 }, { "epoch": 0.5040967616074912, "grad_norm": 0.5534777641296387, "learning_rate": 5.0690056405889255e-05, "loss": 1.5896, "step": 9044 }, { "epoch": 0.5041524998606544, "grad_norm": 0.542965292930603, "learning_rate": 5.068115300672812e-05, "loss": 1.4889, "step": 9045 }, { "epoch": 0.5042082381138175, "grad_norm": 0.6177462339401245, "learning_rate": 5.0672249585964796e-05, "loss": 1.6881, "step": 9046 }, { "epoch": 0.5042639763669806, "grad_norm": 0.581512987613678, "learning_rate": 5.0663346143881617e-05, "loss": 1.9196, "step": 9047 }, { "epoch": 0.5043197146201438, "grad_norm": 0.5823097825050354, "learning_rate": 5.065444268076097e-05, "loss": 1.8109, "step": 9048 }, { "epoch": 0.504375452873307, "grad_norm": 0.6185294389724731, "learning_rate": 5.0645539196885214e-05, "loss": 1.7364, "step": 9049 }, { "epoch": 0.5044311911264701, "grad_norm": 0.5381544828414917, "learning_rate": 5.0636635692536724e-05, "loss": 1.4389, "step": 9050 }, { "epoch": 0.5044869293796332, "grad_norm": 0.547680675983429, "learning_rate": 5.062773216799786e-05, "loss": 1.5368, "step": 9051 }, { "epoch": 0.5045426676327964, "grad_norm": 0.5290063619613647, "learning_rate": 5.0618828623550996e-05, "loss": 1.561, "step": 9052 }, { "epoch": 0.5045984058859595, "grad_norm": 0.6034530997276306, "learning_rate": 5.060992505947849e-05, "loss": 1.6605, "step": 9053 }, { "epoch": 0.5046541441391227, "grad_norm": 0.5535921454429626, "learning_rate": 5.0601021476062714e-05, "loss": 1.6417, "step": 9054 }, { "epoch": 0.5047098823922859, "grad_norm": 0.5762230157852173, "learning_rate": 5.059211787358607e-05, "loss": 1.8285, "step": 9055 }, { "epoch": 0.5047656206454489, "grad_norm": 0.5755069255828857, "learning_rate": 5.058321425233087e-05, "loss": 1.8365, "step": 9056 }, { "epoch": 0.5048213588986121, "grad_norm": 0.5716124773025513, "learning_rate": 5.0574310612579515e-05, "loss": 1.7859, "step": 9057 }, { "epoch": 0.5048770971517753, "grad_norm": 0.5171856880187988, "learning_rate": 5.056540695461437e-05, "loss": 1.6426, "step": 9058 }, { "epoch": 0.5049328354049384, "grad_norm": 0.5806797742843628, "learning_rate": 5.0556503278717836e-05, "loss": 1.9062, "step": 9059 }, { "epoch": 0.5049885736581016, "grad_norm": 0.556565523147583, "learning_rate": 5.0547599585172245e-05, "loss": 1.7185, "step": 9060 }, { "epoch": 0.5050443119112648, "grad_norm": 0.5945353507995605, "learning_rate": 5.053869587426e-05, "loss": 1.5759, "step": 9061 }, { "epoch": 0.5051000501644278, "grad_norm": 0.581937313079834, "learning_rate": 5.052979214626346e-05, "loss": 1.7592, "step": 9062 }, { "epoch": 0.505155788417591, "grad_norm": 0.5184255838394165, "learning_rate": 5.0520888401464994e-05, "loss": 1.4266, "step": 9063 }, { "epoch": 0.5052115266707541, "grad_norm": 0.568466305732727, "learning_rate": 5.051198464014698e-05, "loss": 1.852, "step": 9064 }, { "epoch": 0.5052672649239173, "grad_norm": 0.5698969960212708, "learning_rate": 5.0503080862591824e-05, "loss": 1.6573, "step": 9065 }, { "epoch": 0.5053230031770805, "grad_norm": 0.6118015050888062, "learning_rate": 5.049417706908185e-05, "loss": 1.9084, "step": 9066 }, { "epoch": 0.5053787414302435, "grad_norm": 0.5635191798210144, "learning_rate": 5.0485273259899465e-05, "loss": 1.6099, "step": 9067 }, { "epoch": 0.5054344796834067, "grad_norm": 0.6044256091117859, "learning_rate": 5.0476369435327066e-05, "loss": 1.7669, "step": 9068 }, { "epoch": 0.5054902179365699, "grad_norm": 0.5191871523857117, "learning_rate": 5.046746559564698e-05, "loss": 1.6294, "step": 9069 }, { "epoch": 0.505545956189733, "grad_norm": 0.5460189580917358, "learning_rate": 5.045856174114161e-05, "loss": 1.4943, "step": 9070 }, { "epoch": 0.5056016944428962, "grad_norm": 0.5360379219055176, "learning_rate": 5.044965787209333e-05, "loss": 1.4914, "step": 9071 }, { "epoch": 0.5056574326960593, "grad_norm": 0.5588350296020508, "learning_rate": 5.044075398878456e-05, "loss": 1.5848, "step": 9072 }, { "epoch": 0.5057131709492224, "grad_norm": 0.5703949928283691, "learning_rate": 5.0431850091497614e-05, "loss": 1.8014, "step": 9073 }, { "epoch": 0.5057689092023856, "grad_norm": 0.5233216285705566, "learning_rate": 5.042294618051492e-05, "loss": 1.5506, "step": 9074 }, { "epoch": 0.5058246474555488, "grad_norm": 0.603326141834259, "learning_rate": 5.041404225611882e-05, "loss": 1.7151, "step": 9075 }, { "epoch": 0.5058803857087119, "grad_norm": 0.5280753374099731, "learning_rate": 5.040513831859172e-05, "loss": 1.6034, "step": 9076 }, { "epoch": 0.505936123961875, "grad_norm": 0.5462760925292969, "learning_rate": 5.0396234368215986e-05, "loss": 1.7025, "step": 9077 }, { "epoch": 0.5059918622150382, "grad_norm": 0.5069268345832825, "learning_rate": 5.0387330405274027e-05, "loss": 1.6266, "step": 9078 }, { "epoch": 0.5060476004682013, "grad_norm": 0.619596004486084, "learning_rate": 5.0378426430048185e-05, "loss": 1.9665, "step": 9079 }, { "epoch": 0.5061033387213645, "grad_norm": 0.6080803275108337, "learning_rate": 5.036952244282087e-05, "loss": 1.7505, "step": 9080 }, { "epoch": 0.5061590769745277, "grad_norm": 0.5581051111221313, "learning_rate": 5.036061844387447e-05, "loss": 1.3145, "step": 9081 }, { "epoch": 0.5062148152276907, "grad_norm": 0.6103323698043823, "learning_rate": 5.035171443349135e-05, "loss": 1.8727, "step": 9082 }, { "epoch": 0.5062705534808539, "grad_norm": 0.5805239081382751, "learning_rate": 5.034281041195389e-05, "loss": 1.8522, "step": 9083 }, { "epoch": 0.5063262917340171, "grad_norm": 0.5124911665916443, "learning_rate": 5.0333906379544485e-05, "loss": 1.4206, "step": 9084 }, { "epoch": 0.5063820299871802, "grad_norm": 0.5628135204315186, "learning_rate": 5.0325002336545525e-05, "loss": 1.6741, "step": 9085 }, { "epoch": 0.5064377682403434, "grad_norm": 0.6049720644950867, "learning_rate": 5.031609828323938e-05, "loss": 1.6622, "step": 9086 }, { "epoch": 0.5064935064935064, "grad_norm": 0.5559591054916382, "learning_rate": 5.030719421990845e-05, "loss": 1.6901, "step": 9087 }, { "epoch": 0.5065492447466696, "grad_norm": 0.5482590198516846, "learning_rate": 5.029829014683509e-05, "loss": 1.5533, "step": 9088 }, { "epoch": 0.5066049829998328, "grad_norm": 0.617445170879364, "learning_rate": 5.0289386064301715e-05, "loss": 2.0952, "step": 9089 }, { "epoch": 0.5066607212529959, "grad_norm": 0.5329674482345581, "learning_rate": 5.0280481972590696e-05, "loss": 1.5283, "step": 9090 }, { "epoch": 0.5067164595061591, "grad_norm": 0.5704628825187683, "learning_rate": 5.027157787198443e-05, "loss": 1.5935, "step": 9091 }, { "epoch": 0.5067721977593223, "grad_norm": 0.5466018319129944, "learning_rate": 5.0262673762765314e-05, "loss": 1.6714, "step": 9092 }, { "epoch": 0.5068279360124853, "grad_norm": 0.581349790096283, "learning_rate": 5.0253769645215684e-05, "loss": 1.6966, "step": 9093 }, { "epoch": 0.5068836742656485, "grad_norm": 0.5872965455055237, "learning_rate": 5.024486551961799e-05, "loss": 1.8099, "step": 9094 }, { "epoch": 0.5069394125188117, "grad_norm": 0.5728545188903809, "learning_rate": 5.0235961386254584e-05, "loss": 1.8034, "step": 9095 }, { "epoch": 0.5069951507719748, "grad_norm": 0.6576269268989563, "learning_rate": 5.022705724540785e-05, "loss": 1.761, "step": 9096 }, { "epoch": 0.507050889025138, "grad_norm": 0.49354177713394165, "learning_rate": 5.0218153097360174e-05, "loss": 1.5518, "step": 9097 }, { "epoch": 0.5071066272783011, "grad_norm": 0.5540168881416321, "learning_rate": 5.0209248942393975e-05, "loss": 1.6151, "step": 9098 }, { "epoch": 0.5071623655314642, "grad_norm": 0.5904643535614014, "learning_rate": 5.020034478079161e-05, "loss": 1.6079, "step": 9099 }, { "epoch": 0.5072181037846274, "grad_norm": 0.52375328540802, "learning_rate": 5.0191440612835484e-05, "loss": 1.4861, "step": 9100 }, { "epoch": 0.5072738420377906, "grad_norm": 0.5494303703308105, "learning_rate": 5.018253643880797e-05, "loss": 1.6863, "step": 9101 }, { "epoch": 0.5073295802909537, "grad_norm": 0.5974358320236206, "learning_rate": 5.017363225899147e-05, "loss": 1.5901, "step": 9102 }, { "epoch": 0.5073853185441168, "grad_norm": 0.5655843615531921, "learning_rate": 5.0164728073668354e-05, "loss": 1.6177, "step": 9103 }, { "epoch": 0.50744105679728, "grad_norm": 0.5226239562034607, "learning_rate": 5.0155823883121025e-05, "loss": 1.5947, "step": 9104 }, { "epoch": 0.5074967950504431, "grad_norm": 0.5766085982322693, "learning_rate": 5.014691968763189e-05, "loss": 1.6997, "step": 9105 }, { "epoch": 0.5075525333036063, "grad_norm": 0.5901437997817993, "learning_rate": 5.0138015487483305e-05, "loss": 1.7778, "step": 9106 }, { "epoch": 0.5076082715567695, "grad_norm": 0.5961461663246155, "learning_rate": 5.012911128295768e-05, "loss": 1.6958, "step": 9107 }, { "epoch": 0.5076640098099325, "grad_norm": 0.5779803991317749, "learning_rate": 5.012020707433739e-05, "loss": 1.7133, "step": 9108 }, { "epoch": 0.5077197480630957, "grad_norm": 0.5328028202056885, "learning_rate": 5.011130286190483e-05, "loss": 1.3029, "step": 9109 }, { "epoch": 0.5077754863162588, "grad_norm": 0.5509020686149597, "learning_rate": 5.0102398645942404e-05, "loss": 1.6388, "step": 9110 }, { "epoch": 0.507831224569422, "grad_norm": 0.5838056802749634, "learning_rate": 5.009349442673249e-05, "loss": 1.6256, "step": 9111 }, { "epoch": 0.5078869628225852, "grad_norm": 0.560120701789856, "learning_rate": 5.008459020455747e-05, "loss": 1.7203, "step": 9112 }, { "epoch": 0.5079427010757482, "grad_norm": 0.5297266840934753, "learning_rate": 5.007568597969975e-05, "loss": 1.6371, "step": 9113 }, { "epoch": 0.5079984393289114, "grad_norm": 0.5594682097434998, "learning_rate": 5.00667817524417e-05, "loss": 1.7731, "step": 9114 }, { "epoch": 0.5080541775820746, "grad_norm": 0.5355550050735474, "learning_rate": 5.005787752306573e-05, "loss": 1.7304, "step": 9115 }, { "epoch": 0.5081099158352377, "grad_norm": 0.5299372673034668, "learning_rate": 5.0048973291854215e-05, "loss": 1.6358, "step": 9116 }, { "epoch": 0.5081656540884009, "grad_norm": 0.5633680820465088, "learning_rate": 5.004006905908956e-05, "loss": 1.6261, "step": 9117 }, { "epoch": 0.508221392341564, "grad_norm": 0.621525764465332, "learning_rate": 5.0031164825054154e-05, "loss": 1.7944, "step": 9118 }, { "epoch": 0.5082771305947271, "grad_norm": 0.5595192313194275, "learning_rate": 5.0022260590030365e-05, "loss": 1.6483, "step": 9119 }, { "epoch": 0.5083328688478903, "grad_norm": 0.6154051423072815, "learning_rate": 5.0013356354300625e-05, "loss": 1.7028, "step": 9120 }, { "epoch": 0.5083886071010535, "grad_norm": 0.5414613485336304, "learning_rate": 5.000445211814727e-05, "loss": 1.7512, "step": 9121 }, { "epoch": 0.5084443453542166, "grad_norm": 0.5534652471542358, "learning_rate": 4.9995547881852745e-05, "loss": 1.6766, "step": 9122 }, { "epoch": 0.5085000836073797, "grad_norm": 0.5453454256057739, "learning_rate": 4.9986643645699387e-05, "loss": 1.3173, "step": 9123 }, { "epoch": 0.5085558218605429, "grad_norm": 0.6073355674743652, "learning_rate": 4.997773940996964e-05, "loss": 1.7782, "step": 9124 }, { "epoch": 0.508611560113706, "grad_norm": 0.5917234420776367, "learning_rate": 4.9968835174945864e-05, "loss": 1.566, "step": 9125 }, { "epoch": 0.5086672983668692, "grad_norm": 0.5518240928649902, "learning_rate": 4.995993094091044e-05, "loss": 1.61, "step": 9126 }, { "epoch": 0.5087230366200324, "grad_norm": 0.5641380548477173, "learning_rate": 4.995102670814579e-05, "loss": 1.3672, "step": 9127 }, { "epoch": 0.5087787748731954, "grad_norm": 0.5821805596351624, "learning_rate": 4.9942122476934286e-05, "loss": 1.8647, "step": 9128 }, { "epoch": 0.5088345131263586, "grad_norm": 0.536016047000885, "learning_rate": 4.9933218247558316e-05, "loss": 1.4283, "step": 9129 }, { "epoch": 0.5088902513795218, "grad_norm": 0.5962494015693665, "learning_rate": 4.992431402030026e-05, "loss": 1.7816, "step": 9130 }, { "epoch": 0.5089459896326849, "grad_norm": 0.5421521067619324, "learning_rate": 4.9915409795442553e-05, "loss": 1.5547, "step": 9131 }, { "epoch": 0.5090017278858481, "grad_norm": 0.5603907108306885, "learning_rate": 4.990650557326752e-05, "loss": 1.6848, "step": 9132 }, { "epoch": 0.5090574661390111, "grad_norm": 0.5411096215248108, "learning_rate": 4.98976013540576e-05, "loss": 1.5241, "step": 9133 }, { "epoch": 0.5091132043921743, "grad_norm": 0.5356809496879578, "learning_rate": 4.988869713809518e-05, "loss": 1.3605, "step": 9134 }, { "epoch": 0.5091689426453375, "grad_norm": 0.5351254940032959, "learning_rate": 4.9879792925662624e-05, "loss": 1.663, "step": 9135 }, { "epoch": 0.5092246808985006, "grad_norm": 0.5317051410675049, "learning_rate": 4.987088871704234e-05, "loss": 1.7112, "step": 9136 }, { "epoch": 0.5092804191516638, "grad_norm": 0.5253128409385681, "learning_rate": 4.9861984512516706e-05, "loss": 1.5613, "step": 9137 }, { "epoch": 0.509336157404827, "grad_norm": 0.5916141271591187, "learning_rate": 4.985308031236811e-05, "loss": 1.7832, "step": 9138 }, { "epoch": 0.50939189565799, "grad_norm": 0.5364113450050354, "learning_rate": 4.984417611687899e-05, "loss": 1.6785, "step": 9139 }, { "epoch": 0.5094476339111532, "grad_norm": 0.5543467402458191, "learning_rate": 4.983527192633165e-05, "loss": 1.6664, "step": 9140 }, { "epoch": 0.5095033721643164, "grad_norm": 0.5683530569076538, "learning_rate": 4.982636774100855e-05, "loss": 1.5922, "step": 9141 }, { "epoch": 0.5095591104174795, "grad_norm": 0.5523553490638733, "learning_rate": 4.981746356119204e-05, "loss": 1.51, "step": 9142 }, { "epoch": 0.5096148486706427, "grad_norm": 0.5828970074653625, "learning_rate": 4.980855938716454e-05, "loss": 1.661, "step": 9143 }, { "epoch": 0.5096705869238058, "grad_norm": 0.556447446346283, "learning_rate": 4.97996552192084e-05, "loss": 1.5701, "step": 9144 }, { "epoch": 0.5097263251769689, "grad_norm": 0.5221887826919556, "learning_rate": 4.979075105760603e-05, "loss": 1.389, "step": 9145 }, { "epoch": 0.5097820634301321, "grad_norm": 0.528141438961029, "learning_rate": 4.978184690263983e-05, "loss": 1.585, "step": 9146 }, { "epoch": 0.5098378016832953, "grad_norm": 0.5802522897720337, "learning_rate": 4.9772942754592156e-05, "loss": 1.7289, "step": 9147 }, { "epoch": 0.5098935399364584, "grad_norm": 0.6549295783042908, "learning_rate": 4.976403861374545e-05, "loss": 1.6774, "step": 9148 }, { "epoch": 0.5099492781896215, "grad_norm": 0.5203224420547485, "learning_rate": 4.975513448038202e-05, "loss": 1.4913, "step": 9149 }, { "epoch": 0.5100050164427847, "grad_norm": 0.5493060946464539, "learning_rate": 4.974623035478432e-05, "loss": 1.8086, "step": 9150 }, { "epoch": 0.5100607546959478, "grad_norm": 0.5371272563934326, "learning_rate": 4.9737326237234704e-05, "loss": 1.48, "step": 9151 }, { "epoch": 0.510116492949111, "grad_norm": 0.5722330212593079, "learning_rate": 4.972842212801557e-05, "loss": 1.6754, "step": 9152 }, { "epoch": 0.5101722312022742, "grad_norm": 0.5680810809135437, "learning_rate": 4.9719518027409315e-05, "loss": 1.6418, "step": 9153 }, { "epoch": 0.5102279694554372, "grad_norm": 0.5572932362556458, "learning_rate": 4.9710613935698296e-05, "loss": 1.5825, "step": 9154 }, { "epoch": 0.5102837077086004, "grad_norm": 0.56486976146698, "learning_rate": 4.970170985316493e-05, "loss": 1.7214, "step": 9155 }, { "epoch": 0.5103394459617635, "grad_norm": 0.5949878692626953, "learning_rate": 4.969280578009157e-05, "loss": 1.6079, "step": 9156 }, { "epoch": 0.5103951842149267, "grad_norm": 0.5963469743728638, "learning_rate": 4.9683901716760645e-05, "loss": 1.7796, "step": 9157 }, { "epoch": 0.5104509224680899, "grad_norm": 0.518810510635376, "learning_rate": 4.967499766345449e-05, "loss": 1.5331, "step": 9158 }, { "epoch": 0.510506660721253, "grad_norm": 0.5369781255722046, "learning_rate": 4.966609362045552e-05, "loss": 1.5991, "step": 9159 }, { "epoch": 0.5105623989744161, "grad_norm": 0.603927731513977, "learning_rate": 4.9657189588046125e-05, "loss": 1.6963, "step": 9160 }, { "epoch": 0.5106181372275793, "grad_norm": 0.512532651424408, "learning_rate": 4.964828556650867e-05, "loss": 1.4591, "step": 9161 }, { "epoch": 0.5106738754807424, "grad_norm": 0.5606699585914612, "learning_rate": 4.9639381556125545e-05, "loss": 1.7269, "step": 9162 }, { "epoch": 0.5107296137339056, "grad_norm": 0.5832485556602478, "learning_rate": 4.963047755717914e-05, "loss": 1.7202, "step": 9163 }, { "epoch": 0.5107853519870688, "grad_norm": 0.549618661403656, "learning_rate": 4.962157356995181e-05, "loss": 1.543, "step": 9164 }, { "epoch": 0.5108410902402318, "grad_norm": 0.6027174592018127, "learning_rate": 4.9612669594725985e-05, "loss": 1.506, "step": 9165 }, { "epoch": 0.510896828493395, "grad_norm": 0.5265709161758423, "learning_rate": 4.960376563178402e-05, "loss": 1.4054, "step": 9166 }, { "epoch": 0.5109525667465582, "grad_norm": 0.6092290282249451, "learning_rate": 4.95948616814083e-05, "loss": 1.8014, "step": 9167 }, { "epoch": 0.5110083049997213, "grad_norm": 0.581297755241394, "learning_rate": 4.958595774388119e-05, "loss": 1.6825, "step": 9168 }, { "epoch": 0.5110640432528845, "grad_norm": 0.5989497303962708, "learning_rate": 4.9577053819485106e-05, "loss": 1.8822, "step": 9169 }, { "epoch": 0.5111197815060476, "grad_norm": 0.5412517786026001, "learning_rate": 4.95681499085024e-05, "loss": 1.5337, "step": 9170 }, { "epoch": 0.5111755197592107, "grad_norm": 0.5634650588035583, "learning_rate": 4.9559246011215445e-05, "loss": 1.6926, "step": 9171 }, { "epoch": 0.5112312580123739, "grad_norm": 0.56587815284729, "learning_rate": 4.955034212790667e-05, "loss": 1.8106, "step": 9172 }, { "epoch": 0.5112869962655371, "grad_norm": 0.5138219594955444, "learning_rate": 4.954143825885839e-05, "loss": 1.5418, "step": 9173 }, { "epoch": 0.5113427345187002, "grad_norm": 0.5837535262107849, "learning_rate": 4.9532534404353045e-05, "loss": 1.7995, "step": 9174 }, { "epoch": 0.5113984727718633, "grad_norm": 0.5723191499710083, "learning_rate": 4.952363056467295e-05, "loss": 1.7018, "step": 9175 }, { "epoch": 0.5114542110250265, "grad_norm": 0.5086800456047058, "learning_rate": 4.951472674010054e-05, "loss": 1.4375, "step": 9176 }, { "epoch": 0.5115099492781896, "grad_norm": 0.5769858360290527, "learning_rate": 4.9505822930918154e-05, "loss": 1.856, "step": 9177 }, { "epoch": 0.5115656875313528, "grad_norm": 0.5059775114059448, "learning_rate": 4.9496919137408194e-05, "loss": 1.5449, "step": 9178 }, { "epoch": 0.5116214257845159, "grad_norm": 0.5650221705436707, "learning_rate": 4.948801535985302e-05, "loss": 1.5744, "step": 9179 }, { "epoch": 0.511677164037679, "grad_norm": 0.6061702966690063, "learning_rate": 4.947911159853502e-05, "loss": 1.5903, "step": 9180 }, { "epoch": 0.5117329022908422, "grad_norm": 0.6307567358016968, "learning_rate": 4.947020785373657e-05, "loss": 1.6904, "step": 9181 }, { "epoch": 0.5117886405440053, "grad_norm": 0.5376378297805786, "learning_rate": 4.9461304125740006e-05, "loss": 1.6402, "step": 9182 }, { "epoch": 0.5118443787971685, "grad_norm": 0.5407423973083496, "learning_rate": 4.945240041482777e-05, "loss": 1.6268, "step": 9183 }, { "epoch": 0.5119001170503317, "grad_norm": 0.575613260269165, "learning_rate": 4.9443496721282176e-05, "loss": 1.7676, "step": 9184 }, { "epoch": 0.5119558553034947, "grad_norm": 0.5587126612663269, "learning_rate": 4.943459304538562e-05, "loss": 1.787, "step": 9185 }, { "epoch": 0.5120115935566579, "grad_norm": 0.5674868226051331, "learning_rate": 4.94256893874205e-05, "loss": 1.4734, "step": 9186 }, { "epoch": 0.5120673318098211, "grad_norm": 0.5866878032684326, "learning_rate": 4.941678574766915e-05, "loss": 1.6512, "step": 9187 }, { "epoch": 0.5121230700629842, "grad_norm": 0.5577338337898254, "learning_rate": 4.9407882126413964e-05, "loss": 1.6187, "step": 9188 }, { "epoch": 0.5121788083161474, "grad_norm": 0.551834225654602, "learning_rate": 4.939897852393729e-05, "loss": 1.7205, "step": 9189 }, { "epoch": 0.5122345465693106, "grad_norm": 0.5380664467811584, "learning_rate": 4.939007494052153e-05, "loss": 1.7667, "step": 9190 }, { "epoch": 0.5122902848224736, "grad_norm": 0.5532002449035645, "learning_rate": 4.938117137644901e-05, "loss": 1.6104, "step": 9191 }, { "epoch": 0.5123460230756368, "grad_norm": 0.532942533493042, "learning_rate": 4.937226783200214e-05, "loss": 1.5063, "step": 9192 }, { "epoch": 0.5124017613288, "grad_norm": 0.6073448657989502, "learning_rate": 4.936336430746328e-05, "loss": 1.8014, "step": 9193 }, { "epoch": 0.5124574995819631, "grad_norm": 0.6055412292480469, "learning_rate": 4.935446080311479e-05, "loss": 2.0275, "step": 9194 }, { "epoch": 0.5125132378351263, "grad_norm": 0.5389105081558228, "learning_rate": 4.934555731923905e-05, "loss": 1.4672, "step": 9195 }, { "epoch": 0.5125689760882894, "grad_norm": 0.5867198705673218, "learning_rate": 4.9336653856118395e-05, "loss": 1.9117, "step": 9196 }, { "epoch": 0.5126247143414525, "grad_norm": 0.5566348433494568, "learning_rate": 4.932775041403521e-05, "loss": 1.7259, "step": 9197 }, { "epoch": 0.5126804525946157, "grad_norm": 0.5522982478141785, "learning_rate": 4.93188469932719e-05, "loss": 1.6182, "step": 9198 }, { "epoch": 0.5127361908477789, "grad_norm": 0.5474398136138916, "learning_rate": 4.9309943594110743e-05, "loss": 1.6146, "step": 9199 }, { "epoch": 0.512791929100942, "grad_norm": 0.5723056793212891, "learning_rate": 4.93010402168342e-05, "loss": 1.5467, "step": 9200 }, { "epoch": 0.5128476673541051, "grad_norm": 0.5831982493400574, "learning_rate": 4.9292136861724544e-05, "loss": 1.7189, "step": 9201 }, { "epoch": 0.5129034056072682, "grad_norm": 0.5088267922401428, "learning_rate": 4.9283233529064205e-05, "loss": 1.6331, "step": 9202 }, { "epoch": 0.5129591438604314, "grad_norm": 0.5105864405632019, "learning_rate": 4.9274330219135506e-05, "loss": 1.3119, "step": 9203 }, { "epoch": 0.5130148821135946, "grad_norm": 0.5990265011787415, "learning_rate": 4.926542693222083e-05, "loss": 1.7628, "step": 9204 }, { "epoch": 0.5130706203667577, "grad_norm": 0.5516785383224487, "learning_rate": 4.925652366860253e-05, "loss": 1.8385, "step": 9205 }, { "epoch": 0.5131263586199208, "grad_norm": 0.5530927777290344, "learning_rate": 4.9247620428562954e-05, "loss": 1.5642, "step": 9206 }, { "epoch": 0.513182096873084, "grad_norm": 0.553615152835846, "learning_rate": 4.9238717212384485e-05, "loss": 1.5955, "step": 9207 }, { "epoch": 0.5132378351262471, "grad_norm": 0.553424060344696, "learning_rate": 4.922981402034945e-05, "loss": 1.4373, "step": 9208 }, { "epoch": 0.5132935733794103, "grad_norm": 0.52947598695755, "learning_rate": 4.922091085274025e-05, "loss": 1.5492, "step": 9209 }, { "epoch": 0.5133493116325735, "grad_norm": 0.5600340366363525, "learning_rate": 4.9212007709839185e-05, "loss": 1.7274, "step": 9210 }, { "epoch": 0.5134050498857365, "grad_norm": 0.525035560131073, "learning_rate": 4.9203104591928654e-05, "loss": 1.6216, "step": 9211 }, { "epoch": 0.5134607881388997, "grad_norm": 0.5979744791984558, "learning_rate": 4.919420149929101e-05, "loss": 1.7127, "step": 9212 }, { "epoch": 0.5135165263920629, "grad_norm": 0.5735787153244019, "learning_rate": 4.918529843220858e-05, "loss": 1.8044, "step": 9213 }, { "epoch": 0.513572264645226, "grad_norm": 0.544146716594696, "learning_rate": 4.917639539096375e-05, "loss": 1.7176, "step": 9214 }, { "epoch": 0.5136280028983892, "grad_norm": 0.6068428158760071, "learning_rate": 4.9167492375838844e-05, "loss": 1.8507, "step": 9215 }, { "epoch": 0.5136837411515524, "grad_norm": 0.5296306014060974, "learning_rate": 4.915858938711624e-05, "loss": 1.4948, "step": 9216 }, { "epoch": 0.5137394794047154, "grad_norm": 0.5465298891067505, "learning_rate": 4.914968642507824e-05, "loss": 1.7211, "step": 9217 }, { "epoch": 0.5137952176578786, "grad_norm": 0.5519313812255859, "learning_rate": 4.9140783490007255e-05, "loss": 1.6642, "step": 9218 }, { "epoch": 0.5138509559110418, "grad_norm": 0.5677876472473145, "learning_rate": 4.9131880582185614e-05, "loss": 1.7359, "step": 9219 }, { "epoch": 0.5139066941642049, "grad_norm": 0.5681816339492798, "learning_rate": 4.912297770189565e-05, "loss": 1.6898, "step": 9220 }, { "epoch": 0.513962432417368, "grad_norm": 0.5291382670402527, "learning_rate": 4.911407484941973e-05, "loss": 1.6172, "step": 9221 }, { "epoch": 0.5140181706705312, "grad_norm": 0.5665237307548523, "learning_rate": 4.910517202504017e-05, "loss": 1.6677, "step": 9222 }, { "epoch": 0.5140739089236943, "grad_norm": 0.5834330320358276, "learning_rate": 4.909626922903934e-05, "loss": 1.8476, "step": 9223 }, { "epoch": 0.5141296471768575, "grad_norm": 0.5709316730499268, "learning_rate": 4.90873664616996e-05, "loss": 1.9254, "step": 9224 }, { "epoch": 0.5141853854300206, "grad_norm": 0.5521526336669922, "learning_rate": 4.907846372330325e-05, "loss": 2.0271, "step": 9225 }, { "epoch": 0.5142411236831838, "grad_norm": 0.6235350370407104, "learning_rate": 4.906956101413269e-05, "loss": 1.8016, "step": 9226 }, { "epoch": 0.5142968619363469, "grad_norm": 0.5495184659957886, "learning_rate": 4.90606583344702e-05, "loss": 1.7529, "step": 9227 }, { "epoch": 0.51435260018951, "grad_norm": 0.5534826517105103, "learning_rate": 4.905175568459817e-05, "loss": 1.7477, "step": 9228 }, { "epoch": 0.5144083384426732, "grad_norm": 0.5249108076095581, "learning_rate": 4.904285306479891e-05, "loss": 1.627, "step": 9229 }, { "epoch": 0.5144640766958364, "grad_norm": 0.49146464467048645, "learning_rate": 4.903395047535477e-05, "loss": 1.459, "step": 9230 }, { "epoch": 0.5145198149489995, "grad_norm": 0.5858702659606934, "learning_rate": 4.90250479165481e-05, "loss": 1.6794, "step": 9231 }, { "epoch": 0.5145755532021626, "grad_norm": 0.5300642848014832, "learning_rate": 4.901614538866121e-05, "loss": 1.5589, "step": 9232 }, { "epoch": 0.5146312914553258, "grad_norm": 0.6229625344276428, "learning_rate": 4.900724289197647e-05, "loss": 1.9199, "step": 9233 }, { "epoch": 0.5146870297084889, "grad_norm": 0.565897524356842, "learning_rate": 4.899834042677617e-05, "loss": 1.8075, "step": 9234 }, { "epoch": 0.5147427679616521, "grad_norm": 0.5347508192062378, "learning_rate": 4.898943799334271e-05, "loss": 1.4777, "step": 9235 }, { "epoch": 0.5147985062148153, "grad_norm": 0.6027230024337769, "learning_rate": 4.8980535591958346e-05, "loss": 1.8581, "step": 9236 }, { "epoch": 0.5148542444679783, "grad_norm": 0.603020429611206, "learning_rate": 4.897163322290546e-05, "loss": 1.9628, "step": 9237 }, { "epoch": 0.5149099827211415, "grad_norm": 0.5772542357444763, "learning_rate": 4.896273088646639e-05, "loss": 1.5789, "step": 9238 }, { "epoch": 0.5149657209743047, "grad_norm": 0.537726879119873, "learning_rate": 4.8953828582923435e-05, "loss": 1.6334, "step": 9239 }, { "epoch": 0.5150214592274678, "grad_norm": 0.5418381094932556, "learning_rate": 4.894492631255895e-05, "loss": 1.7236, "step": 9240 }, { "epoch": 0.515077197480631, "grad_norm": 0.5719316601753235, "learning_rate": 4.8936024075655234e-05, "loss": 1.8026, "step": 9241 }, { "epoch": 0.5151329357337942, "grad_norm": 0.5505056381225586, "learning_rate": 4.892712187249465e-05, "loss": 1.7085, "step": 9242 }, { "epoch": 0.5151886739869572, "grad_norm": 0.5943016409873962, "learning_rate": 4.891821970335948e-05, "loss": 1.8545, "step": 9243 }, { "epoch": 0.5152444122401204, "grad_norm": 0.5240996479988098, "learning_rate": 4.8909317568532074e-05, "loss": 1.5689, "step": 9244 }, { "epoch": 0.5153001504932836, "grad_norm": 0.5402621030807495, "learning_rate": 4.890041546829478e-05, "loss": 1.6177, "step": 9245 }, { "epoch": 0.5153558887464467, "grad_norm": 0.5427978038787842, "learning_rate": 4.889151340292988e-05, "loss": 1.6859, "step": 9246 }, { "epoch": 0.5154116269996099, "grad_norm": 0.5826436281204224, "learning_rate": 4.888261137271972e-05, "loss": 1.7225, "step": 9247 }, { "epoch": 0.5154673652527729, "grad_norm": 0.5483592748641968, "learning_rate": 4.8873709377946607e-05, "loss": 1.8317, "step": 9248 }, { "epoch": 0.5155231035059361, "grad_norm": 0.5138580799102783, "learning_rate": 4.886480741889285e-05, "loss": 1.3468, "step": 9249 }, { "epoch": 0.5155788417590993, "grad_norm": 0.5693102478981018, "learning_rate": 4.8855905495840824e-05, "loss": 1.8712, "step": 9250 }, { "epoch": 0.5156345800122624, "grad_norm": 0.5839586853981018, "learning_rate": 4.8847003609072766e-05, "loss": 1.7937, "step": 9251 }, { "epoch": 0.5156903182654256, "grad_norm": 0.5809890627861023, "learning_rate": 4.883810175887106e-05, "loss": 1.6511, "step": 9252 }, { "epoch": 0.5157460565185887, "grad_norm": 0.5595370531082153, "learning_rate": 4.882919994551797e-05, "loss": 1.6945, "step": 9253 }, { "epoch": 0.5158017947717518, "grad_norm": 0.5431410074234009, "learning_rate": 4.882029816929585e-05, "loss": 1.555, "step": 9254 }, { "epoch": 0.515857533024915, "grad_norm": 0.516463577747345, "learning_rate": 4.881139643048698e-05, "loss": 1.5392, "step": 9255 }, { "epoch": 0.5159132712780782, "grad_norm": 0.5569630265235901, "learning_rate": 4.8802494729373684e-05, "loss": 1.5023, "step": 9256 }, { "epoch": 0.5159690095312413, "grad_norm": 0.6542758941650391, "learning_rate": 4.879359306623829e-05, "loss": 1.7664, "step": 9257 }, { "epoch": 0.5160247477844044, "grad_norm": 0.5755527019500732, "learning_rate": 4.878469144136306e-05, "loss": 1.7523, "step": 9258 }, { "epoch": 0.5160804860375676, "grad_norm": 0.603937029838562, "learning_rate": 4.8775789855030366e-05, "loss": 1.7377, "step": 9259 }, { "epoch": 0.5161362242907307, "grad_norm": 0.6183059215545654, "learning_rate": 4.876688830752245e-05, "loss": 1.9714, "step": 9260 }, { "epoch": 0.5161919625438939, "grad_norm": 0.5435531139373779, "learning_rate": 4.8757986799121685e-05, "loss": 1.5455, "step": 9261 }, { "epoch": 0.5162477007970571, "grad_norm": 0.5262885689735413, "learning_rate": 4.87490853301103e-05, "loss": 1.547, "step": 9262 }, { "epoch": 0.5163034390502201, "grad_norm": 0.5731160640716553, "learning_rate": 4.874018390077065e-05, "loss": 1.7112, "step": 9263 }, { "epoch": 0.5163591773033833, "grad_norm": 0.5427829623222351, "learning_rate": 4.8731282511385025e-05, "loss": 1.5646, "step": 9264 }, { "epoch": 0.5164149155565465, "grad_norm": 0.5715686678886414, "learning_rate": 4.872238116223571e-05, "loss": 1.7205, "step": 9265 }, { "epoch": 0.5164706538097096, "grad_norm": 0.5412135124206543, "learning_rate": 4.871347985360503e-05, "loss": 1.641, "step": 9266 }, { "epoch": 0.5165263920628728, "grad_norm": 0.5642713308334351, "learning_rate": 4.870457858577526e-05, "loss": 1.7817, "step": 9267 }, { "epoch": 0.516582130316036, "grad_norm": 0.6018970608711243, "learning_rate": 4.869567735902871e-05, "loss": 2.0283, "step": 9268 }, { "epoch": 0.516637868569199, "grad_norm": 0.5858074426651001, "learning_rate": 4.8686776173647653e-05, "loss": 1.7466, "step": 9269 }, { "epoch": 0.5166936068223622, "grad_norm": 0.5206944942474365, "learning_rate": 4.867787502991441e-05, "loss": 1.4493, "step": 9270 }, { "epoch": 0.5167493450755253, "grad_norm": 0.5470089912414551, "learning_rate": 4.866897392811126e-05, "loss": 1.6095, "step": 9271 }, { "epoch": 0.5168050833286885, "grad_norm": 0.5739067196846008, "learning_rate": 4.866007286852051e-05, "loss": 1.6357, "step": 9272 }, { "epoch": 0.5168608215818516, "grad_norm": 0.567419171333313, "learning_rate": 4.865117185142443e-05, "loss": 1.6251, "step": 9273 }, { "epoch": 0.5169165598350147, "grad_norm": 0.5760751366615295, "learning_rate": 4.8642270877105305e-05, "loss": 1.6139, "step": 9274 }, { "epoch": 0.5169722980881779, "grad_norm": 0.5624504089355469, "learning_rate": 4.863336994584542e-05, "loss": 1.7346, "step": 9275 }, { "epoch": 0.5170280363413411, "grad_norm": 0.5871464610099792, "learning_rate": 4.8624469057927116e-05, "loss": 1.8586, "step": 9276 }, { "epoch": 0.5170837745945042, "grad_norm": 0.5720483064651489, "learning_rate": 4.861556821363259e-05, "loss": 1.6571, "step": 9277 }, { "epoch": 0.5171395128476673, "grad_norm": 0.6062625646591187, "learning_rate": 4.860666741324419e-05, "loss": 1.6364, "step": 9278 }, { "epoch": 0.5171952511008305, "grad_norm": 0.5324755907058716, "learning_rate": 4.8597766657044166e-05, "loss": 1.5787, "step": 9279 }, { "epoch": 0.5172509893539936, "grad_norm": 0.5596499443054199, "learning_rate": 4.8588865945314826e-05, "loss": 1.7362, "step": 9280 }, { "epoch": 0.5173067276071568, "grad_norm": 0.5920062065124512, "learning_rate": 4.857996527833841e-05, "loss": 1.9419, "step": 9281 }, { "epoch": 0.51736246586032, "grad_norm": 0.574780285358429, "learning_rate": 4.857106465639723e-05, "loss": 1.8925, "step": 9282 }, { "epoch": 0.517418204113483, "grad_norm": 0.5297632813453674, "learning_rate": 4.8562164079773545e-05, "loss": 1.5802, "step": 9283 }, { "epoch": 0.5174739423666462, "grad_norm": 0.6014637351036072, "learning_rate": 4.855326354874962e-05, "loss": 1.651, "step": 9284 }, { "epoch": 0.5175296806198094, "grad_norm": 0.5652185082435608, "learning_rate": 4.8544363063607764e-05, "loss": 1.5509, "step": 9285 }, { "epoch": 0.5175854188729725, "grad_norm": 0.5464864373207092, "learning_rate": 4.8535462624630196e-05, "loss": 1.5164, "step": 9286 }, { "epoch": 0.5176411571261357, "grad_norm": 0.5614228248596191, "learning_rate": 4.852656223209925e-05, "loss": 1.4815, "step": 9287 }, { "epoch": 0.5176968953792989, "grad_norm": 0.5757022500038147, "learning_rate": 4.8517661886297124e-05, "loss": 1.6221, "step": 9288 }, { "epoch": 0.5177526336324619, "grad_norm": 0.5105504393577576, "learning_rate": 4.850876158750613e-05, "loss": 1.5974, "step": 9289 }, { "epoch": 0.5178083718856251, "grad_norm": 0.5799221992492676, "learning_rate": 4.849986133600854e-05, "loss": 1.7308, "step": 9290 }, { "epoch": 0.5178641101387883, "grad_norm": 0.6229887008666992, "learning_rate": 4.849096113208658e-05, "loss": 1.828, "step": 9291 }, { "epoch": 0.5179198483919514, "grad_norm": 0.6005191206932068, "learning_rate": 4.848206097602256e-05, "loss": 1.7686, "step": 9292 }, { "epoch": 0.5179755866451146, "grad_norm": 0.6147307753562927, "learning_rate": 4.8473160868098697e-05, "loss": 1.8687, "step": 9293 }, { "epoch": 0.5180313248982776, "grad_norm": 0.5599120259284973, "learning_rate": 4.8464260808597276e-05, "loss": 1.4875, "step": 9294 }, { "epoch": 0.5180870631514408, "grad_norm": 0.63963782787323, "learning_rate": 4.8455360797800534e-05, "loss": 1.7863, "step": 9295 }, { "epoch": 0.518142801404604, "grad_norm": 0.5774217247962952, "learning_rate": 4.844646083599075e-05, "loss": 1.842, "step": 9296 }, { "epoch": 0.5181985396577671, "grad_norm": 0.5250087380409241, "learning_rate": 4.843756092345018e-05, "loss": 1.4797, "step": 9297 }, { "epoch": 0.5182542779109303, "grad_norm": 0.6038861274719238, "learning_rate": 4.8428661060461055e-05, "loss": 1.6694, "step": 9298 }, { "epoch": 0.5183100161640934, "grad_norm": 0.5457639098167419, "learning_rate": 4.8419761247305655e-05, "loss": 1.611, "step": 9299 }, { "epoch": 0.5183657544172565, "grad_norm": 0.5245123505592346, "learning_rate": 4.8410861484266206e-05, "loss": 1.4735, "step": 9300 }, { "epoch": 0.5184214926704197, "grad_norm": 0.5180814266204834, "learning_rate": 4.8401961771624946e-05, "loss": 1.5019, "step": 9301 }, { "epoch": 0.5184772309235829, "grad_norm": 0.5676085352897644, "learning_rate": 4.839306210966418e-05, "loss": 1.7617, "step": 9302 }, { "epoch": 0.518532969176746, "grad_norm": 0.6066186428070068, "learning_rate": 4.838416249866608e-05, "loss": 1.8268, "step": 9303 }, { "epoch": 0.5185887074299091, "grad_norm": 0.5835402607917786, "learning_rate": 4.837526293891295e-05, "loss": 1.816, "step": 9304 }, { "epoch": 0.5186444456830723, "grad_norm": 0.520706057548523, "learning_rate": 4.8366363430687e-05, "loss": 1.6345, "step": 9305 }, { "epoch": 0.5187001839362354, "grad_norm": 0.54007488489151, "learning_rate": 4.8357463974270474e-05, "loss": 1.5671, "step": 9306 }, { "epoch": 0.5187559221893986, "grad_norm": 0.5502505302429199, "learning_rate": 4.834856456994561e-05, "loss": 1.5713, "step": 9307 }, { "epoch": 0.5188116604425618, "grad_norm": 0.5642566084861755, "learning_rate": 4.8339665217994654e-05, "loss": 1.6636, "step": 9308 }, { "epoch": 0.5188673986957248, "grad_norm": 0.5338882207870483, "learning_rate": 4.833076591869984e-05, "loss": 1.6706, "step": 9309 }, { "epoch": 0.518923136948888, "grad_norm": 0.5252307057380676, "learning_rate": 4.832186667234338e-05, "loss": 1.6778, "step": 9310 }, { "epoch": 0.5189788752020512, "grad_norm": 0.5578994750976562, "learning_rate": 4.831296747920756e-05, "loss": 1.7534, "step": 9311 }, { "epoch": 0.5190346134552143, "grad_norm": 0.5512505173683167, "learning_rate": 4.8304068339574536e-05, "loss": 1.5795, "step": 9312 }, { "epoch": 0.5190903517083775, "grad_norm": 0.5381572842597961, "learning_rate": 4.829516925372662e-05, "loss": 1.5837, "step": 9313 }, { "epoch": 0.5191460899615407, "grad_norm": 0.5063994526863098, "learning_rate": 4.828627022194596e-05, "loss": 1.3961, "step": 9314 }, { "epoch": 0.5192018282147037, "grad_norm": 0.5659567713737488, "learning_rate": 4.827737124451482e-05, "loss": 1.6917, "step": 9315 }, { "epoch": 0.5192575664678669, "grad_norm": 0.6182090640068054, "learning_rate": 4.8268472321715437e-05, "loss": 1.4865, "step": 9316 }, { "epoch": 0.51931330472103, "grad_norm": 0.5523496270179749, "learning_rate": 4.825957345383e-05, "loss": 1.6661, "step": 9317 }, { "epoch": 0.5193690429741932, "grad_norm": 0.5721933245658875, "learning_rate": 4.8250674641140763e-05, "loss": 1.8197, "step": 9318 }, { "epoch": 0.5194247812273564, "grad_norm": 0.5994561910629272, "learning_rate": 4.8241775883929914e-05, "loss": 1.6962, "step": 9319 }, { "epoch": 0.5194805194805194, "grad_norm": 0.5856831073760986, "learning_rate": 4.82328771824797e-05, "loss": 1.7966, "step": 9320 }, { "epoch": 0.5195362577336826, "grad_norm": 0.5815552473068237, "learning_rate": 4.822397853707228e-05, "loss": 1.7016, "step": 9321 }, { "epoch": 0.5195919959868458, "grad_norm": 0.566786527633667, "learning_rate": 4.821507994798993e-05, "loss": 1.4905, "step": 9322 }, { "epoch": 0.5196477342400089, "grad_norm": 0.5902820229530334, "learning_rate": 4.820618141551485e-05, "loss": 1.6682, "step": 9323 }, { "epoch": 0.5197034724931721, "grad_norm": 0.5534100532531738, "learning_rate": 4.819728293992922e-05, "loss": 1.6271, "step": 9324 }, { "epoch": 0.5197592107463352, "grad_norm": 0.5736867189407349, "learning_rate": 4.8188384521515276e-05, "loss": 1.8624, "step": 9325 }, { "epoch": 0.5198149489994983, "grad_norm": 0.5427315831184387, "learning_rate": 4.817948616055521e-05, "loss": 1.6311, "step": 9326 }, { "epoch": 0.5198706872526615, "grad_norm": 0.5504226684570312, "learning_rate": 4.817058785733123e-05, "loss": 1.706, "step": 9327 }, { "epoch": 0.5199264255058247, "grad_norm": 0.560772180557251, "learning_rate": 4.8161689612125524e-05, "loss": 1.5919, "step": 9328 }, { "epoch": 0.5199821637589878, "grad_norm": 0.5797060132026672, "learning_rate": 4.8152791425220304e-05, "loss": 1.6905, "step": 9329 }, { "epoch": 0.5200379020121509, "grad_norm": 0.5403047800064087, "learning_rate": 4.814389329689778e-05, "loss": 1.6947, "step": 9330 }, { "epoch": 0.5200936402653141, "grad_norm": 0.5620684027671814, "learning_rate": 4.8134995227440136e-05, "loss": 1.6495, "step": 9331 }, { "epoch": 0.5201493785184772, "grad_norm": 0.563530683517456, "learning_rate": 4.8126097217129576e-05, "loss": 1.6507, "step": 9332 }, { "epoch": 0.5202051167716404, "grad_norm": 0.5273337960243225, "learning_rate": 4.811719926624828e-05, "loss": 1.6496, "step": 9333 }, { "epoch": 0.5202608550248036, "grad_norm": 0.5833011865615845, "learning_rate": 4.8108301375078445e-05, "loss": 1.7513, "step": 9334 }, { "epoch": 0.5203165932779666, "grad_norm": 0.5634909868240356, "learning_rate": 4.809940354390227e-05, "loss": 1.5815, "step": 9335 }, { "epoch": 0.5203723315311298, "grad_norm": 0.6327871084213257, "learning_rate": 4.809050577300191e-05, "loss": 1.8808, "step": 9336 }, { "epoch": 0.520428069784293, "grad_norm": 0.5909899473190308, "learning_rate": 4.8081608062659614e-05, "loss": 1.5373, "step": 9337 }, { "epoch": 0.5204838080374561, "grad_norm": 0.5492987632751465, "learning_rate": 4.807271041315749e-05, "loss": 1.7677, "step": 9338 }, { "epoch": 0.5205395462906193, "grad_norm": 0.5624071955680847, "learning_rate": 4.806381282477778e-05, "loss": 1.7192, "step": 9339 }, { "epoch": 0.5205952845437823, "grad_norm": 0.5824905037879944, "learning_rate": 4.8054915297802616e-05, "loss": 1.6621, "step": 9340 }, { "epoch": 0.5206510227969455, "grad_norm": 0.6446887254714966, "learning_rate": 4.8046017832514206e-05, "loss": 1.8134, "step": 9341 }, { "epoch": 0.5207067610501087, "grad_norm": 0.5947240591049194, "learning_rate": 4.803712042919473e-05, "loss": 1.811, "step": 9342 }, { "epoch": 0.5207624993032718, "grad_norm": 0.6278781890869141, "learning_rate": 4.8028223088126336e-05, "loss": 1.6691, "step": 9343 }, { "epoch": 0.520818237556435, "grad_norm": 0.5742304921150208, "learning_rate": 4.8019325809591216e-05, "loss": 1.8073, "step": 9344 }, { "epoch": 0.5208739758095982, "grad_norm": 0.626422107219696, "learning_rate": 4.8010428593871517e-05, "loss": 1.5679, "step": 9345 }, { "epoch": 0.5209297140627612, "grad_norm": 0.5343272089958191, "learning_rate": 4.8001531441249457e-05, "loss": 1.4528, "step": 9346 }, { "epoch": 0.5209854523159244, "grad_norm": 0.7227659225463867, "learning_rate": 4.7992634352007125e-05, "loss": 1.5618, "step": 9347 }, { "epoch": 0.5210411905690876, "grad_norm": 0.5783427953720093, "learning_rate": 4.7983737326426746e-05, "loss": 1.7091, "step": 9348 }, { "epoch": 0.5210969288222507, "grad_norm": 0.5899874567985535, "learning_rate": 4.7974840364790476e-05, "loss": 1.7043, "step": 9349 }, { "epoch": 0.5211526670754139, "grad_norm": 0.5830110907554626, "learning_rate": 4.7965943467380446e-05, "loss": 1.5431, "step": 9350 }, { "epoch": 0.521208405328577, "grad_norm": 0.5403499603271484, "learning_rate": 4.7957046634478846e-05, "loss": 1.5314, "step": 9351 }, { "epoch": 0.5212641435817401, "grad_norm": 0.6497839093208313, "learning_rate": 4.7948149866367806e-05, "loss": 1.9862, "step": 9352 }, { "epoch": 0.5213198818349033, "grad_norm": 0.5488117337226868, "learning_rate": 4.7939253163329496e-05, "loss": 1.6921, "step": 9353 }, { "epoch": 0.5213756200880665, "grad_norm": 0.6064301133155823, "learning_rate": 4.7930356525646046e-05, "loss": 1.5896, "step": 9354 }, { "epoch": 0.5214313583412296, "grad_norm": 0.5546178221702576, "learning_rate": 4.792145995359962e-05, "loss": 1.7059, "step": 9355 }, { "epoch": 0.5214870965943927, "grad_norm": 0.5294743180274963, "learning_rate": 4.791256344747238e-05, "loss": 1.5835, "step": 9356 }, { "epoch": 0.5215428348475559, "grad_norm": 0.5221080780029297, "learning_rate": 4.790366700754644e-05, "loss": 1.5472, "step": 9357 }, { "epoch": 0.521598573100719, "grad_norm": 0.555313229560852, "learning_rate": 4.789477063410399e-05, "loss": 1.7969, "step": 9358 }, { "epoch": 0.5216543113538822, "grad_norm": 0.5328066349029541, "learning_rate": 4.788587432742711e-05, "loss": 1.6338, "step": 9359 }, { "epoch": 0.5217100496070454, "grad_norm": 0.5458719730377197, "learning_rate": 4.787697808779798e-05, "loss": 1.7335, "step": 9360 }, { "epoch": 0.5217657878602084, "grad_norm": 0.5909193158149719, "learning_rate": 4.7868081915498734e-05, "loss": 1.9916, "step": 9361 }, { "epoch": 0.5218215261133716, "grad_norm": 0.5532034039497375, "learning_rate": 4.785918581081148e-05, "loss": 1.6839, "step": 9362 }, { "epoch": 0.5218772643665347, "grad_norm": 0.5652511119842529, "learning_rate": 4.7850289774018404e-05, "loss": 1.6613, "step": 9363 }, { "epoch": 0.5219330026196979, "grad_norm": 0.5659765005111694, "learning_rate": 4.784139380540157e-05, "loss": 1.4462, "step": 9364 }, { "epoch": 0.5219887408728611, "grad_norm": 0.6014359593391418, "learning_rate": 4.7832497905243164e-05, "loss": 1.826, "step": 9365 }, { "epoch": 0.5220444791260241, "grad_norm": 0.5442059636116028, "learning_rate": 4.782360207382527e-05, "loss": 1.6403, "step": 9366 }, { "epoch": 0.5221002173791873, "grad_norm": 0.571991503238678, "learning_rate": 4.781470631143003e-05, "loss": 1.6031, "step": 9367 }, { "epoch": 0.5221559556323505, "grad_norm": 0.6860571503639221, "learning_rate": 4.780581061833958e-05, "loss": 1.7744, "step": 9368 }, { "epoch": 0.5222116938855136, "grad_norm": 0.5633348226547241, "learning_rate": 4.7796914994836003e-05, "loss": 1.7062, "step": 9369 }, { "epoch": 0.5222674321386768, "grad_norm": 0.5520535111427307, "learning_rate": 4.778801944120146e-05, "loss": 1.6158, "step": 9370 }, { "epoch": 0.52232317039184, "grad_norm": 0.5730091333389282, "learning_rate": 4.7779123957718016e-05, "loss": 1.8501, "step": 9371 }, { "epoch": 0.522378908645003, "grad_norm": 0.5603798031806946, "learning_rate": 4.777022854466784e-05, "loss": 1.677, "step": 9372 }, { "epoch": 0.5224346468981662, "grad_norm": 0.5554346442222595, "learning_rate": 4.7761333202332986e-05, "loss": 1.6819, "step": 9373 }, { "epoch": 0.5224903851513294, "grad_norm": 0.5741342306137085, "learning_rate": 4.7752437930995605e-05, "loss": 1.8114, "step": 9374 }, { "epoch": 0.5225461234044925, "grad_norm": 0.5575484037399292, "learning_rate": 4.7743542730937794e-05, "loss": 1.6216, "step": 9375 }, { "epoch": 0.5226018616576557, "grad_norm": 0.5593728423118591, "learning_rate": 4.7734647602441644e-05, "loss": 1.5714, "step": 9376 }, { "epoch": 0.5226575999108188, "grad_norm": 0.5570329427719116, "learning_rate": 4.7725752545789276e-05, "loss": 1.7383, "step": 9377 }, { "epoch": 0.5227133381639819, "grad_norm": 0.5562308430671692, "learning_rate": 4.771685756126276e-05, "loss": 1.4746, "step": 9378 }, { "epoch": 0.5227690764171451, "grad_norm": 0.5393458008766174, "learning_rate": 4.7707962649144225e-05, "loss": 1.762, "step": 9379 }, { "epoch": 0.5228248146703083, "grad_norm": 0.5442481637001038, "learning_rate": 4.769906780971575e-05, "loss": 1.6694, "step": 9380 }, { "epoch": 0.5228805529234714, "grad_norm": 0.5860007405281067, "learning_rate": 4.769017304325941e-05, "loss": 1.6952, "step": 9381 }, { "epoch": 0.5229362911766345, "grad_norm": 0.6174299716949463, "learning_rate": 4.768127835005733e-05, "loss": 1.8446, "step": 9382 }, { "epoch": 0.5229920294297977, "grad_norm": 0.5522156953811646, "learning_rate": 4.767238373039157e-05, "loss": 1.5253, "step": 9383 }, { "epoch": 0.5230477676829608, "grad_norm": 0.5956835150718689, "learning_rate": 4.7663489184544246e-05, "loss": 1.7674, "step": 9384 }, { "epoch": 0.523103505936124, "grad_norm": 0.5649197101593018, "learning_rate": 4.7654594712797415e-05, "loss": 1.6636, "step": 9385 }, { "epoch": 0.523159244189287, "grad_norm": 0.5424702167510986, "learning_rate": 4.7645700315433155e-05, "loss": 1.6065, "step": 9386 }, { "epoch": 0.5232149824424502, "grad_norm": 0.5742893218994141, "learning_rate": 4.763680599273357e-05, "loss": 1.8092, "step": 9387 }, { "epoch": 0.5232707206956134, "grad_norm": 0.565175473690033, "learning_rate": 4.76279117449807e-05, "loss": 1.5921, "step": 9388 }, { "epoch": 0.5233264589487765, "grad_norm": 0.599720299243927, "learning_rate": 4.761901757245667e-05, "loss": 1.7831, "step": 9389 }, { "epoch": 0.5233821972019397, "grad_norm": 0.600030243396759, "learning_rate": 4.7610123475443486e-05, "loss": 1.7594, "step": 9390 }, { "epoch": 0.5234379354551029, "grad_norm": 0.5445983409881592, "learning_rate": 4.7601229454223275e-05, "loss": 1.635, "step": 9391 }, { "epoch": 0.5234936737082659, "grad_norm": 1.3782683610916138, "learning_rate": 4.759233550907807e-05, "loss": 1.6621, "step": 9392 }, { "epoch": 0.5235494119614291, "grad_norm": 0.58378005027771, "learning_rate": 4.7583441640289946e-05, "loss": 1.6687, "step": 9393 }, { "epoch": 0.5236051502145923, "grad_norm": 0.5960495471954346, "learning_rate": 4.757454784814097e-05, "loss": 1.7393, "step": 9394 }, { "epoch": 0.5236608884677554, "grad_norm": 0.6179077625274658, "learning_rate": 4.756565413291318e-05, "loss": 1.7908, "step": 9395 }, { "epoch": 0.5237166267209186, "grad_norm": 0.5232189297676086, "learning_rate": 4.755676049488867e-05, "loss": 1.6024, "step": 9396 }, { "epoch": 0.5237723649740817, "grad_norm": 0.5634143352508545, "learning_rate": 4.7547866934349447e-05, "loss": 1.7352, "step": 9397 }, { "epoch": 0.5238281032272448, "grad_norm": 0.5540798306465149, "learning_rate": 4.753897345157762e-05, "loss": 1.6196, "step": 9398 }, { "epoch": 0.523883841480408, "grad_norm": 0.5112434029579163, "learning_rate": 4.753008004685517e-05, "loss": 1.5433, "step": 9399 }, { "epoch": 0.5239395797335712, "grad_norm": 0.5160391926765442, "learning_rate": 4.752118672046419e-05, "loss": 1.4725, "step": 9400 }, { "epoch": 0.5239953179867343, "grad_norm": 0.5671103000640869, "learning_rate": 4.751229347268673e-05, "loss": 1.6878, "step": 9401 }, { "epoch": 0.5240510562398975, "grad_norm": 0.5739786028862, "learning_rate": 4.750340030380481e-05, "loss": 1.4368, "step": 9402 }, { "epoch": 0.5241067944930606, "grad_norm": 0.5829623937606812, "learning_rate": 4.749450721410048e-05, "loss": 1.8745, "step": 9403 }, { "epoch": 0.5241625327462237, "grad_norm": 0.5581690669059753, "learning_rate": 4.748561420385577e-05, "loss": 1.8433, "step": 9404 }, { "epoch": 0.5242182709993869, "grad_norm": 0.5548933148384094, "learning_rate": 4.747672127335272e-05, "loss": 1.6408, "step": 9405 }, { "epoch": 0.5242740092525501, "grad_norm": 0.5583091378211975, "learning_rate": 4.746782842287335e-05, "loss": 1.74, "step": 9406 }, { "epoch": 0.5243297475057132, "grad_norm": 0.6239990592002869, "learning_rate": 4.7458935652699686e-05, "loss": 1.7131, "step": 9407 }, { "epoch": 0.5243854857588763, "grad_norm": 0.5649636387825012, "learning_rate": 4.7450042963113794e-05, "loss": 1.6917, "step": 9408 }, { "epoch": 0.5244412240120394, "grad_norm": 0.5509878993034363, "learning_rate": 4.744115035439766e-05, "loss": 1.5985, "step": 9409 }, { "epoch": 0.5244969622652026, "grad_norm": 0.5211341977119446, "learning_rate": 4.743225782683333e-05, "loss": 1.3233, "step": 9410 }, { "epoch": 0.5245527005183658, "grad_norm": 0.5903692245483398, "learning_rate": 4.74233653807028e-05, "loss": 1.7022, "step": 9411 }, { "epoch": 0.5246084387715289, "grad_norm": 0.5562416911125183, "learning_rate": 4.7414473016288096e-05, "loss": 1.5126, "step": 9412 }, { "epoch": 0.524664177024692, "grad_norm": 0.5590984225273132, "learning_rate": 4.740558073387124e-05, "loss": 1.736, "step": 9413 }, { "epoch": 0.5247199152778552, "grad_norm": 0.5605709552764893, "learning_rate": 4.7396688533734224e-05, "loss": 1.7664, "step": 9414 }, { "epoch": 0.5247756535310183, "grad_norm": 0.56081622838974, "learning_rate": 4.7387796416159094e-05, "loss": 1.6379, "step": 9415 }, { "epoch": 0.5248313917841815, "grad_norm": 0.5936822891235352, "learning_rate": 4.7378904381427805e-05, "loss": 1.6248, "step": 9416 }, { "epoch": 0.5248871300373447, "grad_norm": 0.5852161049842834, "learning_rate": 4.7370012429822405e-05, "loss": 1.8269, "step": 9417 }, { "epoch": 0.5249428682905077, "grad_norm": 0.5660523772239685, "learning_rate": 4.736112056162486e-05, "loss": 1.6686, "step": 9418 }, { "epoch": 0.5249986065436709, "grad_norm": 0.5601064562797546, "learning_rate": 4.7352228777117195e-05, "loss": 1.6043, "step": 9419 }, { "epoch": 0.5250543447968341, "grad_norm": 0.531576931476593, "learning_rate": 4.73433370765814e-05, "loss": 1.3681, "step": 9420 }, { "epoch": 0.5251100830499972, "grad_norm": 0.609130322933197, "learning_rate": 4.733444546029946e-05, "loss": 1.8752, "step": 9421 }, { "epoch": 0.5251658213031604, "grad_norm": 0.5157068371772766, "learning_rate": 4.7325553928553375e-05, "loss": 1.6649, "step": 9422 }, { "epoch": 0.5252215595563235, "grad_norm": 0.42130622267723083, "learning_rate": 4.73166624816251e-05, "loss": 0.7808, "step": 9423 }, { "epoch": 0.5252772978094866, "grad_norm": 0.5316475629806519, "learning_rate": 4.7307771119796685e-05, "loss": 1.9264, "step": 9424 }, { "epoch": 0.5253330360626498, "grad_norm": 0.5346727967262268, "learning_rate": 4.729887984335004e-05, "loss": 1.4697, "step": 9425 }, { "epoch": 0.525388774315813, "grad_norm": 0.5731312036514282, "learning_rate": 4.728998865256718e-05, "loss": 1.7123, "step": 9426 }, { "epoch": 0.5254445125689761, "grad_norm": 0.5310966968536377, "learning_rate": 4.728109754773011e-05, "loss": 1.6069, "step": 9427 }, { "epoch": 0.5255002508221392, "grad_norm": 0.5562901496887207, "learning_rate": 4.727220652912074e-05, "loss": 1.5449, "step": 9428 }, { "epoch": 0.5255559890753024, "grad_norm": 0.5125192403793335, "learning_rate": 4.72633155970211e-05, "loss": 1.3929, "step": 9429 }, { "epoch": 0.5256117273284655, "grad_norm": 0.5134342908859253, "learning_rate": 4.725442475171312e-05, "loss": 1.5311, "step": 9430 }, { "epoch": 0.5256674655816287, "grad_norm": 0.56780606508255, "learning_rate": 4.724553399347879e-05, "loss": 1.8386, "step": 9431 }, { "epoch": 0.5257232038347918, "grad_norm": 0.527378499507904, "learning_rate": 4.723664332260004e-05, "loss": 1.4743, "step": 9432 }, { "epoch": 0.525778942087955, "grad_norm": 0.5406578779220581, "learning_rate": 4.722775273935886e-05, "loss": 1.5645, "step": 9433 }, { "epoch": 0.5258346803411181, "grad_norm": 0.5987953543663025, "learning_rate": 4.721886224403722e-05, "loss": 1.8844, "step": 9434 }, { "epoch": 0.5258904185942812, "grad_norm": 0.6220631003379822, "learning_rate": 4.720997183691703e-05, "loss": 1.6094, "step": 9435 }, { "epoch": 0.5259461568474444, "grad_norm": 0.5748035311698914, "learning_rate": 4.720108151828028e-05, "loss": 1.6859, "step": 9436 }, { "epoch": 0.5260018951006076, "grad_norm": 0.6056424379348755, "learning_rate": 4.71921912884089e-05, "loss": 1.8112, "step": 9437 }, { "epoch": 0.5260576333537706, "grad_norm": 0.5912368893623352, "learning_rate": 4.7183301147584854e-05, "loss": 1.783, "step": 9438 }, { "epoch": 0.5261133716069338, "grad_norm": 0.5289324522018433, "learning_rate": 4.717441109609006e-05, "loss": 1.5162, "step": 9439 }, { "epoch": 0.526169109860097, "grad_norm": 0.5573659539222717, "learning_rate": 4.716552113420646e-05, "loss": 1.5676, "step": 9440 }, { "epoch": 0.5262248481132601, "grad_norm": 0.5835697054862976, "learning_rate": 4.715663126221603e-05, "loss": 1.812, "step": 9441 }, { "epoch": 0.5262805863664233, "grad_norm": 0.617939293384552, "learning_rate": 4.714774148040065e-05, "loss": 1.8001, "step": 9442 }, { "epoch": 0.5263363246195865, "grad_norm": 0.59937584400177, "learning_rate": 4.713885178904231e-05, "loss": 1.8504, "step": 9443 }, { "epoch": 0.5263920628727495, "grad_norm": 0.5530192852020264, "learning_rate": 4.7129962188422886e-05, "loss": 1.7862, "step": 9444 }, { "epoch": 0.5264478011259127, "grad_norm": 0.5564062595367432, "learning_rate": 4.712107267882434e-05, "loss": 1.6889, "step": 9445 }, { "epoch": 0.5265035393790759, "grad_norm": 0.5669463276863098, "learning_rate": 4.7112183260528584e-05, "loss": 1.4817, "step": 9446 }, { "epoch": 0.526559277632239, "grad_norm": 0.5424147248268127, "learning_rate": 4.710329393381753e-05, "loss": 1.7066, "step": 9447 }, { "epoch": 0.5266150158854022, "grad_norm": 0.5391395092010498, "learning_rate": 4.709440469897312e-05, "loss": 1.4955, "step": 9448 }, { "epoch": 0.5266707541385653, "grad_norm": 0.5175044536590576, "learning_rate": 4.708551555627723e-05, "loss": 1.6005, "step": 9449 }, { "epoch": 0.5267264923917284, "grad_norm": 0.5783989429473877, "learning_rate": 4.707662650601182e-05, "loss": 1.6138, "step": 9450 }, { "epoch": 0.5267822306448916, "grad_norm": 0.6266419887542725, "learning_rate": 4.706773754845874e-05, "loss": 1.8049, "step": 9451 }, { "epoch": 0.5268379688980548, "grad_norm": 0.5239512920379639, "learning_rate": 4.705884868389994e-05, "loss": 1.4986, "step": 9452 }, { "epoch": 0.5268937071512179, "grad_norm": 0.504352867603302, "learning_rate": 4.704995991261733e-05, "loss": 1.6794, "step": 9453 }, { "epoch": 0.526949445404381, "grad_norm": 0.5516874194145203, "learning_rate": 4.704107123489277e-05, "loss": 1.544, "step": 9454 }, { "epoch": 0.5270051836575441, "grad_norm": 0.5346981883049011, "learning_rate": 4.70321826510082e-05, "loss": 1.556, "step": 9455 }, { "epoch": 0.5270609219107073, "grad_norm": 0.5733329057693481, "learning_rate": 4.702329416124548e-05, "loss": 1.5109, "step": 9456 }, { "epoch": 0.5271166601638705, "grad_norm": 0.5586609244346619, "learning_rate": 4.701440576588652e-05, "loss": 1.6984, "step": 9457 }, { "epoch": 0.5271723984170336, "grad_norm": 0.6048542261123657, "learning_rate": 4.700551746521318e-05, "loss": 1.6543, "step": 9458 }, { "epoch": 0.5272281366701967, "grad_norm": 0.5741638541221619, "learning_rate": 4.699662925950738e-05, "loss": 1.5402, "step": 9459 }, { "epoch": 0.5272838749233599, "grad_norm": 0.5675785541534424, "learning_rate": 4.6987741149051e-05, "loss": 1.6698, "step": 9460 }, { "epoch": 0.527339613176523, "grad_norm": 0.5488637685775757, "learning_rate": 4.69788531341259e-05, "loss": 1.7072, "step": 9461 }, { "epoch": 0.5273953514296862, "grad_norm": 0.5417453646659851, "learning_rate": 4.6969965215013964e-05, "loss": 1.443, "step": 9462 }, { "epoch": 0.5274510896828494, "grad_norm": 0.5321457982063293, "learning_rate": 4.696107739199707e-05, "loss": 1.498, "step": 9463 }, { "epoch": 0.5275068279360124, "grad_norm": 0.5696976780891418, "learning_rate": 4.695218966535708e-05, "loss": 1.7932, "step": 9464 }, { "epoch": 0.5275625661891756, "grad_norm": 0.5530003905296326, "learning_rate": 4.6943302035375864e-05, "loss": 1.5405, "step": 9465 }, { "epoch": 0.5276183044423388, "grad_norm": 0.5903899669647217, "learning_rate": 4.693441450233527e-05, "loss": 1.6725, "step": 9466 }, { "epoch": 0.5276740426955019, "grad_norm": 0.5503592491149902, "learning_rate": 4.69255270665172e-05, "loss": 1.7555, "step": 9467 }, { "epoch": 0.5277297809486651, "grad_norm": 0.5256405472755432, "learning_rate": 4.6916639728203465e-05, "loss": 1.5811, "step": 9468 }, { "epoch": 0.5277855192018283, "grad_norm": 0.5961898565292358, "learning_rate": 4.6907752487675954e-05, "loss": 1.7571, "step": 9469 }, { "epoch": 0.5278412574549913, "grad_norm": 0.6401336193084717, "learning_rate": 4.68988653452165e-05, "loss": 1.8604, "step": 9470 }, { "epoch": 0.5278969957081545, "grad_norm": 0.5445451140403748, "learning_rate": 4.688997830110695e-05, "loss": 1.7073, "step": 9471 }, { "epoch": 0.5279527339613177, "grad_norm": 0.5979543924331665, "learning_rate": 4.688109135562918e-05, "loss": 1.8051, "step": 9472 }, { "epoch": 0.5280084722144808, "grad_norm": 0.5162997841835022, "learning_rate": 4.6872204509064984e-05, "loss": 1.4152, "step": 9473 }, { "epoch": 0.528064210467644, "grad_norm": 0.5626786351203918, "learning_rate": 4.686331776169624e-05, "loss": 1.8541, "step": 9474 }, { "epoch": 0.5281199487208071, "grad_norm": 0.5397034883499146, "learning_rate": 4.685443111380474e-05, "loss": 1.5225, "step": 9475 }, { "epoch": 0.5281756869739702, "grad_norm": 0.549978494644165, "learning_rate": 4.6845544565672385e-05, "loss": 1.8114, "step": 9476 }, { "epoch": 0.5282314252271334, "grad_norm": 0.564751148223877, "learning_rate": 4.683665811758093e-05, "loss": 1.698, "step": 9477 }, { "epoch": 0.5282871634802965, "grad_norm": 0.5972959399223328, "learning_rate": 4.6827771769812247e-05, "loss": 1.9423, "step": 9478 }, { "epoch": 0.5283429017334597, "grad_norm": 0.5752547979354858, "learning_rate": 4.681888552264816e-05, "loss": 1.8993, "step": 9479 }, { "epoch": 0.5283986399866228, "grad_norm": 0.5367037057876587, "learning_rate": 4.680999937637047e-05, "loss": 1.4992, "step": 9480 }, { "epoch": 0.5284543782397859, "grad_norm": 0.5151523947715759, "learning_rate": 4.6801113331261e-05, "loss": 1.469, "step": 9481 }, { "epoch": 0.5285101164929491, "grad_norm": 0.5293115973472595, "learning_rate": 4.679222738760156e-05, "loss": 1.5147, "step": 9482 }, { "epoch": 0.5285658547461123, "grad_norm": 0.5823219418525696, "learning_rate": 4.6783341545673975e-05, "loss": 1.7303, "step": 9483 }, { "epoch": 0.5286215929992754, "grad_norm": 0.5544847846031189, "learning_rate": 4.677445580576003e-05, "loss": 1.4783, "step": 9484 }, { "epoch": 0.5286773312524385, "grad_norm": 0.5406891107559204, "learning_rate": 4.676557016814154e-05, "loss": 1.6925, "step": 9485 }, { "epoch": 0.5287330695056017, "grad_norm": 0.5609269738197327, "learning_rate": 4.675668463310032e-05, "loss": 1.562, "step": 9486 }, { "epoch": 0.5287888077587648, "grad_norm": 0.5836624503135681, "learning_rate": 4.674779920091814e-05, "loss": 1.8974, "step": 9487 }, { "epoch": 0.528844546011928, "grad_norm": 0.6158092617988586, "learning_rate": 4.673891387187682e-05, "loss": 1.6163, "step": 9488 }, { "epoch": 0.5289002842650912, "grad_norm": 0.5655474066734314, "learning_rate": 4.673002864625813e-05, "loss": 1.7773, "step": 9489 }, { "epoch": 0.5289560225182542, "grad_norm": 0.572187066078186, "learning_rate": 4.6721143524343874e-05, "loss": 1.6477, "step": 9490 }, { "epoch": 0.5290117607714174, "grad_norm": 0.5250730514526367, "learning_rate": 4.671225850641582e-05, "loss": 1.3446, "step": 9491 }, { "epoch": 0.5290674990245806, "grad_norm": 0.576943039894104, "learning_rate": 4.670337359275574e-05, "loss": 1.864, "step": 9492 }, { "epoch": 0.5291232372777437, "grad_norm": 0.6366379857063293, "learning_rate": 4.6694488783645466e-05, "loss": 1.9024, "step": 9493 }, { "epoch": 0.5291789755309069, "grad_norm": 0.5497097969055176, "learning_rate": 4.6685604079366706e-05, "loss": 1.7019, "step": 9494 }, { "epoch": 0.52923471378407, "grad_norm": 0.5463730692863464, "learning_rate": 4.667671948020128e-05, "loss": 1.545, "step": 9495 }, { "epoch": 0.5292904520372331, "grad_norm": 0.5959593653678894, "learning_rate": 4.666783498643093e-05, "loss": 1.4809, "step": 9496 }, { "epoch": 0.5293461902903963, "grad_norm": 0.6760483384132385, "learning_rate": 4.665895059833741e-05, "loss": 1.3123, "step": 9497 }, { "epoch": 0.5294019285435595, "grad_norm": 0.5294803977012634, "learning_rate": 4.6650066316202525e-05, "loss": 1.5795, "step": 9498 }, { "epoch": 0.5294576667967226, "grad_norm": 0.5938780903816223, "learning_rate": 4.6641182140307986e-05, "loss": 1.7409, "step": 9499 }, { "epoch": 0.5295134050498858, "grad_norm": 0.5527142882347107, "learning_rate": 4.663229807093558e-05, "loss": 1.5922, "step": 9500 }, { "epoch": 0.5295691433030488, "grad_norm": 0.5486581325531006, "learning_rate": 4.662341410836703e-05, "loss": 1.6494, "step": 9501 }, { "epoch": 0.529624881556212, "grad_norm": 0.548119068145752, "learning_rate": 4.661453025288411e-05, "loss": 1.5342, "step": 9502 }, { "epoch": 0.5296806198093752, "grad_norm": 0.5932400822639465, "learning_rate": 4.660564650476854e-05, "loss": 1.6725, "step": 9503 }, { "epoch": 0.5297363580625383, "grad_norm": 0.614427387714386, "learning_rate": 4.6596762864302076e-05, "loss": 1.9948, "step": 9504 }, { "epoch": 0.5297920963157015, "grad_norm": 0.5420172810554504, "learning_rate": 4.658787933176646e-05, "loss": 1.4934, "step": 9505 }, { "epoch": 0.5298478345688646, "grad_norm": 0.5479914546012878, "learning_rate": 4.657899590744341e-05, "loss": 1.627, "step": 9506 }, { "epoch": 0.5299035728220277, "grad_norm": 0.5667080879211426, "learning_rate": 4.6570112591614664e-05, "loss": 1.5898, "step": 9507 }, { "epoch": 0.5299593110751909, "grad_norm": 0.5239989161491394, "learning_rate": 4.656122938456195e-05, "loss": 1.4714, "step": 9508 }, { "epoch": 0.5300150493283541, "grad_norm": 0.5880669951438904, "learning_rate": 4.6552346286567e-05, "loss": 1.6165, "step": 9509 }, { "epoch": 0.5300707875815172, "grad_norm": 0.6253079175949097, "learning_rate": 4.65434632979115e-05, "loss": 1.8067, "step": 9510 }, { "epoch": 0.5301265258346803, "grad_norm": 0.5400813817977905, "learning_rate": 4.6534580418877205e-05, "loss": 1.5718, "step": 9511 }, { "epoch": 0.5301822640878435, "grad_norm": 0.5349458456039429, "learning_rate": 4.652569764974582e-05, "loss": 1.5877, "step": 9512 }, { "epoch": 0.5302380023410066, "grad_norm": 0.5760993957519531, "learning_rate": 4.651681499079904e-05, "loss": 1.7207, "step": 9513 }, { "epoch": 0.5302937405941698, "grad_norm": 0.48260366916656494, "learning_rate": 4.6507932442318596e-05, "loss": 1.1782, "step": 9514 }, { "epoch": 0.530349478847333, "grad_norm": 0.5174147486686707, "learning_rate": 4.649905000458616e-05, "loss": 1.2772, "step": 9515 }, { "epoch": 0.530405217100496, "grad_norm": 0.543880820274353, "learning_rate": 4.6490167677883457e-05, "loss": 1.6083, "step": 9516 }, { "epoch": 0.5304609553536592, "grad_norm": 0.5448428392410278, "learning_rate": 4.648128546249216e-05, "loss": 1.687, "step": 9517 }, { "epoch": 0.5305166936068224, "grad_norm": 0.5626906752586365, "learning_rate": 4.6472403358693964e-05, "loss": 1.5031, "step": 9518 }, { "epoch": 0.5305724318599855, "grad_norm": 0.5578361749649048, "learning_rate": 4.646352136677058e-05, "loss": 1.7177, "step": 9519 }, { "epoch": 0.5306281701131487, "grad_norm": 0.5288956165313721, "learning_rate": 4.645463948700368e-05, "loss": 1.4941, "step": 9520 }, { "epoch": 0.5306839083663119, "grad_norm": 0.5862405896186829, "learning_rate": 4.644575771967495e-05, "loss": 1.5097, "step": 9521 }, { "epoch": 0.5307396466194749, "grad_norm": 0.5509393811225891, "learning_rate": 4.6436876065066046e-05, "loss": 1.6904, "step": 9522 }, { "epoch": 0.5307953848726381, "grad_norm": 0.5741393566131592, "learning_rate": 4.642799452345867e-05, "loss": 1.7481, "step": 9523 }, { "epoch": 0.5308511231258012, "grad_norm": 0.5851439833641052, "learning_rate": 4.6419113095134485e-05, "loss": 1.8227, "step": 9524 }, { "epoch": 0.5309068613789644, "grad_norm": 0.5458952784538269, "learning_rate": 4.641023178037514e-05, "loss": 1.5991, "step": 9525 }, { "epoch": 0.5309625996321276, "grad_norm": 0.5807502269744873, "learning_rate": 4.6401350579462337e-05, "loss": 1.7392, "step": 9526 }, { "epoch": 0.5310183378852906, "grad_norm": 0.5653144121170044, "learning_rate": 4.6392469492677685e-05, "loss": 1.9319, "step": 9527 }, { "epoch": 0.5310740761384538, "grad_norm": 0.559446394443512, "learning_rate": 4.63835885203029e-05, "loss": 1.4941, "step": 9528 }, { "epoch": 0.531129814391617, "grad_norm": 0.6032963991165161, "learning_rate": 4.637470766261956e-05, "loss": 1.8894, "step": 9529 }, { "epoch": 0.5311855526447801, "grad_norm": 0.546187698841095, "learning_rate": 4.636582691990937e-05, "loss": 1.5257, "step": 9530 }, { "epoch": 0.5312412908979433, "grad_norm": 0.552087664604187, "learning_rate": 4.6356946292453984e-05, "loss": 1.6217, "step": 9531 }, { "epoch": 0.5312970291511064, "grad_norm": 0.5413661599159241, "learning_rate": 4.6348065780535e-05, "loss": 1.4773, "step": 9532 }, { "epoch": 0.5313527674042695, "grad_norm": 0.5477663278579712, "learning_rate": 4.633918538443409e-05, "loss": 1.5962, "step": 9533 }, { "epoch": 0.5314085056574327, "grad_norm": 0.5874429941177368, "learning_rate": 4.633030510443287e-05, "loss": 1.9244, "step": 9534 }, { "epoch": 0.5314642439105959, "grad_norm": 0.5683996677398682, "learning_rate": 4.632142494081298e-05, "loss": 1.5354, "step": 9535 }, { "epoch": 0.531519982163759, "grad_norm": 0.5978541374206543, "learning_rate": 4.631254489385602e-05, "loss": 1.7756, "step": 9536 }, { "epoch": 0.5315757204169221, "grad_norm": 0.5998654365539551, "learning_rate": 4.630366496384365e-05, "loss": 1.7578, "step": 9537 }, { "epoch": 0.5316314586700853, "grad_norm": 0.6490985751152039, "learning_rate": 4.629478515105749e-05, "loss": 1.8384, "step": 9538 }, { "epoch": 0.5316871969232484, "grad_norm": 0.5708163380622864, "learning_rate": 4.6285905455779136e-05, "loss": 1.7661, "step": 9539 }, { "epoch": 0.5317429351764116, "grad_norm": 0.5977619886398315, "learning_rate": 4.6277025878290204e-05, "loss": 1.965, "step": 9540 }, { "epoch": 0.5317986734295748, "grad_norm": 0.5742282271385193, "learning_rate": 4.6268146418872305e-05, "loss": 1.7107, "step": 9541 }, { "epoch": 0.5318544116827378, "grad_norm": 0.5834643840789795, "learning_rate": 4.625926707780705e-05, "loss": 1.7749, "step": 9542 }, { "epoch": 0.531910149935901, "grad_norm": 0.5550711154937744, "learning_rate": 4.625038785537602e-05, "loss": 1.6059, "step": 9543 }, { "epoch": 0.5319658881890642, "grad_norm": 0.5790189504623413, "learning_rate": 4.6241508751860816e-05, "loss": 1.8484, "step": 9544 }, { "epoch": 0.5320216264422273, "grad_norm": 0.5297401547431946, "learning_rate": 4.623262976754307e-05, "loss": 1.6221, "step": 9545 }, { "epoch": 0.5320773646953905, "grad_norm": 0.5399177074432373, "learning_rate": 4.622375090270432e-05, "loss": 1.6821, "step": 9546 }, { "epoch": 0.5321331029485535, "grad_norm": 0.5894103050231934, "learning_rate": 4.621487215762619e-05, "loss": 1.8067, "step": 9547 }, { "epoch": 0.5321888412017167, "grad_norm": 0.5290741920471191, "learning_rate": 4.620599353259023e-05, "loss": 1.4202, "step": 9548 }, { "epoch": 0.5322445794548799, "grad_norm": 0.6190316081047058, "learning_rate": 4.619711502787805e-05, "loss": 1.832, "step": 9549 }, { "epoch": 0.532300317708043, "grad_norm": 0.5280694365501404, "learning_rate": 4.618823664377121e-05, "loss": 1.6861, "step": 9550 }, { "epoch": 0.5323560559612062, "grad_norm": 0.5568878054618835, "learning_rate": 4.6179358380551255e-05, "loss": 1.6477, "step": 9551 }, { "epoch": 0.5324117942143693, "grad_norm": 0.6240448951721191, "learning_rate": 4.617048023849981e-05, "loss": 1.8258, "step": 9552 }, { "epoch": 0.5324675324675324, "grad_norm": 0.6594541072845459, "learning_rate": 4.616160221789837e-05, "loss": 2.1279, "step": 9553 }, { "epoch": 0.5325232707206956, "grad_norm": 0.5653007626533508, "learning_rate": 4.615272431902857e-05, "loss": 1.8833, "step": 9554 }, { "epoch": 0.5325790089738588, "grad_norm": 0.5489795207977295, "learning_rate": 4.614384654217189e-05, "loss": 1.5675, "step": 9555 }, { "epoch": 0.5326347472270219, "grad_norm": 0.5744782090187073, "learning_rate": 4.6134968887609915e-05, "loss": 1.7209, "step": 9556 }, { "epoch": 0.532690485480185, "grad_norm": 0.5887755751609802, "learning_rate": 4.6126091355624215e-05, "loss": 1.8778, "step": 9557 }, { "epoch": 0.5327462237333482, "grad_norm": 0.5370951890945435, "learning_rate": 4.611721394649629e-05, "loss": 1.6063, "step": 9558 }, { "epoch": 0.5328019619865113, "grad_norm": 0.5396353006362915, "learning_rate": 4.610833666050771e-05, "loss": 1.7225, "step": 9559 }, { "epoch": 0.5328577002396745, "grad_norm": 0.5278332233428955, "learning_rate": 4.609945949794e-05, "loss": 1.6031, "step": 9560 }, { "epoch": 0.5329134384928377, "grad_norm": 0.5371220707893372, "learning_rate": 4.60905824590747e-05, "loss": 1.519, "step": 9561 }, { "epoch": 0.5329691767460008, "grad_norm": 0.5495018362998962, "learning_rate": 4.60817055441933e-05, "loss": 1.5729, "step": 9562 }, { "epoch": 0.5330249149991639, "grad_norm": 0.5440717339515686, "learning_rate": 4.607282875357738e-05, "loss": 1.4728, "step": 9563 }, { "epoch": 0.5330806532523271, "grad_norm": 0.502078115940094, "learning_rate": 4.606395208750844e-05, "loss": 1.4295, "step": 9564 }, { "epoch": 0.5331363915054902, "grad_norm": 0.629677414894104, "learning_rate": 4.605507554626798e-05, "loss": 1.836, "step": 9565 }, { "epoch": 0.5331921297586534, "grad_norm": 0.5432531237602234, "learning_rate": 4.6046199130137536e-05, "loss": 1.6115, "step": 9566 }, { "epoch": 0.5332478680118166, "grad_norm": 0.5538272261619568, "learning_rate": 4.6037322839398586e-05, "loss": 1.5308, "step": 9567 }, { "epoch": 0.5333036062649796, "grad_norm": 0.6601541042327881, "learning_rate": 4.602844667433267e-05, "loss": 1.6254, "step": 9568 }, { "epoch": 0.5333593445181428, "grad_norm": 0.6120070219039917, "learning_rate": 4.601957063522125e-05, "loss": 1.7533, "step": 9569 }, { "epoch": 0.5334150827713059, "grad_norm": 0.6346586346626282, "learning_rate": 4.601069472234584e-05, "loss": 1.8627, "step": 9570 }, { "epoch": 0.5334708210244691, "grad_norm": 0.606587827205658, "learning_rate": 4.6001818935987954e-05, "loss": 1.8637, "step": 9571 }, { "epoch": 0.5335265592776323, "grad_norm": 0.6264762282371521, "learning_rate": 4.599294327642905e-05, "loss": 1.8194, "step": 9572 }, { "epoch": 0.5335822975307953, "grad_norm": 0.5685368180274963, "learning_rate": 4.598406774395063e-05, "loss": 1.7858, "step": 9573 }, { "epoch": 0.5336380357839585, "grad_norm": 0.5685484409332275, "learning_rate": 4.597519233883416e-05, "loss": 1.8332, "step": 9574 }, { "epoch": 0.5336937740371217, "grad_norm": 0.5560395121574402, "learning_rate": 4.596631706136113e-05, "loss": 1.7374, "step": 9575 }, { "epoch": 0.5337495122902848, "grad_norm": 0.5584644079208374, "learning_rate": 4.595744191181299e-05, "loss": 1.6939, "step": 9576 }, { "epoch": 0.533805250543448, "grad_norm": 0.5306901335716248, "learning_rate": 4.5948566890471226e-05, "loss": 1.5649, "step": 9577 }, { "epoch": 0.5338609887966111, "grad_norm": 0.5599258542060852, "learning_rate": 4.593969199761732e-05, "loss": 1.6355, "step": 9578 }, { "epoch": 0.5339167270497742, "grad_norm": 0.5288627743721008, "learning_rate": 4.593081723353267e-05, "loss": 1.5703, "step": 9579 }, { "epoch": 0.5339724653029374, "grad_norm": 0.5489224791526794, "learning_rate": 4.592194259849882e-05, "loss": 1.6562, "step": 9580 }, { "epoch": 0.5340282035561006, "grad_norm": 0.5630218386650085, "learning_rate": 4.591306809279714e-05, "loss": 1.3497, "step": 9581 }, { "epoch": 0.5340839418092637, "grad_norm": 0.5344558954238892, "learning_rate": 4.590419371670912e-05, "loss": 1.6694, "step": 9582 }, { "epoch": 0.5341396800624268, "grad_norm": 0.5834851861000061, "learning_rate": 4.5895319470516204e-05, "loss": 1.823, "step": 9583 }, { "epoch": 0.53419541831559, "grad_norm": 0.5720002055168152, "learning_rate": 4.5886445354499814e-05, "loss": 1.6351, "step": 9584 }, { "epoch": 0.5342511565687531, "grad_norm": 0.6013414859771729, "learning_rate": 4.58775713689414e-05, "loss": 1.6793, "step": 9585 }, { "epoch": 0.5343068948219163, "grad_norm": 0.5077610611915588, "learning_rate": 4.5868697514122384e-05, "loss": 1.5092, "step": 9586 }, { "epoch": 0.5343626330750795, "grad_norm": 0.5603808164596558, "learning_rate": 4.5859823790324194e-05, "loss": 1.7538, "step": 9587 }, { "epoch": 0.5344183713282425, "grad_norm": 0.5666378736495972, "learning_rate": 4.5850950197828247e-05, "loss": 1.7059, "step": 9588 }, { "epoch": 0.5344741095814057, "grad_norm": 0.6286757588386536, "learning_rate": 4.5842076736915974e-05, "loss": 1.9543, "step": 9589 }, { "epoch": 0.5345298478345689, "grad_norm": 0.5461648106575012, "learning_rate": 4.583320340786879e-05, "loss": 1.5304, "step": 9590 }, { "epoch": 0.534585586087732, "grad_norm": 0.5395976901054382, "learning_rate": 4.58243302109681e-05, "loss": 1.6066, "step": 9591 }, { "epoch": 0.5346413243408952, "grad_norm": 0.557267963886261, "learning_rate": 4.581545714649531e-05, "loss": 1.5607, "step": 9592 }, { "epoch": 0.5346970625940582, "grad_norm": 0.5258218050003052, "learning_rate": 4.5806584214731816e-05, "loss": 1.5174, "step": 9593 }, { "epoch": 0.5347528008472214, "grad_norm": 0.5536956787109375, "learning_rate": 4.579771141595903e-05, "loss": 1.5984, "step": 9594 }, { "epoch": 0.5348085391003846, "grad_norm": 0.591376781463623, "learning_rate": 4.578883875045833e-05, "loss": 1.7833, "step": 9595 }, { "epoch": 0.5348642773535477, "grad_norm": 0.5513203144073486, "learning_rate": 4.5779966218511094e-05, "loss": 1.8074, "step": 9596 }, { "epoch": 0.5349200156067109, "grad_norm": 0.5725258588790894, "learning_rate": 4.5771093820398756e-05, "loss": 1.6128, "step": 9597 }, { "epoch": 0.5349757538598741, "grad_norm": 0.6103070378303528, "learning_rate": 4.576222155640265e-05, "loss": 1.9675, "step": 9598 }, { "epoch": 0.5350314921130371, "grad_norm": 0.5657504796981812, "learning_rate": 4.5753349426804176e-05, "loss": 1.7877, "step": 9599 }, { "epoch": 0.5350872303662003, "grad_norm": 0.5394463539123535, "learning_rate": 4.574447743188469e-05, "loss": 1.7376, "step": 9600 }, { "epoch": 0.5351429686193635, "grad_norm": 0.582391619682312, "learning_rate": 4.573560557192558e-05, "loss": 1.5553, "step": 9601 }, { "epoch": 0.5351987068725266, "grad_norm": 0.5822402238845825, "learning_rate": 4.572673384720819e-05, "loss": 1.5464, "step": 9602 }, { "epoch": 0.5352544451256898, "grad_norm": 0.5555740594863892, "learning_rate": 4.571786225801388e-05, "loss": 1.7513, "step": 9603 }, { "epoch": 0.5353101833788529, "grad_norm": 0.5371900200843811, "learning_rate": 4.570899080462404e-05, "loss": 1.658, "step": 9604 }, { "epoch": 0.535365921632016, "grad_norm": 0.6092341542243958, "learning_rate": 4.570011948731996e-05, "loss": 1.7345, "step": 9605 }, { "epoch": 0.5354216598851792, "grad_norm": 0.5381090044975281, "learning_rate": 4.569124830638304e-05, "loss": 1.5001, "step": 9606 }, { "epoch": 0.5354773981383424, "grad_norm": 0.6168373227119446, "learning_rate": 4.568237726209459e-05, "loss": 1.8614, "step": 9607 }, { "epoch": 0.5355331363915055, "grad_norm": 0.5389047265052795, "learning_rate": 4.567350635473596e-05, "loss": 1.5247, "step": 9608 }, { "epoch": 0.5355888746446686, "grad_norm": 0.5485653877258301, "learning_rate": 4.56646355845885e-05, "loss": 1.4937, "step": 9609 }, { "epoch": 0.5356446128978318, "grad_norm": 0.5548982620239258, "learning_rate": 4.565576495193351e-05, "loss": 1.6458, "step": 9610 }, { "epoch": 0.5357003511509949, "grad_norm": 0.5642284750938416, "learning_rate": 4.564689445705233e-05, "loss": 1.6966, "step": 9611 }, { "epoch": 0.5357560894041581, "grad_norm": 0.5454595685005188, "learning_rate": 4.563802410022627e-05, "loss": 1.5728, "step": 9612 }, { "epoch": 0.5358118276573213, "grad_norm": 0.5459495782852173, "learning_rate": 4.562915388173668e-05, "loss": 1.5377, "step": 9613 }, { "epoch": 0.5358675659104843, "grad_norm": 0.5787219405174255, "learning_rate": 4.562028380186481e-05, "loss": 1.6114, "step": 9614 }, { "epoch": 0.5359233041636475, "grad_norm": 0.550297200679779, "learning_rate": 4.561141386089201e-05, "loss": 1.2671, "step": 9615 }, { "epoch": 0.5359790424168106, "grad_norm": 0.5597065687179565, "learning_rate": 4.560254405909959e-05, "loss": 1.5903, "step": 9616 }, { "epoch": 0.5360347806699738, "grad_norm": 0.5175857543945312, "learning_rate": 4.559367439676882e-05, "loss": 1.5522, "step": 9617 }, { "epoch": 0.536090518923137, "grad_norm": 0.5799580216407776, "learning_rate": 4.558480487418102e-05, "loss": 1.6349, "step": 9618 }, { "epoch": 0.5361462571763, "grad_norm": 0.536422610282898, "learning_rate": 4.557593549161746e-05, "loss": 1.7025, "step": 9619 }, { "epoch": 0.5362019954294632, "grad_norm": 0.534331738948822, "learning_rate": 4.556706624935944e-05, "loss": 1.5889, "step": 9620 }, { "epoch": 0.5362577336826264, "grad_norm": 0.5340269804000854, "learning_rate": 4.555819714768822e-05, "loss": 1.5922, "step": 9621 }, { "epoch": 0.5363134719357895, "grad_norm": 0.5497701168060303, "learning_rate": 4.554932818688508e-05, "loss": 1.4581, "step": 9622 }, { "epoch": 0.5363692101889527, "grad_norm": 0.6451365947723389, "learning_rate": 4.554045936723132e-05, "loss": 1.8858, "step": 9623 }, { "epoch": 0.5364249484421159, "grad_norm": 0.5696703195571899, "learning_rate": 4.553159068900818e-05, "loss": 1.6621, "step": 9624 }, { "epoch": 0.5364806866952789, "grad_norm": 0.5307613611221313, "learning_rate": 4.552272215249694e-05, "loss": 1.6915, "step": 9625 }, { "epoch": 0.5365364249484421, "grad_norm": 0.5900824666023254, "learning_rate": 4.551385375797884e-05, "loss": 1.6064, "step": 9626 }, { "epoch": 0.5365921632016053, "grad_norm": 0.579522967338562, "learning_rate": 4.5504985505735154e-05, "loss": 1.721, "step": 9627 }, { "epoch": 0.5366479014547684, "grad_norm": 0.5785220861434937, "learning_rate": 4.5496117396047107e-05, "loss": 1.6382, "step": 9628 }, { "epoch": 0.5367036397079316, "grad_norm": 0.5899390578269958, "learning_rate": 4.5487249429195946e-05, "loss": 1.8254, "step": 9629 }, { "epoch": 0.5367593779610947, "grad_norm": 0.6377177238464355, "learning_rate": 4.5478381605462955e-05, "loss": 1.9322, "step": 9630 }, { "epoch": 0.5368151162142578, "grad_norm": 0.6333304047584534, "learning_rate": 4.54695139251293e-05, "loss": 1.6886, "step": 9631 }, { "epoch": 0.536870854467421, "grad_norm": 0.5392901301383972, "learning_rate": 4.546064638847628e-05, "loss": 1.6018, "step": 9632 }, { "epoch": 0.5369265927205842, "grad_norm": 0.5325821042060852, "learning_rate": 4.545177899578507e-05, "loss": 1.6915, "step": 9633 }, { "epoch": 0.5369823309737473, "grad_norm": 0.5551168918609619, "learning_rate": 4.544291174733692e-05, "loss": 1.6215, "step": 9634 }, { "epoch": 0.5370380692269104, "grad_norm": 0.6074517965316772, "learning_rate": 4.543404464341304e-05, "loss": 1.5721, "step": 9635 }, { "epoch": 0.5370938074800736, "grad_norm": 0.5474486947059631, "learning_rate": 4.5425177684294645e-05, "loss": 1.6407, "step": 9636 }, { "epoch": 0.5371495457332367, "grad_norm": 0.5517297387123108, "learning_rate": 4.541631087026294e-05, "loss": 1.6858, "step": 9637 }, { "epoch": 0.5372052839863999, "grad_norm": 0.5288307666778564, "learning_rate": 4.5407444201599115e-05, "loss": 1.4761, "step": 9638 }, { "epoch": 0.537261022239563, "grad_norm": 0.4987405836582184, "learning_rate": 4.539857767858441e-05, "loss": 1.6962, "step": 9639 }, { "epoch": 0.5373167604927261, "grad_norm": 0.5545489192008972, "learning_rate": 4.538971130149997e-05, "loss": 1.5394, "step": 9640 }, { "epoch": 0.5373724987458893, "grad_norm": 0.5300205945968628, "learning_rate": 4.538084507062702e-05, "loss": 1.629, "step": 9641 }, { "epoch": 0.5374282369990524, "grad_norm": 0.5651752352714539, "learning_rate": 4.537197898624673e-05, "loss": 1.5614, "step": 9642 }, { "epoch": 0.5374839752522156, "grad_norm": 0.56346195936203, "learning_rate": 4.536311304864028e-05, "loss": 1.532, "step": 9643 }, { "epoch": 0.5375397135053788, "grad_norm": 0.5524798035621643, "learning_rate": 4.5354247258088854e-05, "loss": 1.5766, "step": 9644 }, { "epoch": 0.5375954517585418, "grad_norm": 0.5094345808029175, "learning_rate": 4.534538161487362e-05, "loss": 1.727, "step": 9645 }, { "epoch": 0.537651190011705, "grad_norm": 0.5881072282791138, "learning_rate": 4.533651611927574e-05, "loss": 1.6667, "step": 9646 }, { "epoch": 0.5377069282648682, "grad_norm": 0.5255770087242126, "learning_rate": 4.532765077157637e-05, "loss": 1.6659, "step": 9647 }, { "epoch": 0.5377626665180313, "grad_norm": 0.6107676029205322, "learning_rate": 4.5318785572056674e-05, "loss": 1.6792, "step": 9648 }, { "epoch": 0.5378184047711945, "grad_norm": 0.596538245677948, "learning_rate": 4.530992052099782e-05, "loss": 1.5461, "step": 9649 }, { "epoch": 0.5378741430243577, "grad_norm": 0.5855775475502014, "learning_rate": 4.530105561868094e-05, "loss": 1.6144, "step": 9650 }, { "epoch": 0.5379298812775207, "grad_norm": 0.5489295721054077, "learning_rate": 4.529219086538718e-05, "loss": 1.7566, "step": 9651 }, { "epoch": 0.5379856195306839, "grad_norm": 0.5393614172935486, "learning_rate": 4.528332626139767e-05, "loss": 1.6659, "step": 9652 }, { "epoch": 0.5380413577838471, "grad_norm": 0.5832717418670654, "learning_rate": 4.527446180699356e-05, "loss": 1.7152, "step": 9653 }, { "epoch": 0.5380970960370102, "grad_norm": 0.5892272591590881, "learning_rate": 4.526559750245597e-05, "loss": 1.7, "step": 9654 }, { "epoch": 0.5381528342901734, "grad_norm": 0.5694185495376587, "learning_rate": 4.5256733348066e-05, "loss": 1.5537, "step": 9655 }, { "epoch": 0.5382085725433365, "grad_norm": 0.5511647462844849, "learning_rate": 4.524786934410483e-05, "loss": 1.5007, "step": 9656 }, { "epoch": 0.5382643107964996, "grad_norm": 0.5417333245277405, "learning_rate": 4.5239005490853505e-05, "loss": 1.7346, "step": 9657 }, { "epoch": 0.5383200490496628, "grad_norm": 0.5600014328956604, "learning_rate": 4.523014178859319e-05, "loss": 1.5088, "step": 9658 }, { "epoch": 0.538375787302826, "grad_norm": 0.5783017873764038, "learning_rate": 4.522127823760495e-05, "loss": 1.6899, "step": 9659 }, { "epoch": 0.538431525555989, "grad_norm": 0.5410140156745911, "learning_rate": 4.5212414838169905e-05, "loss": 1.564, "step": 9660 }, { "epoch": 0.5384872638091522, "grad_norm": 0.5686094164848328, "learning_rate": 4.520355159056917e-05, "loss": 1.7485, "step": 9661 }, { "epoch": 0.5385430020623153, "grad_norm": 0.5296582579612732, "learning_rate": 4.519468849508379e-05, "loss": 1.4511, "step": 9662 }, { "epoch": 0.5385987403154785, "grad_norm": 0.5661003589630127, "learning_rate": 4.5185825551994884e-05, "loss": 1.7853, "step": 9663 }, { "epoch": 0.5386544785686417, "grad_norm": 0.5485278964042664, "learning_rate": 4.5176962761583505e-05, "loss": 1.6016, "step": 9664 }, { "epoch": 0.5387102168218048, "grad_norm": 0.6073358654975891, "learning_rate": 4.5168100124130787e-05, "loss": 1.8608, "step": 9665 }, { "epoch": 0.5387659550749679, "grad_norm": 0.6006177663803101, "learning_rate": 4.515923763991772e-05, "loss": 1.6971, "step": 9666 }, { "epoch": 0.5388216933281311, "grad_norm": 0.6587806344032288, "learning_rate": 4.515037530922542e-05, "loss": 2.0262, "step": 9667 }, { "epoch": 0.5388774315812942, "grad_norm": 0.5338617563247681, "learning_rate": 4.5141513132334956e-05, "loss": 1.5727, "step": 9668 }, { "epoch": 0.5389331698344574, "grad_norm": 0.5626280307769775, "learning_rate": 4.513265110952736e-05, "loss": 1.7543, "step": 9669 }, { "epoch": 0.5389889080876206, "grad_norm": 0.5913511514663696, "learning_rate": 4.5123789241083696e-05, "loss": 1.7567, "step": 9670 }, { "epoch": 0.5390446463407836, "grad_norm": 0.5821614265441895, "learning_rate": 4.5114927527285e-05, "loss": 1.8279, "step": 9671 }, { "epoch": 0.5391003845939468, "grad_norm": 0.5600893497467041, "learning_rate": 4.510606596841233e-05, "loss": 1.7168, "step": 9672 }, { "epoch": 0.53915612284711, "grad_norm": 0.57114577293396, "learning_rate": 4.5097204564746705e-05, "loss": 1.8184, "step": 9673 }, { "epoch": 0.5392118611002731, "grad_norm": 0.6253485679626465, "learning_rate": 4.5088343316569156e-05, "loss": 1.5677, "step": 9674 }, { "epoch": 0.5392675993534363, "grad_norm": 0.5010294318199158, "learning_rate": 4.507948222416074e-05, "loss": 1.423, "step": 9675 }, { "epoch": 0.5393233376065995, "grad_norm": 0.5963042974472046, "learning_rate": 4.507062128780245e-05, "loss": 1.7762, "step": 9676 }, { "epoch": 0.5393790758597625, "grad_norm": 0.5247800946235657, "learning_rate": 4.506176050777532e-05, "loss": 1.5931, "step": 9677 }, { "epoch": 0.5394348141129257, "grad_norm": 0.5738952159881592, "learning_rate": 4.505289988436034e-05, "loss": 1.8376, "step": 9678 }, { "epoch": 0.5394905523660889, "grad_norm": 0.5756804347038269, "learning_rate": 4.504403941783855e-05, "loss": 1.695, "step": 9679 }, { "epoch": 0.539546290619252, "grad_norm": 0.5405778884887695, "learning_rate": 4.503517910849093e-05, "loss": 1.7173, "step": 9680 }, { "epoch": 0.5396020288724152, "grad_norm": 0.501308023929596, "learning_rate": 4.502631895659846e-05, "loss": 1.3706, "step": 9681 }, { "epoch": 0.5396577671255783, "grad_norm": 0.5409435629844666, "learning_rate": 4.501745896244219e-05, "loss": 1.6029, "step": 9682 }, { "epoch": 0.5397135053787414, "grad_norm": 0.6150081157684326, "learning_rate": 4.500859912630303e-05, "loss": 1.8895, "step": 9683 }, { "epoch": 0.5397692436319046, "grad_norm": 0.5330567359924316, "learning_rate": 4.499973944846204e-05, "loss": 1.5352, "step": 9684 }, { "epoch": 0.5398249818850677, "grad_norm": 0.5629265308380127, "learning_rate": 4.499087992920015e-05, "loss": 1.5303, "step": 9685 }, { "epoch": 0.5398807201382309, "grad_norm": 0.6040432453155518, "learning_rate": 4.4982020568798344e-05, "loss": 1.9174, "step": 9686 }, { "epoch": 0.539936458391394, "grad_norm": 0.5688749551773071, "learning_rate": 4.497316136753759e-05, "loss": 1.858, "step": 9687 }, { "epoch": 0.5399921966445571, "grad_norm": 0.5480324029922485, "learning_rate": 4.496430232569884e-05, "loss": 1.6544, "step": 9688 }, { "epoch": 0.5400479348977203, "grad_norm": 0.5562218427658081, "learning_rate": 4.4955443443563064e-05, "loss": 1.6606, "step": 9689 }, { "epoch": 0.5401036731508835, "grad_norm": 0.5424361824989319, "learning_rate": 4.49465847214112e-05, "loss": 1.5377, "step": 9690 }, { "epoch": 0.5401594114040466, "grad_norm": 0.6881382465362549, "learning_rate": 4.4937726159524235e-05, "loss": 1.6281, "step": 9691 }, { "epoch": 0.5402151496572097, "grad_norm": 0.5246618390083313, "learning_rate": 4.492886775818305e-05, "loss": 1.6659, "step": 9692 }, { "epoch": 0.5402708879103729, "grad_norm": 0.5500891804695129, "learning_rate": 4.492000951766862e-05, "loss": 1.5169, "step": 9693 }, { "epoch": 0.540326626163536, "grad_norm": 0.5503033399581909, "learning_rate": 4.4911151438261885e-05, "loss": 1.4738, "step": 9694 }, { "epoch": 0.5403823644166992, "grad_norm": 0.5710744857788086, "learning_rate": 4.4902293520243746e-05, "loss": 1.7376, "step": 9695 }, { "epoch": 0.5404381026698624, "grad_norm": 0.5603642463684082, "learning_rate": 4.489343576389514e-05, "loss": 1.7419, "step": 9696 }, { "epoch": 0.5404938409230254, "grad_norm": 0.60004723072052, "learning_rate": 4.488457816949697e-05, "loss": 1.4912, "step": 9697 }, { "epoch": 0.5405495791761886, "grad_norm": 0.5611134171485901, "learning_rate": 4.487572073733017e-05, "loss": 1.8182, "step": 9698 }, { "epoch": 0.5406053174293518, "grad_norm": 0.5735024809837341, "learning_rate": 4.48668634676756e-05, "loss": 1.7492, "step": 9699 }, { "epoch": 0.5406610556825149, "grad_norm": 0.5404465794563293, "learning_rate": 4.4858006360814215e-05, "loss": 1.7785, "step": 9700 }, { "epoch": 0.5407167939356781, "grad_norm": 0.5365709066390991, "learning_rate": 4.484914941702689e-05, "loss": 1.6768, "step": 9701 }, { "epoch": 0.5407725321888412, "grad_norm": 0.5550958514213562, "learning_rate": 4.484029263659451e-05, "loss": 1.6993, "step": 9702 }, { "epoch": 0.5408282704420043, "grad_norm": 0.5951088666915894, "learning_rate": 4.4831436019797976e-05, "loss": 1.882, "step": 9703 }, { "epoch": 0.5408840086951675, "grad_norm": 0.5994411110877991, "learning_rate": 4.482257956691814e-05, "loss": 1.8899, "step": 9704 }, { "epoch": 0.5409397469483307, "grad_norm": 0.5515292882919312, "learning_rate": 4.48137232782359e-05, "loss": 1.576, "step": 9705 }, { "epoch": 0.5409954852014938, "grad_norm": 0.5630680322647095, "learning_rate": 4.480486715403212e-05, "loss": 1.7682, "step": 9706 }, { "epoch": 0.541051223454657, "grad_norm": 0.608163058757782, "learning_rate": 4.479601119458765e-05, "loss": 1.8614, "step": 9707 }, { "epoch": 0.54110696170782, "grad_norm": 0.5568028688430786, "learning_rate": 4.478715540018339e-05, "loss": 1.9185, "step": 9708 }, { "epoch": 0.5411626999609832, "grad_norm": 0.5182965993881226, "learning_rate": 4.477829977110014e-05, "loss": 1.5266, "step": 9709 }, { "epoch": 0.5412184382141464, "grad_norm": 0.5429890751838684, "learning_rate": 4.4769444307618804e-05, "loss": 1.5938, "step": 9710 }, { "epoch": 0.5412741764673095, "grad_norm": 0.5619489550590515, "learning_rate": 4.476058901002018e-05, "loss": 1.7021, "step": 9711 }, { "epoch": 0.5413299147204726, "grad_norm": 0.6017050743103027, "learning_rate": 4.475173387858513e-05, "loss": 1.7607, "step": 9712 }, { "epoch": 0.5413856529736358, "grad_norm": 0.536908745765686, "learning_rate": 4.4742878913594485e-05, "loss": 1.6063, "step": 9713 }, { "epoch": 0.5414413912267989, "grad_norm": 0.5397683382034302, "learning_rate": 4.4734024115329066e-05, "loss": 1.5175, "step": 9714 }, { "epoch": 0.5414971294799621, "grad_norm": 0.6045666337013245, "learning_rate": 4.4725169484069706e-05, "loss": 1.9511, "step": 9715 }, { "epoch": 0.5415528677331253, "grad_norm": 0.5848411321640015, "learning_rate": 4.47163150200972e-05, "loss": 1.7098, "step": 9716 }, { "epoch": 0.5416086059862883, "grad_norm": 0.5425751209259033, "learning_rate": 4.4707460723692396e-05, "loss": 1.6926, "step": 9717 }, { "epoch": 0.5416643442394515, "grad_norm": 0.5430467128753662, "learning_rate": 4.469860659513606e-05, "loss": 1.5529, "step": 9718 }, { "epoch": 0.5417200824926147, "grad_norm": 0.5357252359390259, "learning_rate": 4.468975263470902e-05, "loss": 1.5383, "step": 9719 }, { "epoch": 0.5417758207457778, "grad_norm": 0.6040672659873962, "learning_rate": 4.468089884269207e-05, "loss": 1.437, "step": 9720 }, { "epoch": 0.541831558998941, "grad_norm": 0.5664768218994141, "learning_rate": 4.467204521936599e-05, "loss": 1.5692, "step": 9721 }, { "epoch": 0.5418872972521042, "grad_norm": 0.5839176774024963, "learning_rate": 4.466319176501159e-05, "loss": 1.7301, "step": 9722 }, { "epoch": 0.5419430355052672, "grad_norm": 0.6070646643638611, "learning_rate": 4.465433847990961e-05, "loss": 1.806, "step": 9723 }, { "epoch": 0.5419987737584304, "grad_norm": 0.6136497855186462, "learning_rate": 4.464548536434086e-05, "loss": 1.8704, "step": 9724 }, { "epoch": 0.5420545120115936, "grad_norm": 0.5477949976921082, "learning_rate": 4.463663241858607e-05, "loss": 1.5875, "step": 9725 }, { "epoch": 0.5421102502647567, "grad_norm": 0.5016915798187256, "learning_rate": 4.462777964292605e-05, "loss": 1.504, "step": 9726 }, { "epoch": 0.5421659885179199, "grad_norm": 0.588592529296875, "learning_rate": 4.461892703764154e-05, "loss": 1.7438, "step": 9727 }, { "epoch": 0.542221726771083, "grad_norm": 0.6035858392715454, "learning_rate": 4.461007460301328e-05, "loss": 1.8666, "step": 9728 }, { "epoch": 0.5422774650242461, "grad_norm": 0.5583263635635376, "learning_rate": 4.4601222339322045e-05, "loss": 1.456, "step": 9729 }, { "epoch": 0.5423332032774093, "grad_norm": 0.5258424878120422, "learning_rate": 4.459237024684855e-05, "loss": 1.4502, "step": 9730 }, { "epoch": 0.5423889415305724, "grad_norm": 0.59454345703125, "learning_rate": 4.458351832587354e-05, "loss": 1.9578, "step": 9731 }, { "epoch": 0.5424446797837356, "grad_norm": 0.5876555442810059, "learning_rate": 4.457466657667775e-05, "loss": 1.867, "step": 9732 }, { "epoch": 0.5425004180368987, "grad_norm": 0.5531097650527954, "learning_rate": 4.456581499954189e-05, "loss": 1.8218, "step": 9733 }, { "epoch": 0.5425561562900618, "grad_norm": 0.578526496887207, "learning_rate": 4.4556963594746724e-05, "loss": 1.7565, "step": 9734 }, { "epoch": 0.542611894543225, "grad_norm": 0.5239474177360535, "learning_rate": 4.4548112362572915e-05, "loss": 1.5784, "step": 9735 }, { "epoch": 0.5426676327963882, "grad_norm": 0.5566216707229614, "learning_rate": 4.45392613033012e-05, "loss": 1.6293, "step": 9736 }, { "epoch": 0.5427233710495513, "grad_norm": 0.5767298340797424, "learning_rate": 4.453041041721228e-05, "loss": 1.8317, "step": 9737 }, { "epoch": 0.5427791093027144, "grad_norm": 0.5398491621017456, "learning_rate": 4.452155970458686e-05, "loss": 1.7089, "step": 9738 }, { "epoch": 0.5428348475558776, "grad_norm": 0.5698423385620117, "learning_rate": 4.451270916570562e-05, "loss": 1.7275, "step": 9739 }, { "epoch": 0.5428905858090407, "grad_norm": 0.534481942653656, "learning_rate": 4.450385880084924e-05, "loss": 1.6992, "step": 9740 }, { "epoch": 0.5429463240622039, "grad_norm": 0.5307855606079102, "learning_rate": 4.4495008610298435e-05, "loss": 1.6468, "step": 9741 }, { "epoch": 0.5430020623153671, "grad_norm": 0.572459876537323, "learning_rate": 4.448615859433383e-05, "loss": 1.7285, "step": 9742 }, { "epoch": 0.5430578005685301, "grad_norm": 0.5302841067314148, "learning_rate": 4.447730875323617e-05, "loss": 1.5839, "step": 9743 }, { "epoch": 0.5431135388216933, "grad_norm": 0.5929808616638184, "learning_rate": 4.446845908728604e-05, "loss": 1.8845, "step": 9744 }, { "epoch": 0.5431692770748565, "grad_norm": 0.569716215133667, "learning_rate": 4.445960959676414e-05, "loss": 1.8191, "step": 9745 }, { "epoch": 0.5432250153280196, "grad_norm": 0.590050995349884, "learning_rate": 4.4450760281951134e-05, "loss": 1.8001, "step": 9746 }, { "epoch": 0.5432807535811828, "grad_norm": 0.5911651849746704, "learning_rate": 4.444191114312765e-05, "loss": 1.8329, "step": 9747 }, { "epoch": 0.543336491834346, "grad_norm": 0.5420973896980286, "learning_rate": 4.4433062180574356e-05, "loss": 1.4827, "step": 9748 }, { "epoch": 0.543392230087509, "grad_norm": 0.5806952118873596, "learning_rate": 4.442421339457185e-05, "loss": 1.5864, "step": 9749 }, { "epoch": 0.5434479683406722, "grad_norm": 0.5456143021583557, "learning_rate": 4.44153647854008e-05, "loss": 1.5616, "step": 9750 }, { "epoch": 0.5435037065938354, "grad_norm": 0.6005389094352722, "learning_rate": 4.44065163533418e-05, "loss": 1.773, "step": 9751 }, { "epoch": 0.5435594448469985, "grad_norm": 0.6176022291183472, "learning_rate": 4.43976680986755e-05, "loss": 1.8526, "step": 9752 }, { "epoch": 0.5436151831001617, "grad_norm": 0.5599396824836731, "learning_rate": 4.438882002168251e-05, "loss": 1.5885, "step": 9753 }, { "epoch": 0.5436709213533247, "grad_norm": 0.5520609617233276, "learning_rate": 4.437997212264343e-05, "loss": 1.5476, "step": 9754 }, { "epoch": 0.5437266596064879, "grad_norm": 0.5556759834289551, "learning_rate": 4.437112440183887e-05, "loss": 1.8489, "step": 9755 }, { "epoch": 0.5437823978596511, "grad_norm": 0.5187088847160339, "learning_rate": 4.436227685954942e-05, "loss": 1.4991, "step": 9756 }, { "epoch": 0.5438381361128142, "grad_norm": 0.5788566470146179, "learning_rate": 4.4353429496055685e-05, "loss": 1.6384, "step": 9757 }, { "epoch": 0.5438938743659774, "grad_norm": 0.5231850147247314, "learning_rate": 4.4344582311638234e-05, "loss": 1.5669, "step": 9758 }, { "epoch": 0.5439496126191405, "grad_norm": 0.5520696043968201, "learning_rate": 4.4335735306577645e-05, "loss": 1.8168, "step": 9759 }, { "epoch": 0.5440053508723036, "grad_norm": 0.5291838049888611, "learning_rate": 4.432688848115455e-05, "loss": 1.449, "step": 9760 }, { "epoch": 0.5440610891254668, "grad_norm": 0.5278047323226929, "learning_rate": 4.431804183564944e-05, "loss": 1.7658, "step": 9761 }, { "epoch": 0.54411682737863, "grad_norm": 0.5484183430671692, "learning_rate": 4.430919537034293e-05, "loss": 1.6033, "step": 9762 }, { "epoch": 0.5441725656317931, "grad_norm": 0.5881717801094055, "learning_rate": 4.430034908551556e-05, "loss": 1.5546, "step": 9763 }, { "epoch": 0.5442283038849562, "grad_norm": 0.5577450394630432, "learning_rate": 4.429150298144789e-05, "loss": 1.6856, "step": 9764 }, { "epoch": 0.5442840421381194, "grad_norm": 0.5595176219940186, "learning_rate": 4.428265705842045e-05, "loss": 1.7699, "step": 9765 }, { "epoch": 0.5443397803912825, "grad_norm": 0.5696182250976562, "learning_rate": 4.42738113167138e-05, "loss": 1.5959, "step": 9766 }, { "epoch": 0.5443955186444457, "grad_norm": 0.5747469067573547, "learning_rate": 4.426496575660848e-05, "loss": 1.7393, "step": 9767 }, { "epoch": 0.5444512568976089, "grad_norm": 0.551275372505188, "learning_rate": 4.425612037838498e-05, "loss": 1.7109, "step": 9768 }, { "epoch": 0.5445069951507719, "grad_norm": 0.5111570358276367, "learning_rate": 4.424727518232389e-05, "loss": 1.5678, "step": 9769 }, { "epoch": 0.5445627334039351, "grad_norm": 0.5441057682037354, "learning_rate": 4.4238430168705655e-05, "loss": 1.6594, "step": 9770 }, { "epoch": 0.5446184716570983, "grad_norm": 0.5695107579231262, "learning_rate": 4.422958533781084e-05, "loss": 1.6752, "step": 9771 }, { "epoch": 0.5446742099102614, "grad_norm": 0.5917108058929443, "learning_rate": 4.422074068991994e-05, "loss": 1.6174, "step": 9772 }, { "epoch": 0.5447299481634246, "grad_norm": 0.577524721622467, "learning_rate": 4.4211896225313446e-05, "loss": 1.666, "step": 9773 }, { "epoch": 0.5447856864165878, "grad_norm": 0.566038191318512, "learning_rate": 4.420305194427186e-05, "loss": 1.7369, "step": 9774 }, { "epoch": 0.5448414246697508, "grad_norm": 0.5815591216087341, "learning_rate": 4.4194207847075655e-05, "loss": 1.6213, "step": 9775 }, { "epoch": 0.544897162922914, "grad_norm": 0.6219716668128967, "learning_rate": 4.4185363934005346e-05, "loss": 1.8705, "step": 9776 }, { "epoch": 0.5449529011760771, "grad_norm": 0.5532581806182861, "learning_rate": 4.417652020534137e-05, "loss": 1.8517, "step": 9777 }, { "epoch": 0.5450086394292403, "grad_norm": 0.5168758034706116, "learning_rate": 4.416767666136422e-05, "loss": 1.4589, "step": 9778 }, { "epoch": 0.5450643776824035, "grad_norm": 0.5540144443511963, "learning_rate": 4.415883330235438e-05, "loss": 1.6545, "step": 9779 }, { "epoch": 0.5451201159355665, "grad_norm": 0.5852721333503723, "learning_rate": 4.4149990128592275e-05, "loss": 1.951, "step": 9780 }, { "epoch": 0.5451758541887297, "grad_norm": 0.5823214650154114, "learning_rate": 4.41411471403584e-05, "loss": 1.7445, "step": 9781 }, { "epoch": 0.5452315924418929, "grad_norm": 0.7067981958389282, "learning_rate": 4.413230433793315e-05, "loss": 1.8898, "step": 9782 }, { "epoch": 0.545287330695056, "grad_norm": 0.5201447010040283, "learning_rate": 4.4123461721597016e-05, "loss": 1.7319, "step": 9783 }, { "epoch": 0.5453430689482192, "grad_norm": 0.5641838908195496, "learning_rate": 4.41146192916304e-05, "loss": 1.7698, "step": 9784 }, { "epoch": 0.5453988072013823, "grad_norm": 0.6753969192504883, "learning_rate": 4.4105777048313734e-05, "loss": 1.8118, "step": 9785 }, { "epoch": 0.5454545454545454, "grad_norm": 0.5199365019798279, "learning_rate": 4.409693499192748e-05, "loss": 1.6101, "step": 9786 }, { "epoch": 0.5455102837077086, "grad_norm": 0.5644820928573608, "learning_rate": 4.408809312275202e-05, "loss": 1.55, "step": 9787 }, { "epoch": 0.5455660219608718, "grad_norm": 0.5578990578651428, "learning_rate": 4.407925144106778e-05, "loss": 1.7077, "step": 9788 }, { "epoch": 0.5456217602140349, "grad_norm": 0.5619536638259888, "learning_rate": 4.4070409947155164e-05, "loss": 1.736, "step": 9789 }, { "epoch": 0.545677498467198, "grad_norm": 0.5421947836875916, "learning_rate": 4.406156864129458e-05, "loss": 1.5738, "step": 9790 }, { "epoch": 0.5457332367203612, "grad_norm": 0.5277280211448669, "learning_rate": 4.40527275237664e-05, "loss": 1.6053, "step": 9791 }, { "epoch": 0.5457889749735243, "grad_norm": 0.5307068228721619, "learning_rate": 4.404388659485102e-05, "loss": 1.3866, "step": 9792 }, { "epoch": 0.5458447132266875, "grad_norm": 0.6040915250778198, "learning_rate": 4.403504585482886e-05, "loss": 1.8915, "step": 9793 }, { "epoch": 0.5459004514798507, "grad_norm": 0.592362105846405, "learning_rate": 4.402620530398024e-05, "loss": 1.7637, "step": 9794 }, { "epoch": 0.5459561897330137, "grad_norm": 0.5996968150138855, "learning_rate": 4.401736494258559e-05, "loss": 1.6987, "step": 9795 }, { "epoch": 0.5460119279861769, "grad_norm": 0.5434197187423706, "learning_rate": 4.400852477092521e-05, "loss": 1.4462, "step": 9796 }, { "epoch": 0.5460676662393401, "grad_norm": 0.5243266224861145, "learning_rate": 4.399968478927949e-05, "loss": 1.5191, "step": 9797 }, { "epoch": 0.5461234044925032, "grad_norm": 0.5348801016807556, "learning_rate": 4.399084499792882e-05, "loss": 1.6671, "step": 9798 }, { "epoch": 0.5461791427456664, "grad_norm": 0.5467276573181152, "learning_rate": 4.398200539715348e-05, "loss": 1.6921, "step": 9799 }, { "epoch": 0.5462348809988294, "grad_norm": 0.5606840252876282, "learning_rate": 4.397316598723385e-05, "loss": 1.7919, "step": 9800 }, { "epoch": 0.5462906192519926, "grad_norm": 0.6401974558830261, "learning_rate": 4.396432676845026e-05, "loss": 1.8814, "step": 9801 }, { "epoch": 0.5463463575051558, "grad_norm": 0.5535458326339722, "learning_rate": 4.395548774108304e-05, "loss": 1.5737, "step": 9802 }, { "epoch": 0.5464020957583189, "grad_norm": 0.5207072496414185, "learning_rate": 4.3946648905412486e-05, "loss": 1.6611, "step": 9803 }, { "epoch": 0.5464578340114821, "grad_norm": 0.5882294178009033, "learning_rate": 4.393781026171894e-05, "loss": 1.7487, "step": 9804 }, { "epoch": 0.5465135722646453, "grad_norm": 0.5833045244216919, "learning_rate": 4.392897181028273e-05, "loss": 1.5144, "step": 9805 }, { "epoch": 0.5465693105178083, "grad_norm": 0.5518405437469482, "learning_rate": 4.392013355138411e-05, "loss": 1.4697, "step": 9806 }, { "epoch": 0.5466250487709715, "grad_norm": 0.530725359916687, "learning_rate": 4.391129548530343e-05, "loss": 1.5778, "step": 9807 }, { "epoch": 0.5466807870241347, "grad_norm": 0.5344696640968323, "learning_rate": 4.390245761232094e-05, "loss": 1.692, "step": 9808 }, { "epoch": 0.5467365252772978, "grad_norm": 0.5627099275588989, "learning_rate": 4.389361993271696e-05, "loss": 1.6905, "step": 9809 }, { "epoch": 0.546792263530461, "grad_norm": 0.5395922660827637, "learning_rate": 4.388478244677174e-05, "loss": 1.6399, "step": 9810 }, { "epoch": 0.5468480017836241, "grad_norm": 0.5499055981636047, "learning_rate": 4.387594515476555e-05, "loss": 1.6705, "step": 9811 }, { "epoch": 0.5469037400367872, "grad_norm": 0.5418950319290161, "learning_rate": 4.386710805697871e-05, "loss": 1.2681, "step": 9812 }, { "epoch": 0.5469594782899504, "grad_norm": 0.6052936315536499, "learning_rate": 4.385827115369142e-05, "loss": 1.7306, "step": 9813 }, { "epoch": 0.5470152165431136, "grad_norm": 0.5980737805366516, "learning_rate": 4.3849434445183977e-05, "loss": 1.7266, "step": 9814 }, { "epoch": 0.5470709547962767, "grad_norm": 0.610704243183136, "learning_rate": 4.38405979317366e-05, "loss": 1.7676, "step": 9815 }, { "epoch": 0.5471266930494398, "grad_norm": 0.5389671921730042, "learning_rate": 4.383176161362956e-05, "loss": 1.5504, "step": 9816 }, { "epoch": 0.547182431302603, "grad_norm": 0.540878415107727, "learning_rate": 4.382292549114306e-05, "loss": 1.5915, "step": 9817 }, { "epoch": 0.5472381695557661, "grad_norm": 0.5596631765365601, "learning_rate": 4.381408956455734e-05, "loss": 1.7682, "step": 9818 }, { "epoch": 0.5472939078089293, "grad_norm": 0.5934588313102722, "learning_rate": 4.380525383415267e-05, "loss": 1.7587, "step": 9819 }, { "epoch": 0.5473496460620925, "grad_norm": 0.5937252044677734, "learning_rate": 4.3796418300209194e-05, "loss": 1.8327, "step": 9820 }, { "epoch": 0.5474053843152555, "grad_norm": 0.5708996057510376, "learning_rate": 4.378758296300719e-05, "loss": 1.7339, "step": 9821 }, { "epoch": 0.5474611225684187, "grad_norm": 0.5516582727432251, "learning_rate": 4.377874782282681e-05, "loss": 1.7792, "step": 9822 }, { "epoch": 0.5475168608215818, "grad_norm": 0.5492805242538452, "learning_rate": 4.376991287994829e-05, "loss": 1.5596, "step": 9823 }, { "epoch": 0.547572599074745, "grad_norm": 0.5498988628387451, "learning_rate": 4.376107813465181e-05, "loss": 1.5048, "step": 9824 }, { "epoch": 0.5476283373279082, "grad_norm": 0.5600182414054871, "learning_rate": 4.3752243587217546e-05, "loss": 1.6238, "step": 9825 }, { "epoch": 0.5476840755810712, "grad_norm": 0.6144223213195801, "learning_rate": 4.37434092379257e-05, "loss": 1.5942, "step": 9826 }, { "epoch": 0.5477398138342344, "grad_norm": 0.580475389957428, "learning_rate": 4.3734575087056425e-05, "loss": 1.6875, "step": 9827 }, { "epoch": 0.5477955520873976, "grad_norm": 0.5507834553718567, "learning_rate": 4.372574113488991e-05, "loss": 1.4169, "step": 9828 }, { "epoch": 0.5478512903405607, "grad_norm": 0.6568073034286499, "learning_rate": 4.371690738170628e-05, "loss": 1.7524, "step": 9829 }, { "epoch": 0.5479070285937239, "grad_norm": 0.5925707817077637, "learning_rate": 4.370807382778573e-05, "loss": 1.859, "step": 9830 }, { "epoch": 0.547962766846887, "grad_norm": 0.5850409269332886, "learning_rate": 4.36992404734084e-05, "loss": 1.4963, "step": 9831 }, { "epoch": 0.5480185051000501, "grad_norm": 0.5406137108802795, "learning_rate": 4.3690407318854416e-05, "loss": 1.4895, "step": 9832 }, { "epoch": 0.5480742433532133, "grad_norm": 0.5430670380592346, "learning_rate": 4.3681574364403936e-05, "loss": 1.6322, "step": 9833 }, { "epoch": 0.5481299816063765, "grad_norm": 0.5720853805541992, "learning_rate": 4.367274161033707e-05, "loss": 1.5524, "step": 9834 }, { "epoch": 0.5481857198595396, "grad_norm": 0.5545953512191772, "learning_rate": 4.366390905693396e-05, "loss": 1.5659, "step": 9835 }, { "epoch": 0.5482414581127028, "grad_norm": 0.5208585858345032, "learning_rate": 4.36550767044747e-05, "loss": 1.5188, "step": 9836 }, { "epoch": 0.5482971963658659, "grad_norm": 0.5306513905525208, "learning_rate": 4.36462445532394e-05, "loss": 1.6353, "step": 9837 }, { "epoch": 0.548352934619029, "grad_norm": 0.5258748531341553, "learning_rate": 4.3637412603508206e-05, "loss": 1.6311, "step": 9838 }, { "epoch": 0.5484086728721922, "grad_norm": 0.5057768821716309, "learning_rate": 4.3628580855561176e-05, "loss": 1.5426, "step": 9839 }, { "epoch": 0.5484644111253554, "grad_norm": 0.5385904312133789, "learning_rate": 4.361974930967842e-05, "loss": 1.7261, "step": 9840 }, { "epoch": 0.5485201493785185, "grad_norm": 0.5567389726638794, "learning_rate": 4.361091796614001e-05, "loss": 1.6934, "step": 9841 }, { "epoch": 0.5485758876316816, "grad_norm": 0.5337814092636108, "learning_rate": 4.360208682522603e-05, "loss": 1.6504, "step": 9842 }, { "epoch": 0.5486316258848448, "grad_norm": 0.5739735960960388, "learning_rate": 4.3593255887216555e-05, "loss": 1.5194, "step": 9843 }, { "epoch": 0.5486873641380079, "grad_norm": 0.5616000890731812, "learning_rate": 4.358442515239164e-05, "loss": 1.7104, "step": 9844 }, { "epoch": 0.5487431023911711, "grad_norm": 0.5667536854743958, "learning_rate": 4.357559462103138e-05, "loss": 1.547, "step": 9845 }, { "epoch": 0.5487988406443342, "grad_norm": 0.5744796991348267, "learning_rate": 4.3566764293415776e-05, "loss": 1.675, "step": 9846 }, { "epoch": 0.5488545788974973, "grad_norm": 0.5584723353385925, "learning_rate": 4.355793416982492e-05, "loss": 1.6043, "step": 9847 }, { "epoch": 0.5489103171506605, "grad_norm": 0.5384577512741089, "learning_rate": 4.354910425053881e-05, "loss": 1.7647, "step": 9848 }, { "epoch": 0.5489660554038236, "grad_norm": 0.5378496646881104, "learning_rate": 4.3540274535837524e-05, "loss": 1.5597, "step": 9849 }, { "epoch": 0.5490217936569868, "grad_norm": 0.5346800088882446, "learning_rate": 4.353144502600105e-05, "loss": 1.6489, "step": 9850 }, { "epoch": 0.54907753191015, "grad_norm": 0.5737703442573547, "learning_rate": 4.3522615721309436e-05, "loss": 1.7917, "step": 9851 }, { "epoch": 0.549133270163313, "grad_norm": 0.5731632113456726, "learning_rate": 4.3513786622042685e-05, "loss": 1.7261, "step": 9852 }, { "epoch": 0.5491890084164762, "grad_norm": 0.5530697107315063, "learning_rate": 4.35049577284808e-05, "loss": 1.6477, "step": 9853 }, { "epoch": 0.5492447466696394, "grad_norm": 0.5312392711639404, "learning_rate": 4.3496129040903795e-05, "loss": 1.5079, "step": 9854 }, { "epoch": 0.5493004849228025, "grad_norm": 0.5439527630805969, "learning_rate": 4.348730055959164e-05, "loss": 1.6616, "step": 9855 }, { "epoch": 0.5493562231759657, "grad_norm": 0.6246342062950134, "learning_rate": 4.3478472284824346e-05, "loss": 1.8967, "step": 9856 }, { "epoch": 0.5494119614291288, "grad_norm": 0.5579057335853577, "learning_rate": 4.3469644216881893e-05, "loss": 1.8138, "step": 9857 }, { "epoch": 0.5494676996822919, "grad_norm": 0.5730265378952026, "learning_rate": 4.346081635604425e-05, "loss": 1.6882, "step": 9858 }, { "epoch": 0.5495234379354551, "grad_norm": 0.6028726696968079, "learning_rate": 4.34519887025914e-05, "loss": 1.8155, "step": 9859 }, { "epoch": 0.5495791761886183, "grad_norm": 0.5326104164123535, "learning_rate": 4.3443161256803264e-05, "loss": 1.5754, "step": 9860 }, { "epoch": 0.5496349144417814, "grad_norm": 0.5944668650627136, "learning_rate": 4.343433401895984e-05, "loss": 1.6735, "step": 9861 }, { "epoch": 0.5496906526949445, "grad_norm": 0.5546173453330994, "learning_rate": 4.342550698934106e-05, "loss": 1.5815, "step": 9862 }, { "epoch": 0.5497463909481077, "grad_norm": 0.5735986232757568, "learning_rate": 4.3416680168226855e-05, "loss": 1.5027, "step": 9863 }, { "epoch": 0.5498021292012708, "grad_norm": 0.5421950817108154, "learning_rate": 4.340785355589718e-05, "loss": 1.6578, "step": 9864 }, { "epoch": 0.549857867454434, "grad_norm": 0.5298879146575928, "learning_rate": 4.339902715263195e-05, "loss": 1.4347, "step": 9865 }, { "epoch": 0.5499136057075972, "grad_norm": 0.574774444103241, "learning_rate": 4.339020095871111e-05, "loss": 1.6696, "step": 9866 }, { "epoch": 0.5499693439607602, "grad_norm": 0.5394953489303589, "learning_rate": 4.338137497441454e-05, "loss": 1.6238, "step": 9867 }, { "epoch": 0.5500250822139234, "grad_norm": 0.574478030204773, "learning_rate": 4.337254920002218e-05, "loss": 1.6317, "step": 9868 }, { "epoch": 0.5500808204670865, "grad_norm": 0.5428669452667236, "learning_rate": 4.336372363581391e-05, "loss": 1.4349, "step": 9869 }, { "epoch": 0.5501365587202497, "grad_norm": 0.49491697549819946, "learning_rate": 4.3354898282069624e-05, "loss": 1.4318, "step": 9870 }, { "epoch": 0.5501922969734129, "grad_norm": 0.5415276288986206, "learning_rate": 4.3346073139069245e-05, "loss": 1.7328, "step": 9871 }, { "epoch": 0.550248035226576, "grad_norm": 0.5677903890609741, "learning_rate": 4.3337248207092604e-05, "loss": 1.6179, "step": 9872 }, { "epoch": 0.5503037734797391, "grad_norm": 0.544323742389679, "learning_rate": 4.332842348641962e-05, "loss": 1.7354, "step": 9873 }, { "epoch": 0.5503595117329023, "grad_norm": 0.5574266910552979, "learning_rate": 4.331959897733015e-05, "loss": 1.4075, "step": 9874 }, { "epoch": 0.5504152499860654, "grad_norm": 0.566932201385498, "learning_rate": 4.3310774680104045e-05, "loss": 1.7368, "step": 9875 }, { "epoch": 0.5504709882392286, "grad_norm": 0.5502132773399353, "learning_rate": 4.330195059502116e-05, "loss": 1.6125, "step": 9876 }, { "epoch": 0.5505267264923918, "grad_norm": 0.5705932974815369, "learning_rate": 4.329312672236136e-05, "loss": 1.5961, "step": 9877 }, { "epoch": 0.5505824647455548, "grad_norm": 0.5607555508613586, "learning_rate": 4.328430306240447e-05, "loss": 1.7871, "step": 9878 }, { "epoch": 0.550638202998718, "grad_norm": 0.5171093344688416, "learning_rate": 4.327547961543032e-05, "loss": 1.6864, "step": 9879 }, { "epoch": 0.5506939412518812, "grad_norm": 0.5639234185218811, "learning_rate": 4.3266656381718776e-05, "loss": 1.503, "step": 9880 }, { "epoch": 0.5507496795050443, "grad_norm": 0.5581746697425842, "learning_rate": 4.325783336154961e-05, "loss": 1.6596, "step": 9881 }, { "epoch": 0.5508054177582075, "grad_norm": 0.5529181957244873, "learning_rate": 4.324901055520266e-05, "loss": 1.5023, "step": 9882 }, { "epoch": 0.5508611560113706, "grad_norm": 0.5628426671028137, "learning_rate": 4.324018796295776e-05, "loss": 1.6991, "step": 9883 }, { "epoch": 0.5509168942645337, "grad_norm": 0.6470309495925903, "learning_rate": 4.323136558509466e-05, "loss": 1.5738, "step": 9884 }, { "epoch": 0.5509726325176969, "grad_norm": 0.5868264436721802, "learning_rate": 4.3222543421893205e-05, "loss": 1.8939, "step": 9885 }, { "epoch": 0.5510283707708601, "grad_norm": 0.5328113436698914, "learning_rate": 4.3213721473633136e-05, "loss": 1.3017, "step": 9886 }, { "epoch": 0.5510841090240232, "grad_norm": 0.5659006237983704, "learning_rate": 4.320489974059426e-05, "loss": 1.6982, "step": 9887 }, { "epoch": 0.5511398472771863, "grad_norm": 0.5908814072608948, "learning_rate": 4.3196078223056346e-05, "loss": 1.75, "step": 9888 }, { "epoch": 0.5511955855303495, "grad_norm": 0.5215436220169067, "learning_rate": 4.3187256921299155e-05, "loss": 1.6721, "step": 9889 }, { "epoch": 0.5512513237835126, "grad_norm": 0.5671520233154297, "learning_rate": 4.317843583560246e-05, "loss": 1.6611, "step": 9890 }, { "epoch": 0.5513070620366758, "grad_norm": 0.568788468837738, "learning_rate": 4.316961496624601e-05, "loss": 1.5042, "step": 9891 }, { "epoch": 0.5513628002898389, "grad_norm": 0.5743429064750671, "learning_rate": 4.316079431350956e-05, "loss": 1.5001, "step": 9892 }, { "epoch": 0.551418538543002, "grad_norm": 0.5083088278770447, "learning_rate": 4.3151973877672815e-05, "loss": 1.3369, "step": 9893 }, { "epoch": 0.5514742767961652, "grad_norm": 0.5570357441902161, "learning_rate": 4.314315365901555e-05, "loss": 1.7525, "step": 9894 }, { "epoch": 0.5515300150493283, "grad_norm": 0.5853736400604248, "learning_rate": 4.3134333657817464e-05, "loss": 1.7689, "step": 9895 }, { "epoch": 0.5515857533024915, "grad_norm": 0.5641288161277771, "learning_rate": 4.312551387435827e-05, "loss": 1.8022, "step": 9896 }, { "epoch": 0.5516414915556547, "grad_norm": 0.5889329314231873, "learning_rate": 4.311669430891773e-05, "loss": 1.7209, "step": 9897 }, { "epoch": 0.5516972298088177, "grad_norm": 0.5651081800460815, "learning_rate": 4.310787496177548e-05, "loss": 1.6729, "step": 9898 }, { "epoch": 0.5517529680619809, "grad_norm": 0.5457670092582703, "learning_rate": 4.309905583321128e-05, "loss": 1.4496, "step": 9899 }, { "epoch": 0.5518087063151441, "grad_norm": 0.5131945013999939, "learning_rate": 4.309023692350478e-05, "loss": 1.5063, "step": 9900 }, { "epoch": 0.5518644445683072, "grad_norm": 0.5401031374931335, "learning_rate": 4.3081418232935687e-05, "loss": 1.5095, "step": 9901 }, { "epoch": 0.5519201828214704, "grad_norm": 0.6112632155418396, "learning_rate": 4.3072599761783664e-05, "loss": 1.7476, "step": 9902 }, { "epoch": 0.5519759210746336, "grad_norm": 0.5150609612464905, "learning_rate": 4.306378151032838e-05, "loss": 1.3913, "step": 9903 }, { "epoch": 0.5520316593277966, "grad_norm": 0.5849746465682983, "learning_rate": 4.3054963478849517e-05, "loss": 1.6125, "step": 9904 }, { "epoch": 0.5520873975809598, "grad_norm": 0.6250925064086914, "learning_rate": 4.30461456676267e-05, "loss": 1.8131, "step": 9905 }, { "epoch": 0.552143135834123, "grad_norm": 0.49495401978492737, "learning_rate": 4.303732807693963e-05, "loss": 1.4352, "step": 9906 }, { "epoch": 0.5521988740872861, "grad_norm": 0.6217641234397888, "learning_rate": 4.3028510707067885e-05, "loss": 1.9657, "step": 9907 }, { "epoch": 0.5522546123404493, "grad_norm": 0.6217669248580933, "learning_rate": 4.3019693558291144e-05, "loss": 1.8648, "step": 9908 }, { "epoch": 0.5523103505936124, "grad_norm": 0.5340979099273682, "learning_rate": 4.301087663088904e-05, "loss": 1.4133, "step": 9909 }, { "epoch": 0.5523660888467755, "grad_norm": 0.5489256381988525, "learning_rate": 4.300205992514117e-05, "loss": 1.8159, "step": 9910 }, { "epoch": 0.5524218270999387, "grad_norm": 0.5621556639671326, "learning_rate": 4.299324344132717e-05, "loss": 1.7179, "step": 9911 }, { "epoch": 0.5524775653531019, "grad_norm": 0.5325203537940979, "learning_rate": 4.298442717972662e-05, "loss": 1.6968, "step": 9912 }, { "epoch": 0.552533303606265, "grad_norm": 0.5561079978942871, "learning_rate": 4.297561114061915e-05, "loss": 1.6225, "step": 9913 }, { "epoch": 0.5525890418594281, "grad_norm": 0.566832423210144, "learning_rate": 4.2966795324284324e-05, "loss": 1.4759, "step": 9914 }, { "epoch": 0.5526447801125912, "grad_norm": 0.647016167640686, "learning_rate": 4.295797973100174e-05, "loss": 1.456, "step": 9915 }, { "epoch": 0.5527005183657544, "grad_norm": 0.5589674711227417, "learning_rate": 4.2949164361051e-05, "loss": 1.625, "step": 9916 }, { "epoch": 0.5527562566189176, "grad_norm": 0.5907155275344849, "learning_rate": 4.294034921471164e-05, "loss": 1.3695, "step": 9917 }, { "epoch": 0.5528119948720807, "grad_norm": 0.6016174554824829, "learning_rate": 4.2931534292263264e-05, "loss": 1.748, "step": 9918 }, { "epoch": 0.5528677331252438, "grad_norm": 0.593099057674408, "learning_rate": 4.292271959398539e-05, "loss": 1.7037, "step": 9919 }, { "epoch": 0.552923471378407, "grad_norm": 0.6433031558990479, "learning_rate": 4.2913905120157596e-05, "loss": 1.9699, "step": 9920 }, { "epoch": 0.5529792096315701, "grad_norm": 0.5927780270576477, "learning_rate": 4.290509087105942e-05, "loss": 1.7382, "step": 9921 }, { "epoch": 0.5530349478847333, "grad_norm": 0.5874158143997192, "learning_rate": 4.289627684697037e-05, "loss": 1.5503, "step": 9922 }, { "epoch": 0.5530906861378965, "grad_norm": 0.5684481263160706, "learning_rate": 4.288746304817004e-05, "loss": 1.3335, "step": 9923 }, { "epoch": 0.5531464243910595, "grad_norm": 0.6145278811454773, "learning_rate": 4.287864947493788e-05, "loss": 1.9235, "step": 9924 }, { "epoch": 0.5532021626442227, "grad_norm": 0.5604020953178406, "learning_rate": 4.286983612755345e-05, "loss": 1.7431, "step": 9925 }, { "epoch": 0.5532579008973859, "grad_norm": 0.5660736560821533, "learning_rate": 4.2861023006296245e-05, "loss": 1.6642, "step": 9926 }, { "epoch": 0.553313639150549, "grad_norm": 0.5653099417686462, "learning_rate": 4.285221011144578e-05, "loss": 1.5847, "step": 9927 }, { "epoch": 0.5533693774037122, "grad_norm": 0.5528052449226379, "learning_rate": 4.284339744328152e-05, "loss": 1.6667, "step": 9928 }, { "epoch": 0.5534251156568754, "grad_norm": 0.5655755996704102, "learning_rate": 4.283458500208297e-05, "loss": 1.6661, "step": 9929 }, { "epoch": 0.5534808539100384, "grad_norm": 0.5706286430358887, "learning_rate": 4.282577278812962e-05, "loss": 1.57, "step": 9930 }, { "epoch": 0.5535365921632016, "grad_norm": 0.5573133230209351, "learning_rate": 4.28169608017009e-05, "loss": 1.6694, "step": 9931 }, { "epoch": 0.5535923304163648, "grad_norm": 0.5579492449760437, "learning_rate": 4.2808149043076337e-05, "loss": 1.8431, "step": 9932 }, { "epoch": 0.5536480686695279, "grad_norm": 0.5701257586479187, "learning_rate": 4.279933751253533e-05, "loss": 1.7697, "step": 9933 }, { "epoch": 0.553703806922691, "grad_norm": 0.586942195892334, "learning_rate": 4.279052621035738e-05, "loss": 1.6883, "step": 9934 }, { "epoch": 0.5537595451758542, "grad_norm": 0.6071811318397522, "learning_rate": 4.2781715136821874e-05, "loss": 1.88, "step": 9935 }, { "epoch": 0.5538152834290173, "grad_norm": 0.6040579080581665, "learning_rate": 4.277290429220829e-05, "loss": 1.9303, "step": 9936 }, { "epoch": 0.5538710216821805, "grad_norm": 0.554654598236084, "learning_rate": 4.276409367679605e-05, "loss": 1.7339, "step": 9937 }, { "epoch": 0.5539267599353436, "grad_norm": 0.5604990720748901, "learning_rate": 4.275528329086457e-05, "loss": 1.8366, "step": 9938 }, { "epoch": 0.5539824981885068, "grad_norm": 0.5396780967712402, "learning_rate": 4.274647313469326e-05, "loss": 1.5683, "step": 9939 }, { "epoch": 0.5540382364416699, "grad_norm": 0.5582244396209717, "learning_rate": 4.273766320856152e-05, "loss": 1.7331, "step": 9940 }, { "epoch": 0.554093974694833, "grad_norm": 0.5684306621551514, "learning_rate": 4.2728853512748774e-05, "loss": 1.6732, "step": 9941 }, { "epoch": 0.5541497129479962, "grad_norm": 0.5707295536994934, "learning_rate": 4.272004404753441e-05, "loss": 1.7369, "step": 9942 }, { "epoch": 0.5542054512011594, "grad_norm": 0.5841218829154968, "learning_rate": 4.271123481319779e-05, "loss": 1.5369, "step": 9943 }, { "epoch": 0.5542611894543225, "grad_norm": 0.5583487749099731, "learning_rate": 4.2702425810018314e-05, "loss": 1.9257, "step": 9944 }, { "epoch": 0.5543169277074856, "grad_norm": 0.6115777492523193, "learning_rate": 4.269361703827533e-05, "loss": 1.6092, "step": 9945 }, { "epoch": 0.5543726659606488, "grad_norm": 0.5527899861335754, "learning_rate": 4.268480849824824e-05, "loss": 1.5267, "step": 9946 }, { "epoch": 0.5544284042138119, "grad_norm": 0.5732879638671875, "learning_rate": 4.2676000190216355e-05, "loss": 1.7716, "step": 9947 }, { "epoch": 0.5544841424669751, "grad_norm": 0.5747142434120178, "learning_rate": 4.266719211445903e-05, "loss": 1.5671, "step": 9948 }, { "epoch": 0.5545398807201383, "grad_norm": 0.5483896136283875, "learning_rate": 4.265838427125565e-05, "loss": 1.68, "step": 9949 }, { "epoch": 0.5545956189733013, "grad_norm": 0.5686975121498108, "learning_rate": 4.2649576660885484e-05, "loss": 1.6753, "step": 9950 }, { "epoch": 0.5546513572264645, "grad_norm": 0.5398499965667725, "learning_rate": 4.264076928362791e-05, "loss": 1.4447, "step": 9951 }, { "epoch": 0.5547070954796277, "grad_norm": 0.5364631414413452, "learning_rate": 4.2631962139762216e-05, "loss": 1.6929, "step": 9952 }, { "epoch": 0.5547628337327908, "grad_norm": 0.6317248940467834, "learning_rate": 4.262315522956774e-05, "loss": 1.3731, "step": 9953 }, { "epoch": 0.554818571985954, "grad_norm": 0.5127749443054199, "learning_rate": 4.261434855332376e-05, "loss": 1.4813, "step": 9954 }, { "epoch": 0.5548743102391172, "grad_norm": 0.5657464861869812, "learning_rate": 4.2605542111309574e-05, "loss": 1.7245, "step": 9955 }, { "epoch": 0.5549300484922802, "grad_norm": 0.5313467979431152, "learning_rate": 4.25967359038045e-05, "loss": 1.6008, "step": 9956 }, { "epoch": 0.5549857867454434, "grad_norm": 0.5843843817710876, "learning_rate": 4.258792993108777e-05, "loss": 1.483, "step": 9957 }, { "epoch": 0.5550415249986066, "grad_norm": 0.5298835635185242, "learning_rate": 4.257912419343872e-05, "loss": 1.7526, "step": 9958 }, { "epoch": 0.5550972632517697, "grad_norm": 0.5512775182723999, "learning_rate": 4.257031869113656e-05, "loss": 1.593, "step": 9959 }, { "epoch": 0.5551530015049329, "grad_norm": 0.5587732791900635, "learning_rate": 4.256151342446059e-05, "loss": 1.6164, "step": 9960 }, { "epoch": 0.5552087397580959, "grad_norm": 0.5447744727134705, "learning_rate": 4.255270839369003e-05, "loss": 1.4945, "step": 9961 }, { "epoch": 0.5552644780112591, "grad_norm": 0.6091803908348083, "learning_rate": 4.254390359910414e-05, "loss": 1.7595, "step": 9962 }, { "epoch": 0.5553202162644223, "grad_norm": 0.5939117074012756, "learning_rate": 4.2535099040982174e-05, "loss": 1.6638, "step": 9963 }, { "epoch": 0.5553759545175854, "grad_norm": 0.5523215532302856, "learning_rate": 4.252629471960332e-05, "loss": 1.6403, "step": 9964 }, { "epoch": 0.5554316927707486, "grad_norm": 0.5710287690162659, "learning_rate": 4.251749063524684e-05, "loss": 1.7287, "step": 9965 }, { "epoch": 0.5554874310239117, "grad_norm": 0.5372434854507446, "learning_rate": 4.2508686788191917e-05, "loss": 1.4819, "step": 9966 }, { "epoch": 0.5555431692770748, "grad_norm": 0.5489197373390198, "learning_rate": 4.249988317871777e-05, "loss": 1.6855, "step": 9967 }, { "epoch": 0.555598907530238, "grad_norm": 0.56691974401474, "learning_rate": 4.249107980710362e-05, "loss": 1.6364, "step": 9968 }, { "epoch": 0.5556546457834012, "grad_norm": 0.5599048733711243, "learning_rate": 4.2482276673628626e-05, "loss": 1.6847, "step": 9969 }, { "epoch": 0.5557103840365643, "grad_norm": 0.5381473302841187, "learning_rate": 4.247347377857199e-05, "loss": 1.5898, "step": 9970 }, { "epoch": 0.5557661222897274, "grad_norm": 0.5331934094429016, "learning_rate": 4.2464671122212876e-05, "loss": 1.7209, "step": 9971 }, { "epoch": 0.5558218605428906, "grad_norm": 0.593131959438324, "learning_rate": 4.245586870483047e-05, "loss": 1.9442, "step": 9972 }, { "epoch": 0.5558775987960537, "grad_norm": 0.5709297060966492, "learning_rate": 4.2447066526703914e-05, "loss": 1.6745, "step": 9973 }, { "epoch": 0.5559333370492169, "grad_norm": 0.5793182849884033, "learning_rate": 4.2438264588112354e-05, "loss": 1.7414, "step": 9974 }, { "epoch": 0.5559890753023801, "grad_norm": 0.5524191856384277, "learning_rate": 4.242946288933499e-05, "loss": 1.654, "step": 9975 }, { "epoch": 0.5560448135555431, "grad_norm": 0.5401830077171326, "learning_rate": 4.2420661430650895e-05, "loss": 1.8102, "step": 9976 }, { "epoch": 0.5561005518087063, "grad_norm": 0.5864329934120178, "learning_rate": 4.241186021233925e-05, "loss": 1.6475, "step": 9977 }, { "epoch": 0.5561562900618695, "grad_norm": 0.688472330570221, "learning_rate": 4.240305923467914e-05, "loss": 2.1339, "step": 9978 }, { "epoch": 0.5562120283150326, "grad_norm": 0.5475722551345825, "learning_rate": 4.239425849794971e-05, "loss": 1.6901, "step": 9979 }, { "epoch": 0.5562677665681958, "grad_norm": 0.5240103602409363, "learning_rate": 4.238545800243005e-05, "loss": 1.3724, "step": 9980 }, { "epoch": 0.556323504821359, "grad_norm": 0.5475266575813293, "learning_rate": 4.237665774839926e-05, "loss": 1.6478, "step": 9981 }, { "epoch": 0.556379243074522, "grad_norm": 0.5561927556991577, "learning_rate": 4.236785773613646e-05, "loss": 1.8298, "step": 9982 }, { "epoch": 0.5564349813276852, "grad_norm": 0.568395733833313, "learning_rate": 4.2359057965920684e-05, "loss": 1.5858, "step": 9983 }, { "epoch": 0.5564907195808483, "grad_norm": 0.5727097988128662, "learning_rate": 4.235025843803108e-05, "loss": 1.7207, "step": 9984 }, { "epoch": 0.5565464578340115, "grad_norm": 0.5476745963096619, "learning_rate": 4.234145915274663e-05, "loss": 1.6093, "step": 9985 }, { "epoch": 0.5566021960871746, "grad_norm": 0.5828372240066528, "learning_rate": 4.233266011034648e-05, "loss": 1.6445, "step": 9986 }, { "epoch": 0.5566579343403377, "grad_norm": 0.532822847366333, "learning_rate": 4.232386131110963e-05, "loss": 1.5421, "step": 9987 }, { "epoch": 0.5567136725935009, "grad_norm": 0.6059979200363159, "learning_rate": 4.231506275531514e-05, "loss": 1.7904, "step": 9988 }, { "epoch": 0.5567694108466641, "grad_norm": 0.5532347559928894, "learning_rate": 4.230626444324207e-05, "loss": 1.8607, "step": 9989 }, { "epoch": 0.5568251490998272, "grad_norm": 0.5815007090568542, "learning_rate": 4.2297466375169425e-05, "loss": 1.6841, "step": 9990 }, { "epoch": 0.5568808873529904, "grad_norm": 0.5533902049064636, "learning_rate": 4.2288668551376246e-05, "loss": 1.5424, "step": 9991 }, { "epoch": 0.5569366256061535, "grad_norm": 0.5605874061584473, "learning_rate": 4.2279870972141516e-05, "loss": 1.7097, "step": 9992 }, { "epoch": 0.5569923638593166, "grad_norm": 0.6050384640693665, "learning_rate": 4.227107363774429e-05, "loss": 1.6374, "step": 9993 }, { "epoch": 0.5570481021124798, "grad_norm": 0.5556758046150208, "learning_rate": 4.226227654846354e-05, "loss": 1.6, "step": 9994 }, { "epoch": 0.557103840365643, "grad_norm": 0.544030487537384, "learning_rate": 4.2253479704578255e-05, "loss": 1.5008, "step": 9995 }, { "epoch": 0.557159578618806, "grad_norm": 0.5751504898071289, "learning_rate": 4.224468310636745e-05, "loss": 1.6418, "step": 9996 }, { "epoch": 0.5572153168719692, "grad_norm": 0.5826640725135803, "learning_rate": 4.223588675411007e-05, "loss": 1.8126, "step": 9997 }, { "epoch": 0.5572710551251324, "grad_norm": 0.5713084936141968, "learning_rate": 4.222709064808509e-05, "loss": 1.7107, "step": 9998 }, { "epoch": 0.5573267933782955, "grad_norm": 0.5925366878509521, "learning_rate": 4.221829478857148e-05, "loss": 1.6814, "step": 9999 }, { "epoch": 0.5573825316314587, "grad_norm": 0.5469158291816711, "learning_rate": 4.220949917584817e-05, "loss": 1.7406, "step": 10000 }, { "epoch": 0.5574382698846219, "grad_norm": 0.5660231709480286, "learning_rate": 4.2200703810194155e-05, "loss": 1.6824, "step": 10001 }, { "epoch": 0.5574940081377849, "grad_norm": 0.6542315483093262, "learning_rate": 4.219190869188831e-05, "loss": 1.6746, "step": 10002 }, { "epoch": 0.5575497463909481, "grad_norm": 0.5918342471122742, "learning_rate": 4.2183113821209625e-05, "loss": 1.9145, "step": 10003 }, { "epoch": 0.5576054846441113, "grad_norm": 0.5502055883407593, "learning_rate": 4.2174319198436976e-05, "loss": 1.6127, "step": 10004 }, { "epoch": 0.5576612228972744, "grad_norm": 0.523463249206543, "learning_rate": 4.216552482384931e-05, "loss": 1.6454, "step": 10005 }, { "epoch": 0.5577169611504376, "grad_norm": 0.6080988645553589, "learning_rate": 4.215673069772551e-05, "loss": 1.7028, "step": 10006 }, { "epoch": 0.5577726994036006, "grad_norm": 0.5708165764808655, "learning_rate": 4.214793682034448e-05, "loss": 1.7396, "step": 10007 }, { "epoch": 0.5578284376567638, "grad_norm": 0.657543420791626, "learning_rate": 4.213914319198512e-05, "loss": 1.9985, "step": 10008 }, { "epoch": 0.557884175909927, "grad_norm": 0.5319724082946777, "learning_rate": 4.213034981292629e-05, "loss": 1.5067, "step": 10009 }, { "epoch": 0.5579399141630901, "grad_norm": 0.5601934790611267, "learning_rate": 4.212155668344691e-05, "loss": 1.5677, "step": 10010 }, { "epoch": 0.5579956524162533, "grad_norm": 0.5320611596107483, "learning_rate": 4.211276380382579e-05, "loss": 1.5929, "step": 10011 }, { "epoch": 0.5580513906694164, "grad_norm": 0.5682796239852905, "learning_rate": 4.210397117434183e-05, "loss": 1.5922, "step": 10012 }, { "epoch": 0.5581071289225795, "grad_norm": 0.531771183013916, "learning_rate": 4.2095178795273864e-05, "loss": 1.6061, "step": 10013 }, { "epoch": 0.5581628671757427, "grad_norm": 0.5384634733200073, "learning_rate": 4.208638666690074e-05, "loss": 1.6108, "step": 10014 }, { "epoch": 0.5582186054289059, "grad_norm": 0.5733115077018738, "learning_rate": 4.207759478950129e-05, "loss": 1.6811, "step": 10015 }, { "epoch": 0.558274343682069, "grad_norm": 0.5614367127418518, "learning_rate": 4.2068803163354344e-05, "loss": 1.5484, "step": 10016 }, { "epoch": 0.5583300819352321, "grad_norm": 0.5718212723731995, "learning_rate": 4.206001178873872e-05, "loss": 1.6159, "step": 10017 }, { "epoch": 0.5583858201883953, "grad_norm": 0.577841579914093, "learning_rate": 4.205122066593321e-05, "loss": 1.7111, "step": 10018 }, { "epoch": 0.5584415584415584, "grad_norm": 0.5670404434204102, "learning_rate": 4.204242979521665e-05, "loss": 1.6692, "step": 10019 }, { "epoch": 0.5584972966947216, "grad_norm": 0.5312654376029968, "learning_rate": 4.203363917686784e-05, "loss": 1.5002, "step": 10020 }, { "epoch": 0.5585530349478848, "grad_norm": 0.5269418358802795, "learning_rate": 4.202484881116553e-05, "loss": 1.5218, "step": 10021 }, { "epoch": 0.5586087732010478, "grad_norm": 0.5869148969650269, "learning_rate": 4.201605869838852e-05, "loss": 1.6535, "step": 10022 }, { "epoch": 0.558664511454211, "grad_norm": 0.5673363208770752, "learning_rate": 4.2007268838815575e-05, "loss": 1.7252, "step": 10023 }, { "epoch": 0.5587202497073742, "grad_norm": 0.5675745606422424, "learning_rate": 4.199847923272547e-05, "loss": 1.7039, "step": 10024 }, { "epoch": 0.5587759879605373, "grad_norm": 0.6065249443054199, "learning_rate": 4.198968988039695e-05, "loss": 1.8148, "step": 10025 }, { "epoch": 0.5588317262137005, "grad_norm": 0.5471330285072327, "learning_rate": 4.198090078210874e-05, "loss": 1.6482, "step": 10026 }, { "epoch": 0.5588874644668637, "grad_norm": 0.5834773182868958, "learning_rate": 4.1972111938139636e-05, "loss": 1.7497, "step": 10027 }, { "epoch": 0.5589432027200267, "grad_norm": 0.5758984684944153, "learning_rate": 4.196332334876831e-05, "loss": 1.7287, "step": 10028 }, { "epoch": 0.5589989409731899, "grad_norm": 0.6321014165878296, "learning_rate": 4.195453501427353e-05, "loss": 1.8665, "step": 10029 }, { "epoch": 0.559054679226353, "grad_norm": 0.6041330099105835, "learning_rate": 4.194574693493398e-05, "loss": 2.0083, "step": 10030 }, { "epoch": 0.5591104174795162, "grad_norm": 0.5616350173950195, "learning_rate": 4.19369591110284e-05, "loss": 1.4786, "step": 10031 }, { "epoch": 0.5591661557326794, "grad_norm": 0.5436341166496277, "learning_rate": 4.192817154283544e-05, "loss": 1.5979, "step": 10032 }, { "epoch": 0.5592218939858424, "grad_norm": 0.5548772811889648, "learning_rate": 4.1919384230633804e-05, "loss": 1.612, "step": 10033 }, { "epoch": 0.5592776322390056, "grad_norm": 0.49860692024230957, "learning_rate": 4.191059717470223e-05, "loss": 1.1854, "step": 10034 }, { "epoch": 0.5593333704921688, "grad_norm": 0.5409103035926819, "learning_rate": 4.19018103753193e-05, "loss": 1.6482, "step": 10035 }, { "epoch": 0.5593891087453319, "grad_norm": 0.5163053274154663, "learning_rate": 4.1893023832763786e-05, "loss": 1.5681, "step": 10036 }, { "epoch": 0.5594448469984951, "grad_norm": 0.5453513860702515, "learning_rate": 4.1884237547314244e-05, "loss": 1.5808, "step": 10037 }, { "epoch": 0.5595005852516582, "grad_norm": 0.52850741147995, "learning_rate": 4.1875451519249386e-05, "loss": 1.4751, "step": 10038 }, { "epoch": 0.5595563235048213, "grad_norm": 0.5223550200462341, "learning_rate": 4.186666574884783e-05, "loss": 1.4774, "step": 10039 }, { "epoch": 0.5596120617579845, "grad_norm": 0.5033788084983826, "learning_rate": 4.1857880236388217e-05, "loss": 1.546, "step": 10040 }, { "epoch": 0.5596678000111477, "grad_norm": 0.5803878903388977, "learning_rate": 4.184909498214918e-05, "loss": 1.7583, "step": 10041 }, { "epoch": 0.5597235382643108, "grad_norm": 0.5489541888237, "learning_rate": 4.1840309986409316e-05, "loss": 1.4077, "step": 10042 }, { "epoch": 0.559779276517474, "grad_norm": 0.5916313529014587, "learning_rate": 4.1831525249447255e-05, "loss": 1.7512, "step": 10043 }, { "epoch": 0.5598350147706371, "grad_norm": 0.610925018787384, "learning_rate": 4.182274077154157e-05, "loss": 1.8438, "step": 10044 }, { "epoch": 0.5598907530238002, "grad_norm": 0.5416461229324341, "learning_rate": 4.181395655297088e-05, "loss": 1.7325, "step": 10045 }, { "epoch": 0.5599464912769634, "grad_norm": 0.5306708812713623, "learning_rate": 4.180517259401377e-05, "loss": 1.662, "step": 10046 }, { "epoch": 0.5600022295301266, "grad_norm": 0.5335866808891296, "learning_rate": 4.179638889494879e-05, "loss": 1.7033, "step": 10047 }, { "epoch": 0.5600579677832896, "grad_norm": 0.5430575013160706, "learning_rate": 4.1787605456054546e-05, "loss": 1.4528, "step": 10048 }, { "epoch": 0.5601137060364528, "grad_norm": 0.5668299794197083, "learning_rate": 4.177882227760956e-05, "loss": 1.6722, "step": 10049 }, { "epoch": 0.560169444289616, "grad_norm": 0.5457166433334351, "learning_rate": 4.17700393598924e-05, "loss": 1.534, "step": 10050 }, { "epoch": 0.5602251825427791, "grad_norm": 0.5463144183158875, "learning_rate": 4.176125670318161e-05, "loss": 1.7221, "step": 10051 }, { "epoch": 0.5602809207959423, "grad_norm": 0.5175307989120483, "learning_rate": 4.1752474307755706e-05, "loss": 1.4255, "step": 10052 }, { "epoch": 0.5603366590491053, "grad_norm": 0.5423510670661926, "learning_rate": 4.174369217389326e-05, "loss": 1.6083, "step": 10053 }, { "epoch": 0.5603923973022685, "grad_norm": 0.5733422040939331, "learning_rate": 4.173491030187274e-05, "loss": 1.4492, "step": 10054 }, { "epoch": 0.5604481355554317, "grad_norm": 0.5978653430938721, "learning_rate": 4.172612869197269e-05, "loss": 1.9517, "step": 10055 }, { "epoch": 0.5605038738085948, "grad_norm": 0.6374850869178772, "learning_rate": 4.171734734447158e-05, "loss": 1.5612, "step": 10056 }, { "epoch": 0.560559612061758, "grad_norm": 0.5653359889984131, "learning_rate": 4.1708566259647944e-05, "loss": 1.6853, "step": 10057 }, { "epoch": 0.5606153503149212, "grad_norm": 0.5681639313697815, "learning_rate": 4.1699785437780226e-05, "loss": 1.6625, "step": 10058 }, { "epoch": 0.5606710885680842, "grad_norm": 0.5495839715003967, "learning_rate": 4.169100487914691e-05, "loss": 1.6534, "step": 10059 }, { "epoch": 0.5607268268212474, "grad_norm": 0.6118134260177612, "learning_rate": 4.168222458402651e-05, "loss": 1.7264, "step": 10060 }, { "epoch": 0.5607825650744106, "grad_norm": 0.5823867321014404, "learning_rate": 4.167344455269741e-05, "loss": 1.6749, "step": 10061 }, { "epoch": 0.5608383033275737, "grad_norm": 0.5473729968070984, "learning_rate": 4.166466478543814e-05, "loss": 1.5746, "step": 10062 }, { "epoch": 0.5608940415807369, "grad_norm": 0.5540270805358887, "learning_rate": 4.1655885282527075e-05, "loss": 1.4935, "step": 10063 }, { "epoch": 0.5609497798339, "grad_norm": 0.5212602019309998, "learning_rate": 4.164710604424269e-05, "loss": 1.422, "step": 10064 }, { "epoch": 0.5610055180870631, "grad_norm": 0.5885487198829651, "learning_rate": 4.1638327070863404e-05, "loss": 1.7145, "step": 10065 }, { "epoch": 0.5610612563402263, "grad_norm": 0.5488985776901245, "learning_rate": 4.162954836266762e-05, "loss": 1.565, "step": 10066 }, { "epoch": 0.5611169945933895, "grad_norm": 0.563651978969574, "learning_rate": 4.1620769919933775e-05, "loss": 1.653, "step": 10067 }, { "epoch": 0.5611727328465526, "grad_norm": 0.5442456007003784, "learning_rate": 4.161199174294025e-05, "loss": 1.7342, "step": 10068 }, { "epoch": 0.5612284710997157, "grad_norm": 0.555916428565979, "learning_rate": 4.1603213831965435e-05, "loss": 1.7592, "step": 10069 }, { "epoch": 0.5612842093528789, "grad_norm": 0.556006908416748, "learning_rate": 4.1594436187287714e-05, "loss": 1.6228, "step": 10070 }, { "epoch": 0.561339947606042, "grad_norm": 0.5912269949913025, "learning_rate": 4.15856588091855e-05, "loss": 1.789, "step": 10071 }, { "epoch": 0.5613956858592052, "grad_norm": 0.5295083522796631, "learning_rate": 4.157688169793709e-05, "loss": 1.4986, "step": 10072 }, { "epoch": 0.5614514241123684, "grad_norm": 0.5145447254180908, "learning_rate": 4.15681048538209e-05, "loss": 1.326, "step": 10073 }, { "epoch": 0.5615071623655314, "grad_norm": 0.6136693954467773, "learning_rate": 4.155932827711527e-05, "loss": 2.1994, "step": 10074 }, { "epoch": 0.5615629006186946, "grad_norm": 0.649308443069458, "learning_rate": 4.155055196809852e-05, "loss": 1.9845, "step": 10075 }, { "epoch": 0.5616186388718577, "grad_norm": 0.5622665286064148, "learning_rate": 4.154177592704901e-05, "loss": 1.6813, "step": 10076 }, { "epoch": 0.5616743771250209, "grad_norm": 0.5435896515846252, "learning_rate": 4.153300015424505e-05, "loss": 1.6458, "step": 10077 }, { "epoch": 0.5617301153781841, "grad_norm": 0.5368431806564331, "learning_rate": 4.1524224649964935e-05, "loss": 1.4892, "step": 10078 }, { "epoch": 0.5617858536313471, "grad_norm": 0.5401378870010376, "learning_rate": 4.1515449414487034e-05, "loss": 1.5534, "step": 10079 }, { "epoch": 0.5618415918845103, "grad_norm": 0.5672016143798828, "learning_rate": 4.1506674448089586e-05, "loss": 1.7606, "step": 10080 }, { "epoch": 0.5618973301376735, "grad_norm": 0.6083208322525024, "learning_rate": 4.149789975105092e-05, "loss": 1.7784, "step": 10081 }, { "epoch": 0.5619530683908366, "grad_norm": 0.5388506650924683, "learning_rate": 4.1489125323649294e-05, "loss": 1.7375, "step": 10082 }, { "epoch": 0.5620088066439998, "grad_norm": 0.6294654011726379, "learning_rate": 4.1480351166163e-05, "loss": 1.5098, "step": 10083 }, { "epoch": 0.562064544897163, "grad_norm": 0.5356112718582153, "learning_rate": 4.1471577278870285e-05, "loss": 1.6152, "step": 10084 }, { "epoch": 0.562120283150326, "grad_norm": 0.566550612449646, "learning_rate": 4.14628036620494e-05, "loss": 1.5779, "step": 10085 }, { "epoch": 0.5621760214034892, "grad_norm": 0.5861518979072571, "learning_rate": 4.145403031597865e-05, "loss": 1.5938, "step": 10086 }, { "epoch": 0.5622317596566524, "grad_norm": 0.5656233429908752, "learning_rate": 4.144525724093619e-05, "loss": 1.5921, "step": 10087 }, { "epoch": 0.5622874979098155, "grad_norm": 0.555738091468811, "learning_rate": 4.143648443720033e-05, "loss": 1.6272, "step": 10088 }, { "epoch": 0.5623432361629787, "grad_norm": 0.6210994124412537, "learning_rate": 4.1427711905049215e-05, "loss": 1.9088, "step": 10089 }, { "epoch": 0.5623989744161418, "grad_norm": 0.5784873366355896, "learning_rate": 4.1418939644761125e-05, "loss": 1.6455, "step": 10090 }, { "epoch": 0.5624547126693049, "grad_norm": 0.5760608911514282, "learning_rate": 4.141016765661423e-05, "loss": 1.71, "step": 10091 }, { "epoch": 0.5625104509224681, "grad_norm": 0.5815902948379517, "learning_rate": 4.1401395940886725e-05, "loss": 1.6911, "step": 10092 }, { "epoch": 0.5625661891756313, "grad_norm": 0.5799475312232971, "learning_rate": 4.139262449785683e-05, "loss": 1.7185, "step": 10093 }, { "epoch": 0.5626219274287944, "grad_norm": 0.572181224822998, "learning_rate": 4.1383853327802686e-05, "loss": 1.5143, "step": 10094 }, { "epoch": 0.5626776656819575, "grad_norm": 0.593944787979126, "learning_rate": 4.137508243100249e-05, "loss": 1.7856, "step": 10095 }, { "epoch": 0.5627334039351207, "grad_norm": 0.5817708373069763, "learning_rate": 4.136631180773437e-05, "loss": 1.7438, "step": 10096 }, { "epoch": 0.5627891421882838, "grad_norm": 0.559810221195221, "learning_rate": 4.1357541458276535e-05, "loss": 1.5218, "step": 10097 }, { "epoch": 0.562844880441447, "grad_norm": 0.5834983587265015, "learning_rate": 4.134877138290706e-05, "loss": 1.7985, "step": 10098 }, { "epoch": 0.5629006186946102, "grad_norm": 0.5739032030105591, "learning_rate": 4.134000158190413e-05, "loss": 1.6318, "step": 10099 }, { "epoch": 0.5629563569477732, "grad_norm": 0.5493670105934143, "learning_rate": 4.133123205554587e-05, "loss": 1.5959, "step": 10100 }, { "epoch": 0.5630120952009364, "grad_norm": 0.5687856674194336, "learning_rate": 4.132246280411038e-05, "loss": 1.7061, "step": 10101 }, { "epoch": 0.5630678334540995, "grad_norm": 0.5455751419067383, "learning_rate": 4.131369382787578e-05, "loss": 1.5649, "step": 10102 }, { "epoch": 0.5631235717072627, "grad_norm": 0.5812469124794006, "learning_rate": 4.130492512712016e-05, "loss": 1.7403, "step": 10103 }, { "epoch": 0.5631793099604259, "grad_norm": 0.5267177224159241, "learning_rate": 4.129615670212161e-05, "loss": 1.4974, "step": 10104 }, { "epoch": 0.5632350482135889, "grad_norm": 0.5732220411300659, "learning_rate": 4.1287388553158235e-05, "loss": 1.6699, "step": 10105 }, { "epoch": 0.5632907864667521, "grad_norm": 0.5401387810707092, "learning_rate": 4.1278620680508096e-05, "loss": 1.5078, "step": 10106 }, { "epoch": 0.5633465247199153, "grad_norm": 0.5950440764427185, "learning_rate": 4.126985308444927e-05, "loss": 1.7493, "step": 10107 }, { "epoch": 0.5634022629730784, "grad_norm": 0.5888415575027466, "learning_rate": 4.1261085765259786e-05, "loss": 1.5739, "step": 10108 }, { "epoch": 0.5634580012262416, "grad_norm": 0.5839027166366577, "learning_rate": 4.1252318723217724e-05, "loss": 1.6441, "step": 10109 }, { "epoch": 0.5635137394794048, "grad_norm": 0.517785906791687, "learning_rate": 4.1243551958601103e-05, "loss": 1.5001, "step": 10110 }, { "epoch": 0.5635694777325678, "grad_norm": 0.5795639753341675, "learning_rate": 4.123478547168795e-05, "loss": 1.6376, "step": 10111 }, { "epoch": 0.563625215985731, "grad_norm": 0.5699589252471924, "learning_rate": 4.122601926275632e-05, "loss": 1.723, "step": 10112 }, { "epoch": 0.5636809542388942, "grad_norm": 0.5570908188819885, "learning_rate": 4.121725333208418e-05, "loss": 1.4783, "step": 10113 }, { "epoch": 0.5637366924920573, "grad_norm": 0.641866147518158, "learning_rate": 4.1208487679949574e-05, "loss": 1.6867, "step": 10114 }, { "epoch": 0.5637924307452205, "grad_norm": 0.5656890869140625, "learning_rate": 4.119972230663047e-05, "loss": 1.6556, "step": 10115 }, { "epoch": 0.5638481689983836, "grad_norm": 0.5093350410461426, "learning_rate": 4.119095721240488e-05, "loss": 1.4309, "step": 10116 }, { "epoch": 0.5639039072515467, "grad_norm": 0.590333104133606, "learning_rate": 4.118219239755076e-05, "loss": 1.7376, "step": 10117 }, { "epoch": 0.5639596455047099, "grad_norm": 0.5829031467437744, "learning_rate": 4.117342786234608e-05, "loss": 1.6084, "step": 10118 }, { "epoch": 0.5640153837578731, "grad_norm": 0.5385381579399109, "learning_rate": 4.116466360706881e-05, "loss": 1.6276, "step": 10119 }, { "epoch": 0.5640711220110362, "grad_norm": 0.5832441449165344, "learning_rate": 4.1155899631996883e-05, "loss": 1.7219, "step": 10120 }, { "epoch": 0.5641268602641993, "grad_norm": 0.59648197889328, "learning_rate": 4.114713593740828e-05, "loss": 1.6228, "step": 10121 }, { "epoch": 0.5641825985173625, "grad_norm": 0.5410522818565369, "learning_rate": 4.113837252358089e-05, "loss": 1.6936, "step": 10122 }, { "epoch": 0.5642383367705256, "grad_norm": 0.5790727138519287, "learning_rate": 4.1129609390792675e-05, "loss": 1.7324, "step": 10123 }, { "epoch": 0.5642940750236888, "grad_norm": 0.5365089774131775, "learning_rate": 4.112084653932151e-05, "loss": 1.5449, "step": 10124 }, { "epoch": 0.5643498132768519, "grad_norm": 0.5550824999809265, "learning_rate": 4.111208396944533e-05, "loss": 1.6333, "step": 10125 }, { "epoch": 0.564405551530015, "grad_norm": 0.5916097164154053, "learning_rate": 4.110332168144204e-05, "loss": 1.7363, "step": 10126 }, { "epoch": 0.5644612897831782, "grad_norm": 0.5079007744789124, "learning_rate": 4.10945596755895e-05, "loss": 1.4916, "step": 10127 }, { "epoch": 0.5645170280363413, "grad_norm": 0.5717905163764954, "learning_rate": 4.108579795216562e-05, "loss": 1.7816, "step": 10128 }, { "epoch": 0.5645727662895045, "grad_norm": 0.5530692338943481, "learning_rate": 4.107703651144824e-05, "loss": 1.6816, "step": 10129 }, { "epoch": 0.5646285045426677, "grad_norm": 0.5110148787498474, "learning_rate": 4.106827535371523e-05, "loss": 1.5643, "step": 10130 }, { "epoch": 0.5646842427958307, "grad_norm": 0.5846538543701172, "learning_rate": 4.105951447924447e-05, "loss": 1.8457, "step": 10131 }, { "epoch": 0.5647399810489939, "grad_norm": 0.6359025239944458, "learning_rate": 4.105075388831378e-05, "loss": 1.8374, "step": 10132 }, { "epoch": 0.5647957193021571, "grad_norm": 0.5842446684837341, "learning_rate": 4.1041993581201e-05, "loss": 1.9171, "step": 10133 }, { "epoch": 0.5648514575553202, "grad_norm": 0.5989353060722351, "learning_rate": 4.103323355818395e-05, "loss": 1.8695, "step": 10134 }, { "epoch": 0.5649071958084834, "grad_norm": 0.5007326006889343, "learning_rate": 4.102447381954046e-05, "loss": 1.5685, "step": 10135 }, { "epoch": 0.5649629340616465, "grad_norm": 0.544731855392456, "learning_rate": 4.1015714365548316e-05, "loss": 1.7752, "step": 10136 }, { "epoch": 0.5650186723148096, "grad_norm": 0.4942627251148224, "learning_rate": 4.1006955196485324e-05, "loss": 1.3801, "step": 10137 }, { "epoch": 0.5650744105679728, "grad_norm": 0.5270852446556091, "learning_rate": 4.099819631262931e-05, "loss": 1.6352, "step": 10138 }, { "epoch": 0.565130148821136, "grad_norm": 0.5775606632232666, "learning_rate": 4.0989437714258e-05, "loss": 1.7102, "step": 10139 }, { "epoch": 0.5651858870742991, "grad_norm": 0.5705950260162354, "learning_rate": 4.09806794016492e-05, "loss": 1.7768, "step": 10140 }, { "epoch": 0.5652416253274622, "grad_norm": 0.5659387111663818, "learning_rate": 4.097192137508066e-05, "loss": 1.5619, "step": 10141 }, { "epoch": 0.5652973635806254, "grad_norm": 0.5292123556137085, "learning_rate": 4.096316363483014e-05, "loss": 1.5905, "step": 10142 }, { "epoch": 0.5653531018337885, "grad_norm": 0.6031396985054016, "learning_rate": 4.095440618117538e-05, "loss": 1.5628, "step": 10143 }, { "epoch": 0.5654088400869517, "grad_norm": 0.5399644374847412, "learning_rate": 4.094564901439411e-05, "loss": 1.7393, "step": 10144 }, { "epoch": 0.5654645783401149, "grad_norm": 0.5706971287727356, "learning_rate": 4.0936892134764076e-05, "loss": 1.6748, "step": 10145 }, { "epoch": 0.565520316593278, "grad_norm": 0.6021378040313721, "learning_rate": 4.0928135542562964e-05, "loss": 1.7082, "step": 10146 }, { "epoch": 0.5655760548464411, "grad_norm": 0.4986594617366791, "learning_rate": 4.0919379238068526e-05, "loss": 1.0838, "step": 10147 }, { "epoch": 0.5656317930996042, "grad_norm": 0.5753964185714722, "learning_rate": 4.0910623221558405e-05, "loss": 1.552, "step": 10148 }, { "epoch": 0.5656875313527674, "grad_norm": 0.5776475071907043, "learning_rate": 4.0901867493310354e-05, "loss": 1.8034, "step": 10149 }, { "epoch": 0.5657432696059306, "grad_norm": 0.5469151735305786, "learning_rate": 4.089311205360199e-05, "loss": 1.7206, "step": 10150 }, { "epoch": 0.5657990078590937, "grad_norm": 0.5828034281730652, "learning_rate": 4.0884356902711016e-05, "loss": 1.6696, "step": 10151 }, { "epoch": 0.5658547461122568, "grad_norm": 0.5706288814544678, "learning_rate": 4.087560204091511e-05, "loss": 1.6162, "step": 10152 }, { "epoch": 0.56591048436542, "grad_norm": 0.65047687292099, "learning_rate": 4.08668474684919e-05, "loss": 1.9385, "step": 10153 }, { "epoch": 0.5659662226185831, "grad_norm": 0.5484048128128052, "learning_rate": 4.085809318571905e-05, "loss": 1.7144, "step": 10154 }, { "epoch": 0.5660219608717463, "grad_norm": 0.5408827066421509, "learning_rate": 4.084933919287417e-05, "loss": 1.4162, "step": 10155 }, { "epoch": 0.5660776991249095, "grad_norm": 0.5536865592002869, "learning_rate": 4.084058549023488e-05, "loss": 1.6541, "step": 10156 }, { "epoch": 0.5661334373780725, "grad_norm": 0.5546683073043823, "learning_rate": 4.0831832078078845e-05, "loss": 1.5955, "step": 10157 }, { "epoch": 0.5661891756312357, "grad_norm": 0.6069309711456299, "learning_rate": 4.082307895668364e-05, "loss": 1.9304, "step": 10158 }, { "epoch": 0.5662449138843989, "grad_norm": 0.6290032267570496, "learning_rate": 4.0814326126326864e-05, "loss": 1.91, "step": 10159 }, { "epoch": 0.566300652137562, "grad_norm": 0.5626652240753174, "learning_rate": 4.080557358728609e-05, "loss": 1.848, "step": 10160 }, { "epoch": 0.5663563903907252, "grad_norm": 0.5320069789886475, "learning_rate": 4.079682133983894e-05, "loss": 1.6209, "step": 10161 }, { "epoch": 0.5664121286438883, "grad_norm": 0.5245012044906616, "learning_rate": 4.0788069384262946e-05, "loss": 1.4589, "step": 10162 }, { "epoch": 0.5664678668970514, "grad_norm": 0.5692093968391418, "learning_rate": 4.077931772083566e-05, "loss": 1.7792, "step": 10163 }, { "epoch": 0.5665236051502146, "grad_norm": 0.5256657600402832, "learning_rate": 4.0770566349834696e-05, "loss": 1.5378, "step": 10164 }, { "epoch": 0.5665793434033778, "grad_norm": 0.5258059501647949, "learning_rate": 4.076181527153753e-05, "loss": 1.543, "step": 10165 }, { "epoch": 0.5666350816565409, "grad_norm": 0.5840742588043213, "learning_rate": 4.0753064486221736e-05, "loss": 1.6944, "step": 10166 }, { "epoch": 0.566690819909704, "grad_norm": 0.5648197531700134, "learning_rate": 4.0744313994164804e-05, "loss": 1.6907, "step": 10167 }, { "epoch": 0.5667465581628672, "grad_norm": 0.6014297008514404, "learning_rate": 4.0735563795644294e-05, "loss": 1.6311, "step": 10168 }, { "epoch": 0.5668022964160303, "grad_norm": 0.5499513745307922, "learning_rate": 4.072681389093767e-05, "loss": 1.5288, "step": 10169 }, { "epoch": 0.5668580346691935, "grad_norm": 0.5218088626861572, "learning_rate": 4.071806428032244e-05, "loss": 1.5348, "step": 10170 }, { "epoch": 0.5669137729223566, "grad_norm": 0.5808815956115723, "learning_rate": 4.07093149640761e-05, "loss": 1.9325, "step": 10171 }, { "epoch": 0.5669695111755197, "grad_norm": 0.6530500054359436, "learning_rate": 4.0700565942476104e-05, "loss": 1.93, "step": 10172 }, { "epoch": 0.5670252494286829, "grad_norm": 0.5505321025848389, "learning_rate": 4.069181721579997e-05, "loss": 1.6544, "step": 10173 }, { "epoch": 0.567080987681846, "grad_norm": 0.5787295699119568, "learning_rate": 4.068306878432509e-05, "loss": 1.6509, "step": 10174 }, { "epoch": 0.5671367259350092, "grad_norm": 0.6493069529533386, "learning_rate": 4.067432064832898e-05, "loss": 2.02, "step": 10175 }, { "epoch": 0.5671924641881724, "grad_norm": 0.5896463394165039, "learning_rate": 4.066557280808901e-05, "loss": 1.7404, "step": 10176 }, { "epoch": 0.5672482024413354, "grad_norm": 0.5523368716239929, "learning_rate": 4.065682526388266e-05, "loss": 1.7042, "step": 10177 }, { "epoch": 0.5673039406944986, "grad_norm": 0.5077149271965027, "learning_rate": 4.064807801598735e-05, "loss": 1.5399, "step": 10178 }, { "epoch": 0.5673596789476618, "grad_norm": 0.5975422263145447, "learning_rate": 4.063933106468047e-05, "loss": 1.5696, "step": 10179 }, { "epoch": 0.5674154172008249, "grad_norm": 0.5238234400749207, "learning_rate": 4.063058441023944e-05, "loss": 1.5354, "step": 10180 }, { "epoch": 0.5674711554539881, "grad_norm": 0.5576155781745911, "learning_rate": 4.062183805294164e-05, "loss": 1.6381, "step": 10181 }, { "epoch": 0.5675268937071513, "grad_norm": 0.5786839723587036, "learning_rate": 4.0613091993064464e-05, "loss": 1.6276, "step": 10182 }, { "epoch": 0.5675826319603143, "grad_norm": 0.5902144312858582, "learning_rate": 4.0604346230885257e-05, "loss": 1.7498, "step": 10183 }, { "epoch": 0.5676383702134775, "grad_norm": 0.647559642791748, "learning_rate": 4.0595600766681425e-05, "loss": 1.63, "step": 10184 }, { "epoch": 0.5676941084666407, "grad_norm": 0.5559741854667664, "learning_rate": 4.0586855600730314e-05, "loss": 1.6698, "step": 10185 }, { "epoch": 0.5677498467198038, "grad_norm": 0.5404937267303467, "learning_rate": 4.057811073330925e-05, "loss": 1.4475, "step": 10186 }, { "epoch": 0.567805584972967, "grad_norm": 0.6052438616752625, "learning_rate": 4.056936616469559e-05, "loss": 1.5655, "step": 10187 }, { "epoch": 0.5678613232261301, "grad_norm": 0.5823904275894165, "learning_rate": 4.056062189516664e-05, "loss": 1.7327, "step": 10188 }, { "epoch": 0.5679170614792932, "grad_norm": 0.5970086455345154, "learning_rate": 4.055187792499971e-05, "loss": 1.8365, "step": 10189 }, { "epoch": 0.5679727997324564, "grad_norm": 0.530210018157959, "learning_rate": 4.054313425447217e-05, "loss": 1.5606, "step": 10190 }, { "epoch": 0.5680285379856196, "grad_norm": 0.5752225518226624, "learning_rate": 4.053439088386124e-05, "loss": 1.5273, "step": 10191 }, { "epoch": 0.5680842762387827, "grad_norm": 0.6104926466941833, "learning_rate": 4.0525647813444254e-05, "loss": 1.6909, "step": 10192 }, { "epoch": 0.5681400144919458, "grad_norm": 0.6021226048469543, "learning_rate": 4.0516905043498474e-05, "loss": 1.8376, "step": 10193 }, { "epoch": 0.5681957527451089, "grad_norm": 0.5418221950531006, "learning_rate": 4.0508162574301195e-05, "loss": 1.4, "step": 10194 }, { "epoch": 0.5682514909982721, "grad_norm": 0.5655646324157715, "learning_rate": 4.049942040612964e-05, "loss": 1.6005, "step": 10195 }, { "epoch": 0.5683072292514353, "grad_norm": 0.5451434254646301, "learning_rate": 4.049067853926108e-05, "loss": 1.6205, "step": 10196 }, { "epoch": 0.5683629675045984, "grad_norm": 0.585850715637207, "learning_rate": 4.048193697397276e-05, "loss": 1.7273, "step": 10197 }, { "epoch": 0.5684187057577615, "grad_norm": 0.6063744425773621, "learning_rate": 4.0473195710541886e-05, "loss": 1.7355, "step": 10198 }, { "epoch": 0.5684744440109247, "grad_norm": 0.5964711308479309, "learning_rate": 4.046445474924573e-05, "loss": 1.8361, "step": 10199 }, { "epoch": 0.5685301822640878, "grad_norm": 0.5515483021736145, "learning_rate": 4.0455714090361446e-05, "loss": 1.6124, "step": 10200 }, { "epoch": 0.568585920517251, "grad_norm": 0.5965580344200134, "learning_rate": 4.044697373416628e-05, "loss": 1.8331, "step": 10201 }, { "epoch": 0.5686416587704142, "grad_norm": 0.618015706539154, "learning_rate": 4.04382336809374e-05, "loss": 1.7974, "step": 10202 }, { "epoch": 0.5686973970235772, "grad_norm": 0.5886608958244324, "learning_rate": 4.0429493930952e-05, "loss": 1.7206, "step": 10203 }, { "epoch": 0.5687531352767404, "grad_norm": 0.6158391237258911, "learning_rate": 4.042075448448726e-05, "loss": 1.6667, "step": 10204 }, { "epoch": 0.5688088735299036, "grad_norm": 0.6388469338417053, "learning_rate": 4.041201534182033e-05, "loss": 1.5124, "step": 10205 }, { "epoch": 0.5688646117830667, "grad_norm": 0.575337290763855, "learning_rate": 4.040327650322838e-05, "loss": 1.756, "step": 10206 }, { "epoch": 0.5689203500362299, "grad_norm": 0.5659148693084717, "learning_rate": 4.039453796898853e-05, "loss": 1.7316, "step": 10207 }, { "epoch": 0.5689760882893931, "grad_norm": 0.5717800259590149, "learning_rate": 4.038579973937796e-05, "loss": 1.6193, "step": 10208 }, { "epoch": 0.5690318265425561, "grad_norm": 0.5925152897834778, "learning_rate": 4.037706181467373e-05, "loss": 1.8098, "step": 10209 }, { "epoch": 0.5690875647957193, "grad_norm": 0.5946084856987, "learning_rate": 4.036832419515301e-05, "loss": 1.792, "step": 10210 }, { "epoch": 0.5691433030488825, "grad_norm": 0.5962294340133667, "learning_rate": 4.03595868810929e-05, "loss": 1.716, "step": 10211 }, { "epoch": 0.5691990413020456, "grad_norm": 0.5260846614837646, "learning_rate": 4.035084987277048e-05, "loss": 1.5782, "step": 10212 }, { "epoch": 0.5692547795552088, "grad_norm": 0.5939358472824097, "learning_rate": 4.034211317046285e-05, "loss": 1.7408, "step": 10213 }, { "epoch": 0.5693105178083719, "grad_norm": 0.5185898542404175, "learning_rate": 4.033337677444707e-05, "loss": 1.5208, "step": 10214 }, { "epoch": 0.569366256061535, "grad_norm": 0.5650632381439209, "learning_rate": 4.0324640685000206e-05, "loss": 1.7486, "step": 10215 }, { "epoch": 0.5694219943146982, "grad_norm": 0.5693777799606323, "learning_rate": 4.0315904902399367e-05, "loss": 1.8807, "step": 10216 }, { "epoch": 0.5694777325678613, "grad_norm": 0.5746406316757202, "learning_rate": 4.030716942692153e-05, "loss": 1.7639, "step": 10217 }, { "epoch": 0.5695334708210245, "grad_norm": 0.5026817917823792, "learning_rate": 4.0298434258843775e-05, "loss": 1.52, "step": 10218 }, { "epoch": 0.5695892090741876, "grad_norm": 0.5377751588821411, "learning_rate": 4.028969939844312e-05, "loss": 1.7166, "step": 10219 }, { "epoch": 0.5696449473273507, "grad_norm": 0.5900459289550781, "learning_rate": 4.0280964845996597e-05, "loss": 1.7338, "step": 10220 }, { "epoch": 0.5697006855805139, "grad_norm": 0.5911110639572144, "learning_rate": 4.027223060178119e-05, "loss": 1.7374, "step": 10221 }, { "epoch": 0.5697564238336771, "grad_norm": 0.49996447563171387, "learning_rate": 4.0263496666073907e-05, "loss": 1.4187, "step": 10222 }, { "epoch": 0.5698121620868402, "grad_norm": 0.5100619792938232, "learning_rate": 4.025476303915176e-05, "loss": 1.4252, "step": 10223 }, { "epoch": 0.5698679003400033, "grad_norm": 0.6046686768531799, "learning_rate": 4.024602972129169e-05, "loss": 1.8751, "step": 10224 }, { "epoch": 0.5699236385931665, "grad_norm": 0.5467636585235596, "learning_rate": 4.0237296712770714e-05, "loss": 1.7274, "step": 10225 }, { "epoch": 0.5699793768463296, "grad_norm": 0.5854259729385376, "learning_rate": 4.022856401386573e-05, "loss": 1.7434, "step": 10226 }, { "epoch": 0.5700351150994928, "grad_norm": 0.5991394519805908, "learning_rate": 4.0219831624853754e-05, "loss": 1.7, "step": 10227 }, { "epoch": 0.570090853352656, "grad_norm": 0.5040337443351746, "learning_rate": 4.021109954601169e-05, "loss": 1.3809, "step": 10228 }, { "epoch": 0.570146591605819, "grad_norm": 0.5473932027816772, "learning_rate": 4.020236777761646e-05, "loss": 1.7109, "step": 10229 }, { "epoch": 0.5702023298589822, "grad_norm": 0.5707757472991943, "learning_rate": 4.0193636319945025e-05, "loss": 1.8791, "step": 10230 }, { "epoch": 0.5702580681121454, "grad_norm": 0.5629134178161621, "learning_rate": 4.018490517327425e-05, "loss": 1.904, "step": 10231 }, { "epoch": 0.5703138063653085, "grad_norm": 0.5864009261131287, "learning_rate": 4.0176174337881076e-05, "loss": 1.7688, "step": 10232 }, { "epoch": 0.5703695446184717, "grad_norm": 0.5961767435073853, "learning_rate": 4.0167443814042344e-05, "loss": 1.7515, "step": 10233 }, { "epoch": 0.5704252828716349, "grad_norm": 0.5666062235832214, "learning_rate": 4.0158713602035004e-05, "loss": 1.7589, "step": 10234 }, { "epoch": 0.5704810211247979, "grad_norm": 0.5441728234291077, "learning_rate": 4.014998370213586e-05, "loss": 1.7212, "step": 10235 }, { "epoch": 0.5705367593779611, "grad_norm": 0.6179669499397278, "learning_rate": 4.0141254114621815e-05, "loss": 1.6665, "step": 10236 }, { "epoch": 0.5705924976311243, "grad_norm": 0.5338011980056763, "learning_rate": 4.0132524839769716e-05, "loss": 1.7351, "step": 10237 }, { "epoch": 0.5706482358842874, "grad_norm": 0.5339807868003845, "learning_rate": 4.0123795877856385e-05, "loss": 1.6007, "step": 10238 }, { "epoch": 0.5707039741374506, "grad_norm": 0.5658773183822632, "learning_rate": 4.011506722915867e-05, "loss": 1.706, "step": 10239 }, { "epoch": 0.5707597123906136, "grad_norm": 0.591503918170929, "learning_rate": 4.01063388939534e-05, "loss": 1.9076, "step": 10240 }, { "epoch": 0.5708154506437768, "grad_norm": 0.4976126253604889, "learning_rate": 4.009761087251735e-05, "loss": 1.3349, "step": 10241 }, { "epoch": 0.57087118889694, "grad_norm": 0.5694444179534912, "learning_rate": 4.008888316512738e-05, "loss": 1.7024, "step": 10242 }, { "epoch": 0.5709269271501031, "grad_norm": 0.6095151305198669, "learning_rate": 4.0080155772060225e-05, "loss": 1.8915, "step": 10243 }, { "epoch": 0.5709826654032663, "grad_norm": 0.5910167694091797, "learning_rate": 4.007142869359272e-05, "loss": 1.6265, "step": 10244 }, { "epoch": 0.5710384036564294, "grad_norm": 0.5558249950408936, "learning_rate": 4.006270193000158e-05, "loss": 1.6305, "step": 10245 }, { "epoch": 0.5710941419095925, "grad_norm": 0.5426621437072754, "learning_rate": 4.005397548156362e-05, "loss": 1.7311, "step": 10246 }, { "epoch": 0.5711498801627557, "grad_norm": 0.5525389313697815, "learning_rate": 4.004524934855555e-05, "loss": 1.7237, "step": 10247 }, { "epoch": 0.5712056184159189, "grad_norm": 0.5233203172683716, "learning_rate": 4.0036523531254136e-05, "loss": 1.6268, "step": 10248 }, { "epoch": 0.571261356669082, "grad_norm": 0.5712999105453491, "learning_rate": 4.0027798029936114e-05, "loss": 1.6511, "step": 10249 }, { "epoch": 0.5713170949222451, "grad_norm": 0.5465791821479797, "learning_rate": 4.001907284487818e-05, "loss": 1.554, "step": 10250 }, { "epoch": 0.5713728331754083, "grad_norm": 0.5340691208839417, "learning_rate": 4.0010347976357085e-05, "loss": 1.6915, "step": 10251 }, { "epoch": 0.5714285714285714, "grad_norm": 0.5902113914489746, "learning_rate": 4.000162342464948e-05, "loss": 1.8803, "step": 10252 }, { "epoch": 0.5714843096817346, "grad_norm": 0.5747789144515991, "learning_rate": 3.9992899190032104e-05, "loss": 1.7181, "step": 10253 }, { "epoch": 0.5715400479348978, "grad_norm": 0.574839174747467, "learning_rate": 3.998417527278162e-05, "loss": 1.8356, "step": 10254 }, { "epoch": 0.5715957861880608, "grad_norm": 0.5555924773216248, "learning_rate": 3.997545167317469e-05, "loss": 1.6524, "step": 10255 }, { "epoch": 0.571651524441224, "grad_norm": 0.5201401114463806, "learning_rate": 3.9966728391488e-05, "loss": 1.4982, "step": 10256 }, { "epoch": 0.5717072626943872, "grad_norm": 0.5710572004318237, "learning_rate": 3.995800542799818e-05, "loss": 1.6855, "step": 10257 }, { "epoch": 0.5717630009475503, "grad_norm": 0.5481722354888916, "learning_rate": 3.9949282782981886e-05, "loss": 1.6136, "step": 10258 }, { "epoch": 0.5718187392007135, "grad_norm": 0.5805692672729492, "learning_rate": 3.994056045671572e-05, "loss": 1.8276, "step": 10259 }, { "epoch": 0.5718744774538767, "grad_norm": 0.5870146155357361, "learning_rate": 3.993183844947635e-05, "loss": 1.8052, "step": 10260 }, { "epoch": 0.5719302157070397, "grad_norm": 0.5496461391448975, "learning_rate": 3.992311676154035e-05, "loss": 1.7339, "step": 10261 }, { "epoch": 0.5719859539602029, "grad_norm": 0.5844667553901672, "learning_rate": 3.991439539318434e-05, "loss": 1.593, "step": 10262 }, { "epoch": 0.572041692213366, "grad_norm": 0.5758823156356812, "learning_rate": 3.99056743446849e-05, "loss": 1.5935, "step": 10263 }, { "epoch": 0.5720974304665292, "grad_norm": 0.5993025302886963, "learning_rate": 3.9896953616318614e-05, "loss": 1.7039, "step": 10264 }, { "epoch": 0.5721531687196924, "grad_norm": 0.5562222003936768, "learning_rate": 3.988823320836207e-05, "loss": 1.718, "step": 10265 }, { "epoch": 0.5722089069728554, "grad_norm": 0.5475729703903198, "learning_rate": 3.9879513121091795e-05, "loss": 1.6327, "step": 10266 }, { "epoch": 0.5722646452260186, "grad_norm": 0.5502913594245911, "learning_rate": 3.987079335478435e-05, "loss": 1.7032, "step": 10267 }, { "epoch": 0.5723203834791818, "grad_norm": 0.5649488568305969, "learning_rate": 3.986207390971631e-05, "loss": 1.6994, "step": 10268 }, { "epoch": 0.5723761217323449, "grad_norm": 0.5440324544906616, "learning_rate": 3.985335478616415e-05, "loss": 1.6933, "step": 10269 }, { "epoch": 0.572431859985508, "grad_norm": 0.5091212391853333, "learning_rate": 3.984463598440444e-05, "loss": 1.3312, "step": 10270 }, { "epoch": 0.5724875982386712, "grad_norm": 0.5522047877311707, "learning_rate": 3.983591750471366e-05, "loss": 1.5176, "step": 10271 }, { "epoch": 0.5725433364918343, "grad_norm": 0.5953494906425476, "learning_rate": 3.982719934736832e-05, "loss": 1.6718, "step": 10272 }, { "epoch": 0.5725990747449975, "grad_norm": 0.5262237191200256, "learning_rate": 3.981848151264489e-05, "loss": 1.6548, "step": 10273 }, { "epoch": 0.5726548129981607, "grad_norm": 0.54544997215271, "learning_rate": 3.9809764000819875e-05, "loss": 1.6723, "step": 10274 }, { "epoch": 0.5727105512513238, "grad_norm": 0.5449570417404175, "learning_rate": 3.980104681216974e-05, "loss": 1.673, "step": 10275 }, { "epoch": 0.5727662895044869, "grad_norm": 0.602749228477478, "learning_rate": 3.979232994697091e-05, "loss": 1.2947, "step": 10276 }, { "epoch": 0.5728220277576501, "grad_norm": 0.6310192942619324, "learning_rate": 3.97836134054999e-05, "loss": 1.6502, "step": 10277 }, { "epoch": 0.5728777660108132, "grad_norm": 0.5666201114654541, "learning_rate": 3.9774897188033064e-05, "loss": 1.8275, "step": 10278 }, { "epoch": 0.5729335042639764, "grad_norm": 0.5859840512275696, "learning_rate": 3.97661812948469e-05, "loss": 1.7393, "step": 10279 }, { "epoch": 0.5729892425171396, "grad_norm": 0.6887635588645935, "learning_rate": 3.975746572621778e-05, "loss": 1.6816, "step": 10280 }, { "epoch": 0.5730449807703026, "grad_norm": 0.5669187903404236, "learning_rate": 3.9748750482422145e-05, "loss": 1.5666, "step": 10281 }, { "epoch": 0.5731007190234658, "grad_norm": 0.5395673513412476, "learning_rate": 3.974003556373637e-05, "loss": 1.611, "step": 10282 }, { "epoch": 0.573156457276629, "grad_norm": 0.5382205247879028, "learning_rate": 3.973132097043685e-05, "loss": 1.6061, "step": 10283 }, { "epoch": 0.5732121955297921, "grad_norm": 0.5802567601203918, "learning_rate": 3.972260670279996e-05, "loss": 1.7779, "step": 10284 }, { "epoch": 0.5732679337829553, "grad_norm": 0.5593720078468323, "learning_rate": 3.971389276110204e-05, "loss": 1.6468, "step": 10285 }, { "epoch": 0.5733236720361183, "grad_norm": 0.5882350206375122, "learning_rate": 3.970517914561951e-05, "loss": 1.6013, "step": 10286 }, { "epoch": 0.5733794102892815, "grad_norm": 0.5444415807723999, "learning_rate": 3.969646585662864e-05, "loss": 1.7013, "step": 10287 }, { "epoch": 0.5734351485424447, "grad_norm": 0.5615072846412659, "learning_rate": 3.9687752894405804e-05, "loss": 1.7675, "step": 10288 }, { "epoch": 0.5734908867956078, "grad_norm": Infinity, "learning_rate": 3.9687752894405804e-05, "loss": 1.5965, "step": 10289 }, { "epoch": 0.573546625048771, "grad_norm": 0.5616545677185059, "learning_rate": 3.967904025922734e-05, "loss": 1.6883, "step": 10290 }, { "epoch": 0.5736023633019341, "grad_norm": 0.571757972240448, "learning_rate": 3.9670327951369537e-05, "loss": 1.6156, "step": 10291 }, { "epoch": 0.5736581015550972, "grad_norm": 0.5675120949745178, "learning_rate": 3.9661615971108706e-05, "loss": 1.6129, "step": 10292 }, { "epoch": 0.5737138398082604, "grad_norm": 0.6117345094680786, "learning_rate": 3.965290431872113e-05, "loss": 1.7097, "step": 10293 }, { "epoch": 0.5737695780614236, "grad_norm": 0.6245883107185364, "learning_rate": 3.9644192994483095e-05, "loss": 1.9848, "step": 10294 }, { "epoch": 0.5738253163145867, "grad_norm": 0.5811381936073303, "learning_rate": 3.96354819986709e-05, "loss": 1.7762, "step": 10295 }, { "epoch": 0.5738810545677498, "grad_norm": 0.5340662002563477, "learning_rate": 3.9626771331560766e-05, "loss": 1.5887, "step": 10296 }, { "epoch": 0.573936792820913, "grad_norm": 0.6120584011077881, "learning_rate": 3.961806099342899e-05, "loss": 1.8988, "step": 10297 }, { "epoch": 0.5739925310740761, "grad_norm": 0.5624459385871887, "learning_rate": 3.960935098455177e-05, "loss": 1.7073, "step": 10298 }, { "epoch": 0.5740482693272393, "grad_norm": 0.5729663968086243, "learning_rate": 3.9600641305205365e-05, "loss": 1.5212, "step": 10299 }, { "epoch": 0.5741040075804025, "grad_norm": 0.552730917930603, "learning_rate": 3.959193195566598e-05, "loss": 1.4532, "step": 10300 }, { "epoch": 0.5741597458335655, "grad_norm": 0.5537503361701965, "learning_rate": 3.958322293620982e-05, "loss": 1.7352, "step": 10301 }, { "epoch": 0.5742154840867287, "grad_norm": 0.5406333804130554, "learning_rate": 3.957451424711312e-05, "loss": 1.7068, "step": 10302 }, { "epoch": 0.5742712223398919, "grad_norm": 0.5790851712226868, "learning_rate": 3.956580588865202e-05, "loss": 1.8338, "step": 10303 }, { "epoch": 0.574326960593055, "grad_norm": 0.5130342245101929, "learning_rate": 3.955709786110274e-05, "loss": 1.3051, "step": 10304 }, { "epoch": 0.5743826988462182, "grad_norm": 0.5465152263641357, "learning_rate": 3.954839016474141e-05, "loss": 1.4312, "step": 10305 }, { "epoch": 0.5744384370993814, "grad_norm": 0.565580427646637, "learning_rate": 3.953968279984422e-05, "loss": 1.6097, "step": 10306 }, { "epoch": 0.5744941753525444, "grad_norm": 0.5684987902641296, "learning_rate": 3.95309757666873e-05, "loss": 1.7451, "step": 10307 }, { "epoch": 0.5745499136057076, "grad_norm": 0.5432803630828857, "learning_rate": 3.952226906554679e-05, "loss": 1.8016, "step": 10308 }, { "epoch": 0.5746056518588707, "grad_norm": 0.5711129903793335, "learning_rate": 3.9513562696698826e-05, "loss": 1.6584, "step": 10309 }, { "epoch": 0.5746613901120339, "grad_norm": 0.5580195784568787, "learning_rate": 3.9504856660419495e-05, "loss": 1.5651, "step": 10310 }, { "epoch": 0.5747171283651971, "grad_norm": 0.5921227931976318, "learning_rate": 3.949615095698494e-05, "loss": 1.7212, "step": 10311 }, { "epoch": 0.5747728666183601, "grad_norm": 0.5192678570747375, "learning_rate": 3.948744558667121e-05, "loss": 1.3938, "step": 10312 }, { "epoch": 0.5748286048715233, "grad_norm": 0.5308910012245178, "learning_rate": 3.9478740549754444e-05, "loss": 1.6182, "step": 10313 }, { "epoch": 0.5748843431246865, "grad_norm": 0.5796390771865845, "learning_rate": 3.947003584651065e-05, "loss": 1.5559, "step": 10314 }, { "epoch": 0.5749400813778496, "grad_norm": 0.620233952999115, "learning_rate": 3.946133147721594e-05, "loss": 1.8528, "step": 10315 }, { "epoch": 0.5749958196310128, "grad_norm": 0.5096827149391174, "learning_rate": 3.945262744214636e-05, "loss": 1.3252, "step": 10316 }, { "epoch": 0.575051557884176, "grad_norm": 0.5851264595985413, "learning_rate": 3.9443923741577935e-05, "loss": 1.6752, "step": 10317 }, { "epoch": 0.575107296137339, "grad_norm": 0.5834670662879944, "learning_rate": 3.943522037578671e-05, "loss": 1.8702, "step": 10318 }, { "epoch": 0.5751630343905022, "grad_norm": 0.5740618705749512, "learning_rate": 3.942651734504869e-05, "loss": 1.7431, "step": 10319 }, { "epoch": 0.5752187726436654, "grad_norm": 0.5476807951927185, "learning_rate": 3.9417814649639893e-05, "loss": 1.7095, "step": 10320 }, { "epoch": 0.5752745108968285, "grad_norm": 0.5697437524795532, "learning_rate": 3.9409112289836305e-05, "loss": 1.7306, "step": 10321 }, { "epoch": 0.5753302491499916, "grad_norm": 0.5896326899528503, "learning_rate": 3.9400410265913936e-05, "loss": 1.5845, "step": 10322 }, { "epoch": 0.5753859874031548, "grad_norm": 0.5682885050773621, "learning_rate": 3.939170857814876e-05, "loss": 1.7804, "step": 10323 }, { "epoch": 0.5754417256563179, "grad_norm": 0.5711153745651245, "learning_rate": 3.9383007226816726e-05, "loss": 1.6929, "step": 10324 }, { "epoch": 0.5754974639094811, "grad_norm": 0.5820274353027344, "learning_rate": 3.937430621219382e-05, "loss": 1.7542, "step": 10325 }, { "epoch": 0.5755532021626443, "grad_norm": 0.5988385081291199, "learning_rate": 3.936560553455595e-05, "loss": 1.7471, "step": 10326 }, { "epoch": 0.5756089404158073, "grad_norm": 0.5577500462532043, "learning_rate": 3.935690519417906e-05, "loss": 1.7389, "step": 10327 }, { "epoch": 0.5756646786689705, "grad_norm": 0.5570036768913269, "learning_rate": 3.934820519133912e-05, "loss": 1.6225, "step": 10328 }, { "epoch": 0.5757204169221337, "grad_norm": 0.6182720065116882, "learning_rate": 3.933950552631198e-05, "loss": 1.1692, "step": 10329 }, { "epoch": 0.5757761551752968, "grad_norm": 0.5024303793907166, "learning_rate": 3.9330806199373595e-05, "loss": 1.697, "step": 10330 }, { "epoch": 0.57583189342846, "grad_norm": 0.544809103012085, "learning_rate": 3.9322107210799795e-05, "loss": 1.6768, "step": 10331 }, { "epoch": 0.575887631681623, "grad_norm": 0.5746915340423584, "learning_rate": 3.931340856086652e-05, "loss": 1.7127, "step": 10332 }, { "epoch": 0.5759433699347862, "grad_norm": 0.5670152306556702, "learning_rate": 3.930471024984961e-05, "loss": 1.7439, "step": 10333 }, { "epoch": 0.5759991081879494, "grad_norm": 0.5794965624809265, "learning_rate": 3.929601227802494e-05, "loss": 1.622, "step": 10334 }, { "epoch": 0.5760548464411125, "grad_norm": 0.5243938565254211, "learning_rate": 3.928731464566836e-05, "loss": 1.5308, "step": 10335 }, { "epoch": 0.5761105846942757, "grad_norm": 0.6057234406471252, "learning_rate": 3.927861735305568e-05, "loss": 1.7297, "step": 10336 }, { "epoch": 0.5761663229474389, "grad_norm": 0.5918848514556885, "learning_rate": 3.926992040046277e-05, "loss": 1.8108, "step": 10337 }, { "epoch": 0.5762220612006019, "grad_norm": 0.557761549949646, "learning_rate": 3.926122378816539e-05, "loss": 1.4936, "step": 10338 }, { "epoch": 0.5762777994537651, "grad_norm": 0.5680163502693176, "learning_rate": 3.925252751643942e-05, "loss": 1.5076, "step": 10339 }, { "epoch": 0.5763335377069283, "grad_norm": 0.516508162021637, "learning_rate": 3.924383158556059e-05, "loss": 1.7351, "step": 10340 }, { "epoch": 0.5763892759600914, "grad_norm": 0.5683130025863647, "learning_rate": 3.9235135995804705e-05, "loss": 1.6554, "step": 10341 }, { "epoch": 0.5764450142132546, "grad_norm": 0.5636241436004639, "learning_rate": 3.9226440747447565e-05, "loss": 1.624, "step": 10342 }, { "epoch": 0.5765007524664177, "grad_norm": 0.53886479139328, "learning_rate": 3.9217745840764895e-05, "loss": 1.5321, "step": 10343 }, { "epoch": 0.5765564907195808, "grad_norm": 0.5719546675682068, "learning_rate": 3.920905127603247e-05, "loss": 1.5408, "step": 10344 }, { "epoch": 0.576612228972744, "grad_norm": 0.5249210596084595, "learning_rate": 3.920035705352602e-05, "loss": 1.587, "step": 10345 }, { "epoch": 0.5766679672259072, "grad_norm": 0.563533365726471, "learning_rate": 3.9191663173521284e-05, "loss": 1.7007, "step": 10346 }, { "epoch": 0.5767237054790703, "grad_norm": 0.5978162288665771, "learning_rate": 3.918296963629395e-05, "loss": 1.7961, "step": 10347 }, { "epoch": 0.5767794437322334, "grad_norm": 0.5723155736923218, "learning_rate": 3.9174276442119766e-05, "loss": 1.5591, "step": 10348 }, { "epoch": 0.5768351819853966, "grad_norm": 0.5694242715835571, "learning_rate": 3.916558359127443e-05, "loss": 1.6161, "step": 10349 }, { "epoch": 0.5768909202385597, "grad_norm": 0.5386130213737488, "learning_rate": 3.9156891084033596e-05, "loss": 1.6975, "step": 10350 }, { "epoch": 0.5769466584917229, "grad_norm": 0.5643964409828186, "learning_rate": 3.9148198920672975e-05, "loss": 1.7234, "step": 10351 }, { "epoch": 0.5770023967448861, "grad_norm": 0.551584005355835, "learning_rate": 3.913950710146819e-05, "loss": 1.5164, "step": 10352 }, { "epoch": 0.5770581349980491, "grad_norm": 0.60798180103302, "learning_rate": 3.913081562669492e-05, "loss": 1.7445, "step": 10353 }, { "epoch": 0.5771138732512123, "grad_norm": 0.5259472131729126, "learning_rate": 3.9122124496628836e-05, "loss": 1.5357, "step": 10354 }, { "epoch": 0.5771696115043754, "grad_norm": 0.5704507231712341, "learning_rate": 3.911343371154551e-05, "loss": 1.8522, "step": 10355 }, { "epoch": 0.5772253497575386, "grad_norm": 0.6215217113494873, "learning_rate": 3.9104743271720624e-05, "loss": 2.0213, "step": 10356 }, { "epoch": 0.5772810880107018, "grad_norm": 0.5803076028823853, "learning_rate": 3.909605317742972e-05, "loss": 1.8434, "step": 10357 }, { "epoch": 0.5773368262638648, "grad_norm": 0.5362025499343872, "learning_rate": 3.908736342894846e-05, "loss": 1.5575, "step": 10358 }, { "epoch": 0.577392564517028, "grad_norm": 0.5348682999610901, "learning_rate": 3.90786740265524e-05, "loss": 1.6087, "step": 10359 }, { "epoch": 0.5774483027701912, "grad_norm": 0.5173177719116211, "learning_rate": 3.9069984970517124e-05, "loss": 1.6299, "step": 10360 }, { "epoch": 0.5775040410233543, "grad_norm": 0.5550698637962341, "learning_rate": 3.90612962611182e-05, "loss": 1.7779, "step": 10361 }, { "epoch": 0.5775597792765175, "grad_norm": 0.5339301824569702, "learning_rate": 3.905260789863118e-05, "loss": 1.4732, "step": 10362 }, { "epoch": 0.5776155175296807, "grad_norm": 0.611870288848877, "learning_rate": 3.9043919883331615e-05, "loss": 1.7523, "step": 10363 }, { "epoch": 0.5776712557828437, "grad_norm": 0.5385359525680542, "learning_rate": 3.903523221549502e-05, "loss": 1.617, "step": 10364 }, { "epoch": 0.5777269940360069, "grad_norm": 0.5916758179664612, "learning_rate": 3.902654489539695e-05, "loss": 2.0081, "step": 10365 }, { "epoch": 0.5777827322891701, "grad_norm": 0.5239583253860474, "learning_rate": 3.901785792331287e-05, "loss": 1.6251, "step": 10366 }, { "epoch": 0.5778384705423332, "grad_norm": 0.5588314533233643, "learning_rate": 3.9009171299518324e-05, "loss": 1.6483, "step": 10367 }, { "epoch": 0.5778942087954964, "grad_norm": 0.5109575986862183, "learning_rate": 3.9000485024288784e-05, "loss": 1.3879, "step": 10368 }, { "epoch": 0.5779499470486595, "grad_norm": 0.5284083485603333, "learning_rate": 3.899179909789972e-05, "loss": 1.548, "step": 10369 }, { "epoch": 0.5780056853018226, "grad_norm": 0.6521651744842529, "learning_rate": 3.898311352062662e-05, "loss": 1.9616, "step": 10370 }, { "epoch": 0.5780614235549858, "grad_norm": 0.607297956943512, "learning_rate": 3.8974428292744914e-05, "loss": 1.7795, "step": 10371 }, { "epoch": 0.578117161808149, "grad_norm": 0.5658968687057495, "learning_rate": 3.896574341453007e-05, "loss": 1.6974, "step": 10372 }, { "epoch": 0.5781729000613121, "grad_norm": 0.5024977922439575, "learning_rate": 3.895705888625748e-05, "loss": 1.4682, "step": 10373 }, { "epoch": 0.5782286383144752, "grad_norm": 0.5308341383934021, "learning_rate": 3.894837470820262e-05, "loss": 1.4336, "step": 10374 }, { "epoch": 0.5782843765676384, "grad_norm": 0.5695244073867798, "learning_rate": 3.8939690880640885e-05, "loss": 1.6593, "step": 10375 }, { "epoch": 0.5783401148208015, "grad_norm": 0.5992659330368042, "learning_rate": 3.893100740384766e-05, "loss": 1.6772, "step": 10376 }, { "epoch": 0.5783958530739647, "grad_norm": 0.5644543766975403, "learning_rate": 3.8922324278098356e-05, "loss": 1.7087, "step": 10377 }, { "epoch": 0.5784515913271278, "grad_norm": 0.5220384001731873, "learning_rate": 3.891364150366832e-05, "loss": 1.4623, "step": 10378 }, { "epoch": 0.5785073295802909, "grad_norm": 0.5461076498031616, "learning_rate": 3.890495908083293e-05, "loss": 1.6518, "step": 10379 }, { "epoch": 0.5785630678334541, "grad_norm": 0.5484482049942017, "learning_rate": 3.889627700986759e-05, "loss": 1.5737, "step": 10380 }, { "epoch": 0.5786188060866172, "grad_norm": 0.5702036023139954, "learning_rate": 3.8887595291047564e-05, "loss": 1.5644, "step": 10381 }, { "epoch": 0.5786745443397804, "grad_norm": 0.5962613224983215, "learning_rate": 3.887891392464825e-05, "loss": 1.8534, "step": 10382 }, { "epoch": 0.5787302825929436, "grad_norm": 0.6296350359916687, "learning_rate": 3.8870232910944924e-05, "loss": 1.8821, "step": 10383 }, { "epoch": 0.5787860208461066, "grad_norm": 0.5504742860794067, "learning_rate": 3.886155225021294e-05, "loss": 1.8454, "step": 10384 }, { "epoch": 0.5788417590992698, "grad_norm": 0.5213546752929688, "learning_rate": 3.885287194272757e-05, "loss": 1.4968, "step": 10385 }, { "epoch": 0.578897497352433, "grad_norm": 0.5692139267921448, "learning_rate": 3.884419198876411e-05, "loss": 1.7601, "step": 10386 }, { "epoch": 0.5789532356055961, "grad_norm": 0.5776494145393372, "learning_rate": 3.8835512388597836e-05, "loss": 1.7149, "step": 10387 }, { "epoch": 0.5790089738587593, "grad_norm": 0.5485444068908691, "learning_rate": 3.8826833142504006e-05, "loss": 1.6867, "step": 10388 }, { "epoch": 0.5790647121119225, "grad_norm": 0.5601508021354675, "learning_rate": 3.881815425075791e-05, "loss": 1.6042, "step": 10389 }, { "epoch": 0.5791204503650855, "grad_norm": 0.5325314998626709, "learning_rate": 3.880947571363474e-05, "loss": 1.7868, "step": 10390 }, { "epoch": 0.5791761886182487, "grad_norm": 0.5936904549598694, "learning_rate": 3.880079753140978e-05, "loss": 1.8606, "step": 10391 }, { "epoch": 0.5792319268714119, "grad_norm": 0.5427181720733643, "learning_rate": 3.87921197043582e-05, "loss": 1.5603, "step": 10392 }, { "epoch": 0.579287665124575, "grad_norm": 0.5596809387207031, "learning_rate": 3.878344223275524e-05, "loss": 1.9158, "step": 10393 }, { "epoch": 0.5793434033777382, "grad_norm": 0.5559753179550171, "learning_rate": 3.877476511687611e-05, "loss": 1.62, "step": 10394 }, { "epoch": 0.5793991416309013, "grad_norm": 0.5727944374084473, "learning_rate": 3.8766088356995976e-05, "loss": 1.5055, "step": 10395 }, { "epoch": 0.5794548798840644, "grad_norm": 0.6167700290679932, "learning_rate": 3.875741195339003e-05, "loss": 1.7256, "step": 10396 }, { "epoch": 0.5795106181372276, "grad_norm": 0.5643514394760132, "learning_rate": 3.874873590633341e-05, "loss": 1.6768, "step": 10397 }, { "epoch": 0.5795663563903908, "grad_norm": 0.5504075884819031, "learning_rate": 3.874006021610131e-05, "loss": 1.676, "step": 10398 }, { "epoch": 0.5796220946435539, "grad_norm": 0.5333808064460754, "learning_rate": 3.8731384882968824e-05, "loss": 1.5099, "step": 10399 }, { "epoch": 0.579677832896717, "grad_norm": 0.5340782999992371, "learning_rate": 3.872270990721112e-05, "loss": 1.7212, "step": 10400 }, { "epoch": 0.5797335711498801, "grad_norm": 0.5497784614562988, "learning_rate": 3.8714035289103314e-05, "loss": 1.66, "step": 10401 }, { "epoch": 0.5797893094030433, "grad_norm": 0.5847936868667603, "learning_rate": 3.8705361028920494e-05, "loss": 1.7655, "step": 10402 }, { "epoch": 0.5798450476562065, "grad_norm": 0.5303927659988403, "learning_rate": 3.869668712693778e-05, "loss": 1.5689, "step": 10403 }, { "epoch": 0.5799007859093696, "grad_norm": 0.5461509823799133, "learning_rate": 3.868801358343025e-05, "loss": 1.6214, "step": 10404 }, { "epoch": 0.5799565241625327, "grad_norm": 0.5522668957710266, "learning_rate": 3.8679340398672953e-05, "loss": 1.5918, "step": 10405 }, { "epoch": 0.5800122624156959, "grad_norm": 0.5287279486656189, "learning_rate": 3.867066757294101e-05, "loss": 1.5958, "step": 10406 }, { "epoch": 0.580068000668859, "grad_norm": 0.5924019813537598, "learning_rate": 3.866199510650941e-05, "loss": 1.6475, "step": 10407 }, { "epoch": 0.5801237389220222, "grad_norm": 0.5650224685668945, "learning_rate": 3.865332299965323e-05, "loss": 1.6921, "step": 10408 }, { "epoch": 0.5801794771751854, "grad_norm": 0.5323730707168579, "learning_rate": 3.864465125264749e-05, "loss": 1.5513, "step": 10409 }, { "epoch": 0.5802352154283484, "grad_norm": 0.5714460611343384, "learning_rate": 3.8635979865767205e-05, "loss": 1.6684, "step": 10410 }, { "epoch": 0.5802909536815116, "grad_norm": 0.5639826059341431, "learning_rate": 3.862730883928738e-05, "loss": 1.6376, "step": 10411 }, { "epoch": 0.5803466919346748, "grad_norm": 0.5803040266036987, "learning_rate": 3.8618638173483014e-05, "loss": 1.8236, "step": 10412 }, { "epoch": 0.5804024301878379, "grad_norm": 0.566265344619751, "learning_rate": 3.860996786862909e-05, "loss": 1.4877, "step": 10413 }, { "epoch": 0.5804581684410011, "grad_norm": 0.5610904097557068, "learning_rate": 3.860129792500056e-05, "loss": 1.6582, "step": 10414 }, { "epoch": 0.5805139066941643, "grad_norm": 0.5860254764556885, "learning_rate": 3.859262834287243e-05, "loss": 1.6139, "step": 10415 }, { "epoch": 0.5805696449473273, "grad_norm": 0.5870318412780762, "learning_rate": 3.8583959122519585e-05, "loss": 1.6614, "step": 10416 }, { "epoch": 0.5806253832004905, "grad_norm": 0.5830135941505432, "learning_rate": 3.8575290264217036e-05, "loss": 1.7069, "step": 10417 }, { "epoch": 0.5806811214536537, "grad_norm": 0.5582641959190369, "learning_rate": 3.8566621768239634e-05, "loss": 1.604, "step": 10418 }, { "epoch": 0.5807368597068168, "grad_norm": 0.6204952001571655, "learning_rate": 3.855795363486233e-05, "loss": 1.9387, "step": 10419 }, { "epoch": 0.58079259795998, "grad_norm": 0.5565268993377686, "learning_rate": 3.854928586436005e-05, "loss": 1.8071, "step": 10420 }, { "epoch": 0.5808483362131431, "grad_norm": 0.5894541144371033, "learning_rate": 3.854061845700764e-05, "loss": 1.7062, "step": 10421 }, { "epoch": 0.5809040744663062, "grad_norm": 0.5459067821502686, "learning_rate": 3.853195141308001e-05, "loss": 1.6668, "step": 10422 }, { "epoch": 0.5809598127194694, "grad_norm": 0.5536026954650879, "learning_rate": 3.852328473285201e-05, "loss": 1.6721, "step": 10423 }, { "epoch": 0.5810155509726325, "grad_norm": 0.5301326513290405, "learning_rate": 3.851461841659851e-05, "loss": 1.503, "step": 10424 }, { "epoch": 0.5810712892257957, "grad_norm": 0.5645812153816223, "learning_rate": 3.850595246459434e-05, "loss": 1.6078, "step": 10425 }, { "epoch": 0.5811270274789588, "grad_norm": 0.5299369692802429, "learning_rate": 3.849728687711435e-05, "loss": 1.4543, "step": 10426 }, { "epoch": 0.5811827657321219, "grad_norm": 0.5582391619682312, "learning_rate": 3.8488621654433356e-05, "loss": 1.4153, "step": 10427 }, { "epoch": 0.5812385039852851, "grad_norm": 0.5766590237617493, "learning_rate": 3.8479956796826164e-05, "loss": 1.8426, "step": 10428 }, { "epoch": 0.5812942422384483, "grad_norm": 0.5900693535804749, "learning_rate": 3.8471292304567586e-05, "loss": 1.9991, "step": 10429 }, { "epoch": 0.5813499804916114, "grad_norm": 0.5874468088150024, "learning_rate": 3.8462628177932386e-05, "loss": 1.7196, "step": 10430 }, { "epoch": 0.5814057187447745, "grad_norm": 0.5636804699897766, "learning_rate": 3.845396441719537e-05, "loss": 1.6985, "step": 10431 }, { "epoch": 0.5814614569979377, "grad_norm": 0.5602846145629883, "learning_rate": 3.844530102263126e-05, "loss": 1.729, "step": 10432 }, { "epoch": 0.5815171952511008, "grad_norm": 0.5678505301475525, "learning_rate": 3.843663799451483e-05, "loss": 1.602, "step": 10433 }, { "epoch": 0.581572933504264, "grad_norm": 0.5459701418876648, "learning_rate": 3.842797533312085e-05, "loss": 1.6195, "step": 10434 }, { "epoch": 0.5816286717574272, "grad_norm": 0.5326259732246399, "learning_rate": 3.841931303872401e-05, "loss": 1.5695, "step": 10435 }, { "epoch": 0.5816844100105902, "grad_norm": 0.5516942143440247, "learning_rate": 3.841065111159905e-05, "loss": 1.5744, "step": 10436 }, { "epoch": 0.5817401482637534, "grad_norm": 0.5589244365692139, "learning_rate": 3.8401989552020654e-05, "loss": 1.5559, "step": 10437 }, { "epoch": 0.5817958865169166, "grad_norm": 0.5421091318130493, "learning_rate": 3.839332836026353e-05, "loss": 1.5991, "step": 10438 }, { "epoch": 0.5818516247700797, "grad_norm": 0.5204689502716064, "learning_rate": 3.838466753660237e-05, "loss": 1.3576, "step": 10439 }, { "epoch": 0.5819073630232429, "grad_norm": 0.6035448312759399, "learning_rate": 3.837600708131181e-05, "loss": 1.7927, "step": 10440 }, { "epoch": 0.581963101276406, "grad_norm": 0.5337579250335693, "learning_rate": 3.836734699466656e-05, "loss": 1.6014, "step": 10441 }, { "epoch": 0.5820188395295691, "grad_norm": 0.604854166507721, "learning_rate": 3.835868727694122e-05, "loss": 1.7221, "step": 10442 }, { "epoch": 0.5820745777827323, "grad_norm": 0.5534946918487549, "learning_rate": 3.835002792841047e-05, "loss": 1.5634, "step": 10443 }, { "epoch": 0.5821303160358955, "grad_norm": 0.5689296126365662, "learning_rate": 3.834136894934888e-05, "loss": 1.6135, "step": 10444 }, { "epoch": 0.5821860542890586, "grad_norm": 0.5645999312400818, "learning_rate": 3.833271034003111e-05, "loss": 1.3717, "step": 10445 }, { "epoch": 0.5822417925422217, "grad_norm": 0.6080798506736755, "learning_rate": 3.832405210073174e-05, "loss": 1.5761, "step": 10446 }, { "epoch": 0.5822975307953848, "grad_norm": 0.5378057360649109, "learning_rate": 3.831539423172536e-05, "loss": 1.6861, "step": 10447 }, { "epoch": 0.582353269048548, "grad_norm": 0.576270341873169, "learning_rate": 3.8306736733286555e-05, "loss": 1.6967, "step": 10448 }, { "epoch": 0.5824090073017112, "grad_norm": 0.6018567681312561, "learning_rate": 3.829807960568988e-05, "loss": 1.9025, "step": 10449 }, { "epoch": 0.5824647455548743, "grad_norm": 0.6117346286773682, "learning_rate": 3.8289422849209896e-05, "loss": 1.8112, "step": 10450 }, { "epoch": 0.5825204838080374, "grad_norm": 0.5422847270965576, "learning_rate": 3.8280766464121134e-05, "loss": 1.5044, "step": 10451 }, { "epoch": 0.5825762220612006, "grad_norm": 0.5537722110748291, "learning_rate": 3.827211045069813e-05, "loss": 1.6428, "step": 10452 }, { "epoch": 0.5826319603143637, "grad_norm": 0.6170569062232971, "learning_rate": 3.826345480921542e-05, "loss": 1.7481, "step": 10453 }, { "epoch": 0.5826876985675269, "grad_norm": 0.5351431369781494, "learning_rate": 3.825479953994748e-05, "loss": 1.6192, "step": 10454 }, { "epoch": 0.5827434368206901, "grad_norm": 0.5633178353309631, "learning_rate": 3.824614464316883e-05, "loss": 1.6705, "step": 10455 }, { "epoch": 0.5827991750738531, "grad_norm": 0.5995389223098755, "learning_rate": 3.8237490119153934e-05, "loss": 1.7806, "step": 10456 }, { "epoch": 0.5828549133270163, "grad_norm": 0.5304275751113892, "learning_rate": 3.822883596817728e-05, "loss": 1.5233, "step": 10457 }, { "epoch": 0.5829106515801795, "grad_norm": 0.5443453788757324, "learning_rate": 3.822018219051331e-05, "loss": 1.6379, "step": 10458 }, { "epoch": 0.5829663898333426, "grad_norm": 0.5200064778327942, "learning_rate": 3.821152878643647e-05, "loss": 1.5846, "step": 10459 }, { "epoch": 0.5830221280865058, "grad_norm": 0.5608554482460022, "learning_rate": 3.820287575622122e-05, "loss": 1.5801, "step": 10460 }, { "epoch": 0.583077866339669, "grad_norm": 0.5903092622756958, "learning_rate": 3.8194223100141965e-05, "loss": 1.6576, "step": 10461 }, { "epoch": 0.583133604592832, "grad_norm": 0.5784822106361389, "learning_rate": 3.818557081847313e-05, "loss": 1.8402, "step": 10462 }, { "epoch": 0.5831893428459952, "grad_norm": 0.5177431702613831, "learning_rate": 3.81769189114891e-05, "loss": 1.2378, "step": 10463 }, { "epoch": 0.5832450810991584, "grad_norm": 0.5646283626556396, "learning_rate": 3.8168267379464263e-05, "loss": 1.6343, "step": 10464 }, { "epoch": 0.5833008193523215, "grad_norm": 0.5550134778022766, "learning_rate": 3.815961622267301e-05, "loss": 1.6733, "step": 10465 }, { "epoch": 0.5833565576054847, "grad_norm": 0.6027835011482239, "learning_rate": 3.8150965441389674e-05, "loss": 1.6899, "step": 10466 }, { "epoch": 0.5834122958586478, "grad_norm": 0.5438368916511536, "learning_rate": 3.814231503588867e-05, "loss": 1.6729, "step": 10467 }, { "epoch": 0.5834680341118109, "grad_norm": 0.5765901803970337, "learning_rate": 3.8133665006444255e-05, "loss": 1.794, "step": 10468 }, { "epoch": 0.5835237723649741, "grad_norm": 0.6034119725227356, "learning_rate": 3.812501535333083e-05, "loss": 1.9005, "step": 10469 }, { "epoch": 0.5835795106181372, "grad_norm": 0.5628261566162109, "learning_rate": 3.811636607682267e-05, "loss": 1.75, "step": 10470 }, { "epoch": 0.5836352488713004, "grad_norm": 0.6064727902412415, "learning_rate": 3.810771717719409e-05, "loss": 1.83, "step": 10471 }, { "epoch": 0.5836909871244635, "grad_norm": 0.5413762331008911, "learning_rate": 3.80990686547194e-05, "loss": 1.6382, "step": 10472 }, { "epoch": 0.5837467253776266, "grad_norm": 0.5523511171340942, "learning_rate": 3.809042050967285e-05, "loss": 1.4951, "step": 10473 }, { "epoch": 0.5838024636307898, "grad_norm": 0.5516862273216248, "learning_rate": 3.808177274232873e-05, "loss": 1.5714, "step": 10474 }, { "epoch": 0.583858201883953, "grad_norm": 0.5366679430007935, "learning_rate": 3.807312535296127e-05, "loss": 1.6896, "step": 10475 }, { "epoch": 0.5839139401371161, "grad_norm": 0.5850146412849426, "learning_rate": 3.806447834184477e-05, "loss": 1.6643, "step": 10476 }, { "epoch": 0.5839696783902792, "grad_norm": 0.5514613389968872, "learning_rate": 3.8055831709253396e-05, "loss": 1.6747, "step": 10477 }, { "epoch": 0.5840254166434424, "grad_norm": 0.5313770771026611, "learning_rate": 3.804718545546142e-05, "loss": 1.7009, "step": 10478 }, { "epoch": 0.5840811548966055, "grad_norm": 0.5248450040817261, "learning_rate": 3.803853958074303e-05, "loss": 1.4489, "step": 10479 }, { "epoch": 0.5841368931497687, "grad_norm": 0.8921785950660706, "learning_rate": 3.802989408537242e-05, "loss": 1.5598, "step": 10480 }, { "epoch": 0.5841926314029319, "grad_norm": 0.5542730689048767, "learning_rate": 3.802124896962379e-05, "loss": 1.6924, "step": 10481 }, { "epoch": 0.584248369656095, "grad_norm": 0.5227362513542175, "learning_rate": 3.801260423377129e-05, "loss": 1.479, "step": 10482 }, { "epoch": 0.5843041079092581, "grad_norm": 0.5378886461257935, "learning_rate": 3.8003959878089104e-05, "loss": 1.5304, "step": 10483 }, { "epoch": 0.5843598461624213, "grad_norm": 0.554295003414154, "learning_rate": 3.7995315902851354e-05, "loss": 1.4134, "step": 10484 }, { "epoch": 0.5844155844155844, "grad_norm": 0.5478252172470093, "learning_rate": 3.798667230833218e-05, "loss": 1.8024, "step": 10485 }, { "epoch": 0.5844713226687476, "grad_norm": 0.5450767874717712, "learning_rate": 3.797802909480574e-05, "loss": 1.7916, "step": 10486 }, { "epoch": 0.5845270609219108, "grad_norm": 0.6002693176269531, "learning_rate": 3.796938626254612e-05, "loss": 1.6446, "step": 10487 }, { "epoch": 0.5845827991750738, "grad_norm": 0.5589439272880554, "learning_rate": 3.796074381182743e-05, "loss": 1.5499, "step": 10488 }, { "epoch": 0.584638537428237, "grad_norm": 0.5932784676551819, "learning_rate": 3.795210174292374e-05, "loss": 1.661, "step": 10489 }, { "epoch": 0.5846942756814002, "grad_norm": 0.7987622618675232, "learning_rate": 3.794346005610914e-05, "loss": 1.9696, "step": 10490 }, { "epoch": 0.5847500139345633, "grad_norm": 0.5644296407699585, "learning_rate": 3.7934818751657706e-05, "loss": 1.6024, "step": 10491 }, { "epoch": 0.5848057521877265, "grad_norm": 0.5474801659584045, "learning_rate": 3.792617782984346e-05, "loss": 1.5879, "step": 10492 }, { "epoch": 0.5848614904408895, "grad_norm": 0.5493007302284241, "learning_rate": 3.791753729094048e-05, "loss": 1.5693, "step": 10493 }, { "epoch": 0.5849172286940527, "grad_norm": 0.5822592973709106, "learning_rate": 3.790889713522274e-05, "loss": 1.7629, "step": 10494 }, { "epoch": 0.5849729669472159, "grad_norm": 0.5798677206039429, "learning_rate": 3.7900257362964314e-05, "loss": 1.8306, "step": 10495 }, { "epoch": 0.585028705200379, "grad_norm": 0.5388454794883728, "learning_rate": 3.7891617974439165e-05, "loss": 1.6657, "step": 10496 }, { "epoch": 0.5850844434535422, "grad_norm": 0.5188543796539307, "learning_rate": 3.7882978969921296e-05, "loss": 1.6045, "step": 10497 }, { "epoch": 0.5851401817067053, "grad_norm": 0.5407771468162537, "learning_rate": 3.78743403496847e-05, "loss": 1.6769, "step": 10498 }, { "epoch": 0.5851959199598684, "grad_norm": 0.5791205763816833, "learning_rate": 3.7865702114003314e-05, "loss": 1.5448, "step": 10499 }, { "epoch": 0.5852516582130316, "grad_norm": 0.574635922908783, "learning_rate": 3.785706426315113e-05, "loss": 1.8509, "step": 10500 }, { "epoch": 0.5853073964661948, "grad_norm": 0.5714727640151978, "learning_rate": 3.7848426797402034e-05, "loss": 1.856, "step": 10501 }, { "epoch": 0.5853631347193579, "grad_norm": 0.558771014213562, "learning_rate": 3.783978971703003e-05, "loss": 1.6842, "step": 10502 }, { "epoch": 0.585418872972521, "grad_norm": 0.6013060808181763, "learning_rate": 3.783115302230897e-05, "loss": 1.8741, "step": 10503 }, { "epoch": 0.5854746112256842, "grad_norm": 0.5288045406341553, "learning_rate": 3.7822516713512795e-05, "loss": 1.669, "step": 10504 }, { "epoch": 0.5855303494788473, "grad_norm": 0.5664896368980408, "learning_rate": 3.78138807909154e-05, "loss": 1.7707, "step": 10505 }, { "epoch": 0.5855860877320105, "grad_norm": 0.6236469745635986, "learning_rate": 3.7805245254790646e-05, "loss": 2.0792, "step": 10506 }, { "epoch": 0.5856418259851737, "grad_norm": 0.5737569332122803, "learning_rate": 3.779661010541242e-05, "loss": 1.7686, "step": 10507 }, { "epoch": 0.5856975642383367, "grad_norm": 0.5788602232933044, "learning_rate": 3.778797534305456e-05, "loss": 1.751, "step": 10508 }, { "epoch": 0.5857533024914999, "grad_norm": 0.5300620794296265, "learning_rate": 3.777934096799094e-05, "loss": 1.7072, "step": 10509 }, { "epoch": 0.5858090407446631, "grad_norm": 0.5347722768783569, "learning_rate": 3.777070698049535e-05, "loss": 1.4512, "step": 10510 }, { "epoch": 0.5858647789978262, "grad_norm": 0.5386114716529846, "learning_rate": 3.7762073380841634e-05, "loss": 1.6386, "step": 10511 }, { "epoch": 0.5859205172509894, "grad_norm": 0.545583963394165, "learning_rate": 3.775344016930361e-05, "loss": 1.4614, "step": 10512 }, { "epoch": 0.5859762555041526, "grad_norm": 0.540080726146698, "learning_rate": 3.774480734615506e-05, "loss": 1.3026, "step": 10513 }, { "epoch": 0.5860319937573156, "grad_norm": 0.5793723464012146, "learning_rate": 3.7736174911669776e-05, "loss": 1.664, "step": 10514 }, { "epoch": 0.5860877320104788, "grad_norm": 0.5617543458938599, "learning_rate": 3.77275428661215e-05, "loss": 1.7944, "step": 10515 }, { "epoch": 0.5861434702636419, "grad_norm": 0.5727483630180359, "learning_rate": 3.7718911209784026e-05, "loss": 1.6576, "step": 10516 }, { "epoch": 0.5861992085168051, "grad_norm": 0.614232063293457, "learning_rate": 3.771027994293109e-05, "loss": 1.968, "step": 10517 }, { "epoch": 0.5862549467699683, "grad_norm": 0.5104675889015198, "learning_rate": 3.7701649065836394e-05, "loss": 1.754, "step": 10518 }, { "epoch": 0.5863106850231313, "grad_norm": 0.5460989475250244, "learning_rate": 3.769301857877372e-05, "loss": 1.4775, "step": 10519 }, { "epoch": 0.5863664232762945, "grad_norm": 0.5603992342948914, "learning_rate": 3.768438848201671e-05, "loss": 1.6659, "step": 10520 }, { "epoch": 0.5864221615294577, "grad_norm": 0.5435361862182617, "learning_rate": 3.767575877583912e-05, "loss": 1.6178, "step": 10521 }, { "epoch": 0.5864778997826208, "grad_norm": 0.5277562737464905, "learning_rate": 3.7667129460514585e-05, "loss": 1.5179, "step": 10522 }, { "epoch": 0.586533638035784, "grad_norm": 0.5214918851852417, "learning_rate": 3.76585005363168e-05, "loss": 1.6504, "step": 10523 }, { "epoch": 0.5865893762889471, "grad_norm": 0.5323712229728699, "learning_rate": 3.764987200351944e-05, "loss": 1.468, "step": 10524 }, { "epoch": 0.5866451145421102, "grad_norm": 0.5450025796890259, "learning_rate": 3.764124386239611e-05, "loss": 1.6458, "step": 10525 }, { "epoch": 0.5867008527952734, "grad_norm": 0.5709915161132812, "learning_rate": 3.7632616113220495e-05, "loss": 1.7088, "step": 10526 }, { "epoch": 0.5867565910484366, "grad_norm": 0.5776938199996948, "learning_rate": 3.762398875626616e-05, "loss": 1.5763, "step": 10527 }, { "epoch": 0.5868123293015997, "grad_norm": 0.5697132349014282, "learning_rate": 3.761536179180678e-05, "loss": 1.639, "step": 10528 }, { "epoch": 0.5868680675547628, "grad_norm": 0.5992898941040039, "learning_rate": 3.760673522011588e-05, "loss": 1.6822, "step": 10529 }, { "epoch": 0.586923805807926, "grad_norm": 0.5981577634811401, "learning_rate": 3.7598109041467094e-05, "loss": 1.7807, "step": 10530 }, { "epoch": 0.5869795440610891, "grad_norm": 0.5266358852386475, "learning_rate": 3.758948325613399e-05, "loss": 1.6093, "step": 10531 }, { "epoch": 0.5870352823142523, "grad_norm": 0.5778212547302246, "learning_rate": 3.758085786439011e-05, "loss": 1.6115, "step": 10532 }, { "epoch": 0.5870910205674155, "grad_norm": 0.5699662566184998, "learning_rate": 3.757223286650902e-05, "loss": 1.6165, "step": 10533 }, { "epoch": 0.5871467588205785, "grad_norm": 0.6047526597976685, "learning_rate": 3.756360826276424e-05, "loss": 1.7445, "step": 10534 }, { "epoch": 0.5872024970737417, "grad_norm": 0.5751059651374817, "learning_rate": 3.75549840534293e-05, "loss": 1.6591, "step": 10535 }, { "epoch": 0.5872582353269049, "grad_norm": 0.5245922207832336, "learning_rate": 3.7546360238777694e-05, "loss": 1.6325, "step": 10536 }, { "epoch": 0.587313973580068, "grad_norm": 0.5294795632362366, "learning_rate": 3.753773681908292e-05, "loss": 1.6007, "step": 10537 }, { "epoch": 0.5873697118332312, "grad_norm": 0.5342444181442261, "learning_rate": 3.75291137946185e-05, "loss": 1.6943, "step": 10538 }, { "epoch": 0.5874254500863942, "grad_norm": 0.5659368634223938, "learning_rate": 3.7520491165657875e-05, "loss": 1.538, "step": 10539 }, { "epoch": 0.5874811883395574, "grad_norm": 0.5024417638778687, "learning_rate": 3.751186893247452e-05, "loss": 1.7185, "step": 10540 }, { "epoch": 0.5875369265927206, "grad_norm": 0.553939700126648, "learning_rate": 3.750324709534185e-05, "loss": 1.6519, "step": 10541 }, { "epoch": 0.5875926648458837, "grad_norm": 0.5790380239486694, "learning_rate": 3.749462565453333e-05, "loss": 1.783, "step": 10542 }, { "epoch": 0.5876484030990469, "grad_norm": 0.5356141328811646, "learning_rate": 3.748600461032238e-05, "loss": 1.4267, "step": 10543 }, { "epoch": 0.58770414135221, "grad_norm": 0.5545246601104736, "learning_rate": 3.7477383962982374e-05, "loss": 1.6198, "step": 10544 }, { "epoch": 0.5877598796053731, "grad_norm": 0.5444962978363037, "learning_rate": 3.746876371278678e-05, "loss": 1.501, "step": 10545 }, { "epoch": 0.5878156178585363, "grad_norm": 0.5676127076148987, "learning_rate": 3.74601438600089e-05, "loss": 1.7348, "step": 10546 }, { "epoch": 0.5878713561116995, "grad_norm": 0.5442788004875183, "learning_rate": 3.745152440492217e-05, "loss": 1.6013, "step": 10547 }, { "epoch": 0.5879270943648626, "grad_norm": 0.543764054775238, "learning_rate": 3.744290534779991e-05, "loss": 1.5797, "step": 10548 }, { "epoch": 0.5879828326180258, "grad_norm": 0.5701844692230225, "learning_rate": 3.7434286688915474e-05, "loss": 1.6181, "step": 10549 }, { "epoch": 0.5880385708711889, "grad_norm": 0.558018147945404, "learning_rate": 3.742566842854222e-05, "loss": 1.7129, "step": 10550 }, { "epoch": 0.588094309124352, "grad_norm": 0.617866575717926, "learning_rate": 3.741705056695344e-05, "loss": 1.8679, "step": 10551 }, { "epoch": 0.5881500473775152, "grad_norm": 0.5197618007659912, "learning_rate": 3.7408433104422455e-05, "loss": 1.2723, "step": 10552 }, { "epoch": 0.5882057856306784, "grad_norm": 0.6245566606521606, "learning_rate": 3.739981604122254e-05, "loss": 1.8093, "step": 10553 }, { "epoch": 0.5882615238838415, "grad_norm": 0.5682582855224609, "learning_rate": 3.739119937762703e-05, "loss": 1.7748, "step": 10554 }, { "epoch": 0.5883172621370046, "grad_norm": 0.5899463891983032, "learning_rate": 3.738258311390913e-05, "loss": 1.874, "step": 10555 }, { "epoch": 0.5883730003901678, "grad_norm": 0.587677001953125, "learning_rate": 3.737396725034214e-05, "loss": 1.7739, "step": 10556 }, { "epoch": 0.5884287386433309, "grad_norm": 0.5093933939933777, "learning_rate": 3.7365351787199305e-05, "loss": 1.3497, "step": 10557 }, { "epoch": 0.5884844768964941, "grad_norm": 0.5440930128097534, "learning_rate": 3.7356736724753834e-05, "loss": 1.5436, "step": 10558 }, { "epoch": 0.5885402151496573, "grad_norm": 0.531604528427124, "learning_rate": 3.734812206327897e-05, "loss": 1.5924, "step": 10559 }, { "epoch": 0.5885959534028203, "grad_norm": 0.593714714050293, "learning_rate": 3.73395078030479e-05, "loss": 1.9695, "step": 10560 }, { "epoch": 0.5886516916559835, "grad_norm": 0.5461561679840088, "learning_rate": 3.733089394433383e-05, "loss": 1.5767, "step": 10561 }, { "epoch": 0.5887074299091466, "grad_norm": 0.5576294660568237, "learning_rate": 3.732228048740992e-05, "loss": 1.6256, "step": 10562 }, { "epoch": 0.5887631681623098, "grad_norm": 0.5713305473327637, "learning_rate": 3.731366743254937e-05, "loss": 1.7033, "step": 10563 }, { "epoch": 0.588818906415473, "grad_norm": 0.5811915397644043, "learning_rate": 3.730505478002533e-05, "loss": 1.8645, "step": 10564 }, { "epoch": 0.588874644668636, "grad_norm": 0.5555295348167419, "learning_rate": 3.7296442530110934e-05, "loss": 1.6798, "step": 10565 }, { "epoch": 0.5889303829217992, "grad_norm": 0.6551502346992493, "learning_rate": 3.728783068307931e-05, "loss": 1.9067, "step": 10566 }, { "epoch": 0.5889861211749624, "grad_norm": 0.6149044036865234, "learning_rate": 3.727921923920358e-05, "loss": 1.7204, "step": 10567 }, { "epoch": 0.5890418594281255, "grad_norm": 0.635021448135376, "learning_rate": 3.7270608198756854e-05, "loss": 1.8126, "step": 10568 }, { "epoch": 0.5890975976812887, "grad_norm": 0.813902735710144, "learning_rate": 3.726199756201221e-05, "loss": 1.5697, "step": 10569 }, { "epoch": 0.5891533359344518, "grad_norm": 0.6003322005271912, "learning_rate": 3.7253387329242726e-05, "loss": 1.6158, "step": 10570 }, { "epoch": 0.5892090741876149, "grad_norm": 0.5682037472724915, "learning_rate": 3.7244777500721504e-05, "loss": 1.6502, "step": 10571 }, { "epoch": 0.5892648124407781, "grad_norm": 0.6143748164176941, "learning_rate": 3.723616807672155e-05, "loss": 1.9357, "step": 10572 }, { "epoch": 0.5893205506939413, "grad_norm": 0.5592736005783081, "learning_rate": 3.722755905751594e-05, "loss": 1.7127, "step": 10573 }, { "epoch": 0.5893762889471044, "grad_norm": 0.5672972798347473, "learning_rate": 3.7218950443377676e-05, "loss": 1.6027, "step": 10574 }, { "epoch": 0.5894320272002675, "grad_norm": 0.5966082215309143, "learning_rate": 3.7210342234579785e-05, "loss": 1.8543, "step": 10575 }, { "epoch": 0.5894877654534307, "grad_norm": 0.5324078798294067, "learning_rate": 3.720173443139528e-05, "loss": 1.6043, "step": 10576 }, { "epoch": 0.5895435037065938, "grad_norm": 0.5152407884597778, "learning_rate": 3.7193127034097144e-05, "loss": 1.4774, "step": 10577 }, { "epoch": 0.589599241959757, "grad_norm": 0.5895288586616516, "learning_rate": 3.718452004295835e-05, "loss": 1.8391, "step": 10578 }, { "epoch": 0.5896549802129202, "grad_norm": 0.5626966953277588, "learning_rate": 3.717591345825183e-05, "loss": 1.8092, "step": 10579 }, { "epoch": 0.5897107184660833, "grad_norm": 0.5412726402282715, "learning_rate": 3.7167307280250607e-05, "loss": 1.5544, "step": 10580 }, { "epoch": 0.5897664567192464, "grad_norm": 0.5498217940330505, "learning_rate": 3.7158701509227544e-05, "loss": 1.8257, "step": 10581 }, { "epoch": 0.5898221949724096, "grad_norm": 0.5844752788543701, "learning_rate": 3.715009614545561e-05, "loss": 1.8018, "step": 10582 }, { "epoch": 0.5898779332255727, "grad_norm": 0.5298795700073242, "learning_rate": 3.714149118920772e-05, "loss": 1.6239, "step": 10583 }, { "epoch": 0.5899336714787359, "grad_norm": 0.5541282296180725, "learning_rate": 3.713288664075674e-05, "loss": 1.7372, "step": 10584 }, { "epoch": 0.589989409731899, "grad_norm": 0.5731157064437866, "learning_rate": 3.7124282500375597e-05, "loss": 1.7206, "step": 10585 }, { "epoch": 0.5900451479850621, "grad_norm": 0.5772982239723206, "learning_rate": 3.711567876833712e-05, "loss": 1.8214, "step": 10586 }, { "epoch": 0.5901008862382253, "grad_norm": 0.6069211959838867, "learning_rate": 3.710707544491421e-05, "loss": 1.9289, "step": 10587 }, { "epoch": 0.5901566244913884, "grad_norm": 0.582378089427948, "learning_rate": 3.709847253037967e-05, "loss": 1.6522, "step": 10588 }, { "epoch": 0.5902123627445516, "grad_norm": 0.5845189690589905, "learning_rate": 3.7089870025006374e-05, "loss": 1.7481, "step": 10589 }, { "epoch": 0.5902681009977148, "grad_norm": 0.5751447677612305, "learning_rate": 3.708126792906714e-05, "loss": 1.4827, "step": 10590 }, { "epoch": 0.5903238392508778, "grad_norm": 0.5446940660476685, "learning_rate": 3.707266624283475e-05, "loss": 1.6345, "step": 10591 }, { "epoch": 0.590379577504041, "grad_norm": 0.5264309048652649, "learning_rate": 3.706406496658204e-05, "loss": 1.675, "step": 10592 }, { "epoch": 0.5904353157572042, "grad_norm": 0.5321794748306274, "learning_rate": 3.705546410058175e-05, "loss": 1.6043, "step": 10593 }, { "epoch": 0.5904910540103673, "grad_norm": 0.568670928478241, "learning_rate": 3.704686364510667e-05, "loss": 1.799, "step": 10594 }, { "epoch": 0.5905467922635305, "grad_norm": 0.5585296154022217, "learning_rate": 3.703826360042954e-05, "loss": 1.6559, "step": 10595 }, { "epoch": 0.5906025305166936, "grad_norm": 0.5437737107276917, "learning_rate": 3.702966396682312e-05, "loss": 1.7088, "step": 10596 }, { "epoch": 0.5906582687698567, "grad_norm": 0.5538874268531799, "learning_rate": 3.702106474456016e-05, "loss": 1.845, "step": 10597 }, { "epoch": 0.5907140070230199, "grad_norm": 0.5787869095802307, "learning_rate": 3.701246593391332e-05, "loss": 1.6807, "step": 10598 }, { "epoch": 0.5907697452761831, "grad_norm": 0.5712507963180542, "learning_rate": 3.7003867535155365e-05, "loss": 1.6427, "step": 10599 }, { "epoch": 0.5908254835293462, "grad_norm": 0.5915331244468689, "learning_rate": 3.699526954855895e-05, "loss": 1.7423, "step": 10600 }, { "epoch": 0.5908812217825093, "grad_norm": 0.5464789271354675, "learning_rate": 3.6986671974396755e-05, "loss": 1.5484, "step": 10601 }, { "epoch": 0.5909369600356725, "grad_norm": 0.58048015832901, "learning_rate": 3.697807481294146e-05, "loss": 1.694, "step": 10602 }, { "epoch": 0.5909926982888356, "grad_norm": 0.5703095197677612, "learning_rate": 3.696947806446571e-05, "loss": 1.7923, "step": 10603 }, { "epoch": 0.5910484365419988, "grad_norm": 0.5519693493843079, "learning_rate": 3.696088172924215e-05, "loss": 1.506, "step": 10604 }, { "epoch": 0.591104174795162, "grad_norm": 0.6527479887008667, "learning_rate": 3.695228580754337e-05, "loss": 1.6042, "step": 10605 }, { "epoch": 0.591159913048325, "grad_norm": 0.58185213804245, "learning_rate": 3.6943690299642055e-05, "loss": 1.5731, "step": 10606 }, { "epoch": 0.5912156513014882, "grad_norm": 0.5212710499763489, "learning_rate": 3.693509520581072e-05, "loss": 1.3331, "step": 10607 }, { "epoch": 0.5912713895546513, "grad_norm": 0.5953770279884338, "learning_rate": 3.6926500526322e-05, "loss": 1.7033, "step": 10608 }, { "epoch": 0.5913271278078145, "grad_norm": 0.5897699594497681, "learning_rate": 3.6917906261448473e-05, "loss": 1.8765, "step": 10609 }, { "epoch": 0.5913828660609777, "grad_norm": 0.5852439999580383, "learning_rate": 3.6909312411462675e-05, "loss": 1.6823, "step": 10610 }, { "epoch": 0.5914386043141407, "grad_norm": 0.5378084182739258, "learning_rate": 3.6900718976637174e-05, "loss": 1.6476, "step": 10611 }, { "epoch": 0.5914943425673039, "grad_norm": 0.5815349221229553, "learning_rate": 3.6892125957244484e-05, "loss": 1.7493, "step": 10612 }, { "epoch": 0.5915500808204671, "grad_norm": 0.5775283575057983, "learning_rate": 3.688353335355714e-05, "loss": 1.5932, "step": 10613 }, { "epoch": 0.5916058190736302, "grad_norm": 0.5364789962768555, "learning_rate": 3.687494116584763e-05, "loss": 1.6508, "step": 10614 }, { "epoch": 0.5916615573267934, "grad_norm": 0.5703774690628052, "learning_rate": 3.6866349394388465e-05, "loss": 1.6409, "step": 10615 }, { "epoch": 0.5917172955799566, "grad_norm": 0.575446367263794, "learning_rate": 3.6857758039452135e-05, "loss": 1.6912, "step": 10616 }, { "epoch": 0.5917730338331196, "grad_norm": 0.5789859294891357, "learning_rate": 3.6849167101311086e-05, "loss": 1.646, "step": 10617 }, { "epoch": 0.5918287720862828, "grad_norm": 0.5376462340354919, "learning_rate": 3.68405765802378e-05, "loss": 1.5624, "step": 10618 }, { "epoch": 0.591884510339446, "grad_norm": 0.5628196001052856, "learning_rate": 3.683198647650468e-05, "loss": 1.7411, "step": 10619 }, { "epoch": 0.5919402485926091, "grad_norm": 0.566883385181427, "learning_rate": 3.6823396790384176e-05, "loss": 1.6677, "step": 10620 }, { "epoch": 0.5919959868457723, "grad_norm": 0.570627748966217, "learning_rate": 3.681480752214871e-05, "loss": 1.6784, "step": 10621 }, { "epoch": 0.5920517250989354, "grad_norm": 0.5927448868751526, "learning_rate": 3.6806218672070644e-05, "loss": 2.0557, "step": 10622 }, { "epoch": 0.5921074633520985, "grad_norm": 0.5723655223846436, "learning_rate": 3.6797630240422445e-05, "loss": 1.637, "step": 10623 }, { "epoch": 0.5921632016052617, "grad_norm": 0.5774400234222412, "learning_rate": 3.67890422274764e-05, "loss": 1.6871, "step": 10624 }, { "epoch": 0.5922189398584249, "grad_norm": 0.560175359249115, "learning_rate": 3.678045463350493e-05, "loss": 1.5928, "step": 10625 }, { "epoch": 0.592274678111588, "grad_norm": 0.5461480021476746, "learning_rate": 3.677186745878036e-05, "loss": 1.5871, "step": 10626 }, { "epoch": 0.5923304163647511, "grad_norm": 0.5419283509254456, "learning_rate": 3.676328070357503e-05, "loss": 1.5642, "step": 10627 }, { "epoch": 0.5923861546179143, "grad_norm": 0.5696420669555664, "learning_rate": 3.6754694368161264e-05, "loss": 1.711, "step": 10628 }, { "epoch": 0.5924418928710774, "grad_norm": 0.5280278921127319, "learning_rate": 3.6746108452811344e-05, "loss": 1.2255, "step": 10629 }, { "epoch": 0.5924976311242406, "grad_norm": 0.5472394227981567, "learning_rate": 3.6737522957797635e-05, "loss": 1.5926, "step": 10630 }, { "epoch": 0.5925533693774037, "grad_norm": 0.6041975617408752, "learning_rate": 3.6728937883392326e-05, "loss": 2.0771, "step": 10631 }, { "epoch": 0.5926091076305668, "grad_norm": 0.5683224201202393, "learning_rate": 3.672035322986777e-05, "loss": 1.7859, "step": 10632 }, { "epoch": 0.59266484588373, "grad_norm": 0.5793581604957581, "learning_rate": 3.671176899749614e-05, "loss": 1.6532, "step": 10633 }, { "epoch": 0.5927205841368931, "grad_norm": 0.5718412399291992, "learning_rate": 3.670318518654975e-05, "loss": 1.5604, "step": 10634 }, { "epoch": 0.5927763223900563, "grad_norm": 0.5807404518127441, "learning_rate": 3.66946017973008e-05, "loss": 1.6858, "step": 10635 }, { "epoch": 0.5928320606432195, "grad_norm": 0.5799666047096252, "learning_rate": 3.668601883002149e-05, "loss": 1.4864, "step": 10636 }, { "epoch": 0.5928877988963825, "grad_norm": 0.6206822395324707, "learning_rate": 3.667743628498406e-05, "loss": 1.7848, "step": 10637 }, { "epoch": 0.5929435371495457, "grad_norm": 0.5985648036003113, "learning_rate": 3.6668854162460667e-05, "loss": 1.7601, "step": 10638 }, { "epoch": 0.5929992754027089, "grad_norm": 0.579348087310791, "learning_rate": 3.666027246272349e-05, "loss": 1.8047, "step": 10639 }, { "epoch": 0.593055013655872, "grad_norm": 0.5877785682678223, "learning_rate": 3.665169118604468e-05, "loss": 1.6072, "step": 10640 }, { "epoch": 0.5931107519090352, "grad_norm": 0.579860508441925, "learning_rate": 3.6643110332696404e-05, "loss": 1.768, "step": 10641 }, { "epoch": 0.5931664901621984, "grad_norm": 0.5771205425262451, "learning_rate": 3.663452990295081e-05, "loss": 1.7465, "step": 10642 }, { "epoch": 0.5932222284153614, "grad_norm": 0.5102217197418213, "learning_rate": 3.662594989707999e-05, "loss": 1.5842, "step": 10643 }, { "epoch": 0.5932779666685246, "grad_norm": 0.6734121441841125, "learning_rate": 3.661737031535608e-05, "loss": 2.1065, "step": 10644 }, { "epoch": 0.5933337049216878, "grad_norm": 0.5396010279655457, "learning_rate": 3.660879115805114e-05, "loss": 1.6108, "step": 10645 }, { "epoch": 0.5933894431748509, "grad_norm": 0.5581656694412231, "learning_rate": 3.6600212425437275e-05, "loss": 1.6259, "step": 10646 }, { "epoch": 0.5934451814280141, "grad_norm": 0.5051550269126892, "learning_rate": 3.659163411778654e-05, "loss": 1.5076, "step": 10647 }, { "epoch": 0.5935009196811772, "grad_norm": 0.6038596034049988, "learning_rate": 3.658305623537098e-05, "loss": 1.8803, "step": 10648 }, { "epoch": 0.5935566579343403, "grad_norm": 0.5228748917579651, "learning_rate": 3.6574478778462676e-05, "loss": 1.4336, "step": 10649 }, { "epoch": 0.5936123961875035, "grad_norm": 0.5188995599746704, "learning_rate": 3.6565901747333616e-05, "loss": 1.5946, "step": 10650 }, { "epoch": 0.5936681344406667, "grad_norm": 0.6071973443031311, "learning_rate": 3.655732514225584e-05, "loss": 1.9025, "step": 10651 }, { "epoch": 0.5937238726938298, "grad_norm": 0.5159845948219299, "learning_rate": 3.654874896350132e-05, "loss": 1.5769, "step": 10652 }, { "epoch": 0.5937796109469929, "grad_norm": 0.5195660591125488, "learning_rate": 3.654017321134206e-05, "loss": 1.5514, "step": 10653 }, { "epoch": 0.593835349200156, "grad_norm": 0.5409337878227234, "learning_rate": 3.653159788605004e-05, "loss": 1.4165, "step": 10654 }, { "epoch": 0.5938910874533192, "grad_norm": 0.5055655241012573, "learning_rate": 3.652302298789718e-05, "loss": 1.4697, "step": 10655 }, { "epoch": 0.5939468257064824, "grad_norm": 0.5607792735099792, "learning_rate": 3.65144485171555e-05, "loss": 1.6819, "step": 10656 }, { "epoch": 0.5940025639596455, "grad_norm": 0.6190406680107117, "learning_rate": 3.6505874474096844e-05, "loss": 1.8052, "step": 10657 }, { "epoch": 0.5940583022128086, "grad_norm": 0.6234287619590759, "learning_rate": 3.649730085899321e-05, "loss": 2.0106, "step": 10658 }, { "epoch": 0.5941140404659718, "grad_norm": 0.5554551482200623, "learning_rate": 3.648872767211643e-05, "loss": 1.3272, "step": 10659 }, { "epoch": 0.5941697787191349, "grad_norm": 0.5771641731262207, "learning_rate": 3.648015491373845e-05, "loss": 1.6486, "step": 10660 }, { "epoch": 0.5942255169722981, "grad_norm": 0.5576834082603455, "learning_rate": 3.6471582584131135e-05, "loss": 1.6681, "step": 10661 }, { "epoch": 0.5942812552254613, "grad_norm": 0.5702229142189026, "learning_rate": 3.6463010683566336e-05, "loss": 1.7256, "step": 10662 }, { "epoch": 0.5943369934786243, "grad_norm": 0.5582361817359924, "learning_rate": 3.645443921231592e-05, "loss": 1.5431, "step": 10663 }, { "epoch": 0.5943927317317875, "grad_norm": 0.5515589714050293, "learning_rate": 3.644586817065171e-05, "loss": 1.6071, "step": 10664 }, { "epoch": 0.5944484699849507, "grad_norm": 0.6197267174720764, "learning_rate": 3.643729755884554e-05, "loss": 1.6828, "step": 10665 }, { "epoch": 0.5945042082381138, "grad_norm": 0.5776437520980835, "learning_rate": 3.6428727377169195e-05, "loss": 1.4355, "step": 10666 }, { "epoch": 0.594559946491277, "grad_norm": 0.5944076776504517, "learning_rate": 3.642015762589451e-05, "loss": 1.8823, "step": 10667 }, { "epoch": 0.5946156847444402, "grad_norm": 0.5644761919975281, "learning_rate": 3.6411588305293255e-05, "loss": 1.6855, "step": 10668 }, { "epoch": 0.5946714229976032, "grad_norm": 0.5875162482261658, "learning_rate": 3.640301941563717e-05, "loss": 1.6565, "step": 10669 }, { "epoch": 0.5947271612507664, "grad_norm": 0.5691081881523132, "learning_rate": 3.639445095719807e-05, "loss": 1.7644, "step": 10670 }, { "epoch": 0.5947828995039296, "grad_norm": 0.5673412084579468, "learning_rate": 3.638588293024763e-05, "loss": 1.595, "step": 10671 }, { "epoch": 0.5948386377570927, "grad_norm": 0.5375627875328064, "learning_rate": 3.637731533505762e-05, "loss": 1.7064, "step": 10672 }, { "epoch": 0.5948943760102559, "grad_norm": 0.563486635684967, "learning_rate": 3.6368748171899734e-05, "loss": 1.6056, "step": 10673 }, { "epoch": 0.594950114263419, "grad_norm": 0.5447495579719543, "learning_rate": 3.636018144104567e-05, "loss": 1.6079, "step": 10674 }, { "epoch": 0.5950058525165821, "grad_norm": 0.6056522727012634, "learning_rate": 3.6351615142767146e-05, "loss": 1.7554, "step": 10675 }, { "epoch": 0.5950615907697453, "grad_norm": 0.5688165426254272, "learning_rate": 3.634304927733581e-05, "loss": 1.6637, "step": 10676 }, { "epoch": 0.5951173290229084, "grad_norm": 0.5747976303100586, "learning_rate": 3.633448384502333e-05, "loss": 1.4985, "step": 10677 }, { "epoch": 0.5951730672760716, "grad_norm": 0.5755765438079834, "learning_rate": 3.632591884610133e-05, "loss": 1.5359, "step": 10678 }, { "epoch": 0.5952288055292347, "grad_norm": 0.6509361863136292, "learning_rate": 3.631735428084148e-05, "loss": 1.9455, "step": 10679 }, { "epoch": 0.5952845437823978, "grad_norm": 0.5636258721351624, "learning_rate": 3.630879014951536e-05, "loss": 1.5867, "step": 10680 }, { "epoch": 0.595340282035561, "grad_norm": 0.5509964227676392, "learning_rate": 3.6300226452394584e-05, "loss": 1.6271, "step": 10681 }, { "epoch": 0.5953960202887242, "grad_norm": 0.5601130723953247, "learning_rate": 3.629166318975078e-05, "loss": 1.7173, "step": 10682 }, { "epoch": 0.5954517585418873, "grad_norm": 0.5896451473236084, "learning_rate": 3.628310036185546e-05, "loss": 1.7909, "step": 10683 }, { "epoch": 0.5955074967950504, "grad_norm": 0.5564537644386292, "learning_rate": 3.6274537968980255e-05, "loss": 1.6348, "step": 10684 }, { "epoch": 0.5955632350482136, "grad_norm": 0.5651112794876099, "learning_rate": 3.6265976011396655e-05, "loss": 1.7643, "step": 10685 }, { "epoch": 0.5956189733013767, "grad_norm": 0.5091538429260254, "learning_rate": 3.625741448937622e-05, "loss": 1.5047, "step": 10686 }, { "epoch": 0.5956747115545399, "grad_norm": 0.6056941747665405, "learning_rate": 3.6248853403190484e-05, "loss": 1.7784, "step": 10687 }, { "epoch": 0.5957304498077031, "grad_norm": 0.5621974468231201, "learning_rate": 3.624029275311094e-05, "loss": 1.6646, "step": 10688 }, { "epoch": 0.5957861880608661, "grad_norm": 0.6054074168205261, "learning_rate": 3.6231732539409095e-05, "loss": 1.8714, "step": 10689 }, { "epoch": 0.5958419263140293, "grad_norm": 0.5782527923583984, "learning_rate": 3.6223172762356404e-05, "loss": 1.7925, "step": 10690 }, { "epoch": 0.5958976645671925, "grad_norm": 0.5137213468551636, "learning_rate": 3.621461342222436e-05, "loss": 1.5877, "step": 10691 }, { "epoch": 0.5959534028203556, "grad_norm": 0.6089626550674438, "learning_rate": 3.62060545192844e-05, "loss": 2.0101, "step": 10692 }, { "epoch": 0.5960091410735188, "grad_norm": 0.6029727458953857, "learning_rate": 3.6197496053807954e-05, "loss": 1.8824, "step": 10693 }, { "epoch": 0.596064879326682, "grad_norm": 0.597644031047821, "learning_rate": 3.6188938026066476e-05, "loss": 1.8954, "step": 10694 }, { "epoch": 0.596120617579845, "grad_norm": 0.5832270383834839, "learning_rate": 3.618038043633135e-05, "loss": 1.8127, "step": 10695 }, { "epoch": 0.5961763558330082, "grad_norm": 0.533963680267334, "learning_rate": 3.617182328487399e-05, "loss": 1.7617, "step": 10696 }, { "epoch": 0.5962320940861714, "grad_norm": 0.5463215708732605, "learning_rate": 3.616326657196577e-05, "loss": 1.7745, "step": 10697 }, { "epoch": 0.5962878323393345, "grad_norm": 0.5657750368118286, "learning_rate": 3.615471029787807e-05, "loss": 1.7582, "step": 10698 }, { "epoch": 0.5963435705924977, "grad_norm": 0.6177558898925781, "learning_rate": 3.614615446288222e-05, "loss": 1.7778, "step": 10699 }, { "epoch": 0.5963993088456607, "grad_norm": 0.5581892728805542, "learning_rate": 3.6137599067249566e-05, "loss": 1.4981, "step": 10700 }, { "epoch": 0.5964550470988239, "grad_norm": 0.5763764381408691, "learning_rate": 3.612904411125147e-05, "loss": 1.7714, "step": 10701 }, { "epoch": 0.5965107853519871, "grad_norm": 0.5390127897262573, "learning_rate": 3.6120489595159214e-05, "loss": 1.5728, "step": 10702 }, { "epoch": 0.5965665236051502, "grad_norm": 0.563076376914978, "learning_rate": 3.611193551924411e-05, "loss": 1.6069, "step": 10703 }, { "epoch": 0.5966222618583134, "grad_norm": 0.5416215658187866, "learning_rate": 3.610338188377743e-05, "loss": 1.4952, "step": 10704 }, { "epoch": 0.5966780001114765, "grad_norm": 0.6353267431259155, "learning_rate": 3.609482868903046e-05, "loss": 1.8509, "step": 10705 }, { "epoch": 0.5967337383646396, "grad_norm": 0.5492638349533081, "learning_rate": 3.6086275935274446e-05, "loss": 1.5773, "step": 10706 }, { "epoch": 0.5967894766178028, "grad_norm": 0.5659378170967102, "learning_rate": 3.607772362278063e-05, "loss": 1.5693, "step": 10707 }, { "epoch": 0.596845214870966, "grad_norm": 0.582631528377533, "learning_rate": 3.606917175182027e-05, "loss": 1.5673, "step": 10708 }, { "epoch": 0.596900953124129, "grad_norm": 0.5583199858665466, "learning_rate": 3.606062032266453e-05, "loss": 1.8263, "step": 10709 }, { "epoch": 0.5969566913772922, "grad_norm": 0.5515373945236206, "learning_rate": 3.605206933558467e-05, "loss": 1.4876, "step": 10710 }, { "epoch": 0.5970124296304554, "grad_norm": 0.5634705424308777, "learning_rate": 3.6043518790851824e-05, "loss": 1.5723, "step": 10711 }, { "epoch": 0.5970681678836185, "grad_norm": 0.5301898121833801, "learning_rate": 3.60349686887372e-05, "loss": 1.6389, "step": 10712 }, { "epoch": 0.5971239061367817, "grad_norm": 0.5615551471710205, "learning_rate": 3.602641902951196e-05, "loss": 1.7281, "step": 10713 }, { "epoch": 0.5971796443899449, "grad_norm": 0.5689008235931396, "learning_rate": 3.601786981344722e-05, "loss": 1.7753, "step": 10714 }, { "epoch": 0.5972353826431079, "grad_norm": 0.5843450427055359, "learning_rate": 3.600932104081414e-05, "loss": 1.75, "step": 10715 }, { "epoch": 0.5972911208962711, "grad_norm": 0.5649077892303467, "learning_rate": 3.6000772711883805e-05, "loss": 1.7143, "step": 10716 }, { "epoch": 0.5973468591494343, "grad_norm": 0.5699289441108704, "learning_rate": 3.599222482692737e-05, "loss": 1.831, "step": 10717 }, { "epoch": 0.5974025974025974, "grad_norm": 0.5822247266769409, "learning_rate": 3.598367738621586e-05, "loss": 1.6298, "step": 10718 }, { "epoch": 0.5974583356557606, "grad_norm": 0.5153210163116455, "learning_rate": 3.5975130390020396e-05, "loss": 1.4701, "step": 10719 }, { "epoch": 0.5975140739089237, "grad_norm": 0.5386978983879089, "learning_rate": 3.596658383861203e-05, "loss": 1.5741, "step": 10720 }, { "epoch": 0.5975698121620868, "grad_norm": 0.586660623550415, "learning_rate": 3.5958037732261804e-05, "loss": 1.737, "step": 10721 }, { "epoch": 0.59762555041525, "grad_norm": 0.5435377359390259, "learning_rate": 3.594949207124075e-05, "loss": 1.5736, "step": 10722 }, { "epoch": 0.5976812886684131, "grad_norm": 0.5378552079200745, "learning_rate": 3.594094685581989e-05, "loss": 1.5723, "step": 10723 }, { "epoch": 0.5977370269215763, "grad_norm": 0.5955055356025696, "learning_rate": 3.5932402086270225e-05, "loss": 1.5994, "step": 10724 }, { "epoch": 0.5977927651747394, "grad_norm": 0.6412666440010071, "learning_rate": 3.592385776286274e-05, "loss": 1.9002, "step": 10725 }, { "epoch": 0.5978485034279025, "grad_norm": 0.5329419374465942, "learning_rate": 3.5915313885868405e-05, "loss": 1.4599, "step": 10726 }, { "epoch": 0.5979042416810657, "grad_norm": 0.5248754024505615, "learning_rate": 3.590677045555822e-05, "loss": 1.4763, "step": 10727 }, { "epoch": 0.5979599799342289, "grad_norm": 0.5744431614875793, "learning_rate": 3.589822747220308e-05, "loss": 1.6913, "step": 10728 }, { "epoch": 0.598015718187392, "grad_norm": 0.5648195743560791, "learning_rate": 3.588968493607398e-05, "loss": 1.6534, "step": 10729 }, { "epoch": 0.5980714564405551, "grad_norm": 0.52918541431427, "learning_rate": 3.588114284744177e-05, "loss": 1.6151, "step": 10730 }, { "epoch": 0.5981271946937183, "grad_norm": 0.5425333976745605, "learning_rate": 3.587260120657742e-05, "loss": 1.4861, "step": 10731 }, { "epoch": 0.5981829329468814, "grad_norm": 0.6288706660270691, "learning_rate": 3.5864060013751775e-05, "loss": 1.984, "step": 10732 }, { "epoch": 0.5982386712000446, "grad_norm": 0.6068618297576904, "learning_rate": 3.585551926923572e-05, "loss": 2.0094, "step": 10733 }, { "epoch": 0.5982944094532078, "grad_norm": 0.546398401260376, "learning_rate": 3.5846978973300146e-05, "loss": 1.6148, "step": 10734 }, { "epoch": 0.5983501477063708, "grad_norm": 0.5468116998672485, "learning_rate": 3.5838439126215863e-05, "loss": 1.6965, "step": 10735 }, { "epoch": 0.598405885959534, "grad_norm": 0.5598884224891663, "learning_rate": 3.582989972825374e-05, "loss": 1.5948, "step": 10736 }, { "epoch": 0.5984616242126972, "grad_norm": 0.6028941869735718, "learning_rate": 3.5821360779684564e-05, "loss": 1.8262, "step": 10737 }, { "epoch": 0.5985173624658603, "grad_norm": 0.6217941045761108, "learning_rate": 3.581282228077916e-05, "loss": 1.9408, "step": 10738 }, { "epoch": 0.5985731007190235, "grad_norm": 0.5594194531440735, "learning_rate": 3.580428423180833e-05, "loss": 1.7485, "step": 10739 }, { "epoch": 0.5986288389721867, "grad_norm": 0.5369617938995361, "learning_rate": 3.5795746633042825e-05, "loss": 1.5062, "step": 10740 }, { "epoch": 0.5986845772253497, "grad_norm": 0.5273184180259705, "learning_rate": 3.578720948475343e-05, "loss": 1.6942, "step": 10741 }, { "epoch": 0.5987403154785129, "grad_norm": 0.5254104137420654, "learning_rate": 3.5778672787210866e-05, "loss": 1.5236, "step": 10742 }, { "epoch": 0.5987960537316761, "grad_norm": 0.578599214553833, "learning_rate": 3.5770136540685915e-05, "loss": 1.7426, "step": 10743 }, { "epoch": 0.5988517919848392, "grad_norm": 0.6897125840187073, "learning_rate": 3.576160074544923e-05, "loss": 1.724, "step": 10744 }, { "epoch": 0.5989075302380024, "grad_norm": 0.5394157767295837, "learning_rate": 3.575306540177157e-05, "loss": 1.6856, "step": 10745 }, { "epoch": 0.5989632684911654, "grad_norm": 0.5963736176490784, "learning_rate": 3.5744530509923624e-05, "loss": 1.6754, "step": 10746 }, { "epoch": 0.5990190067443286, "grad_norm": 0.5288623571395874, "learning_rate": 3.5735996070176036e-05, "loss": 1.4265, "step": 10747 }, { "epoch": 0.5990747449974918, "grad_norm": 0.6559962034225464, "learning_rate": 3.57274620827995e-05, "loss": 1.6428, "step": 10748 }, { "epoch": 0.5991304832506549, "grad_norm": 0.563292920589447, "learning_rate": 3.571892854806464e-05, "loss": 1.5636, "step": 10749 }, { "epoch": 0.5991862215038181, "grad_norm": 0.5565873980522156, "learning_rate": 3.5710395466242126e-05, "loss": 1.6054, "step": 10750 }, { "epoch": 0.5992419597569812, "grad_norm": 0.5816026329994202, "learning_rate": 3.570186283760254e-05, "loss": 1.753, "step": 10751 }, { "epoch": 0.5992976980101443, "grad_norm": 0.5431708693504333, "learning_rate": 3.569333066241648e-05, "loss": 1.4766, "step": 10752 }, { "epoch": 0.5993534362633075, "grad_norm": 0.5268303751945496, "learning_rate": 3.568479894095458e-05, "loss": 1.5641, "step": 10753 }, { "epoch": 0.5994091745164707, "grad_norm": 0.5448703169822693, "learning_rate": 3.567626767348739e-05, "loss": 1.528, "step": 10754 }, { "epoch": 0.5994649127696338, "grad_norm": 0.5704829096794128, "learning_rate": 3.5667736860285485e-05, "loss": 1.8203, "step": 10755 }, { "epoch": 0.599520651022797, "grad_norm": 0.5476005673408508, "learning_rate": 3.5659206501619385e-05, "loss": 1.6485, "step": 10756 }, { "epoch": 0.5995763892759601, "grad_norm": 0.5662007927894592, "learning_rate": 3.565067659775966e-05, "loss": 1.7586, "step": 10757 }, { "epoch": 0.5996321275291232, "grad_norm": 0.5654915571212769, "learning_rate": 3.56421471489768e-05, "loss": 1.607, "step": 10758 }, { "epoch": 0.5996878657822864, "grad_norm": 0.5372860431671143, "learning_rate": 3.563361815554131e-05, "loss": 1.7169, "step": 10759 }, { "epoch": 0.5997436040354496, "grad_norm": 0.528947114944458, "learning_rate": 3.5625089617723716e-05, "loss": 1.5978, "step": 10760 }, { "epoch": 0.5997993422886126, "grad_norm": 0.5702643990516663, "learning_rate": 3.5616561535794445e-05, "loss": 1.8016, "step": 10761 }, { "epoch": 0.5998550805417758, "grad_norm": 0.5951703190803528, "learning_rate": 3.5608033910023995e-05, "loss": 1.6588, "step": 10762 }, { "epoch": 0.599910818794939, "grad_norm": 0.5421859622001648, "learning_rate": 3.55995067406828e-05, "loss": 1.8786, "step": 10763 }, { "epoch": 0.5999665570481021, "grad_norm": 0.553703248500824, "learning_rate": 3.5590980028041274e-05, "loss": 1.7054, "step": 10764 }, { "epoch": 0.6000222953012653, "grad_norm": 0.515870988368988, "learning_rate": 3.558245377236987e-05, "loss": 1.4711, "step": 10765 }, { "epoch": 0.6000780335544285, "grad_norm": 0.547909140586853, "learning_rate": 3.557392797393896e-05, "loss": 1.6006, "step": 10766 }, { "epoch": 0.6001337718075915, "grad_norm": 0.5588516592979431, "learning_rate": 3.556540263301896e-05, "loss": 1.7297, "step": 10767 }, { "epoch": 0.6001895100607547, "grad_norm": 0.5475988388061523, "learning_rate": 3.55568777498802e-05, "loss": 1.684, "step": 10768 }, { "epoch": 0.6002452483139178, "grad_norm": 0.577996551990509, "learning_rate": 3.554835332479311e-05, "loss": 1.5644, "step": 10769 }, { "epoch": 0.600300986567081, "grad_norm": 0.5774939656257629, "learning_rate": 3.553982935802795e-05, "loss": 1.7492, "step": 10770 }, { "epoch": 0.6003567248202442, "grad_norm": 0.6069820523262024, "learning_rate": 3.5531305849855115e-05, "loss": 1.7471, "step": 10771 }, { "epoch": 0.6004124630734072, "grad_norm": 0.5596128106117249, "learning_rate": 3.552278280054491e-05, "loss": 1.6402, "step": 10772 }, { "epoch": 0.6004682013265704, "grad_norm": 0.5300867557525635, "learning_rate": 3.551426021036761e-05, "loss": 1.5077, "step": 10773 }, { "epoch": 0.6005239395797336, "grad_norm": 0.6160181760787964, "learning_rate": 3.550573807959353e-05, "loss": 1.8933, "step": 10774 }, { "epoch": 0.6005796778328967, "grad_norm": 0.5664730072021484, "learning_rate": 3.549721640849293e-05, "loss": 1.5747, "step": 10775 }, { "epoch": 0.6006354160860599, "grad_norm": 0.5451174974441528, "learning_rate": 3.548869519733606e-05, "loss": 1.7253, "step": 10776 }, { "epoch": 0.600691154339223, "grad_norm": 0.5766503214836121, "learning_rate": 3.5480174446393175e-05, "loss": 1.6655, "step": 10777 }, { "epoch": 0.6007468925923861, "grad_norm": 0.6216641068458557, "learning_rate": 3.5471654155934485e-05, "loss": 1.3522, "step": 10778 }, { "epoch": 0.6008026308455493, "grad_norm": 0.5925215482711792, "learning_rate": 3.546313432623025e-05, "loss": 1.8613, "step": 10779 }, { "epoch": 0.6008583690987125, "grad_norm": 0.5722919702529907, "learning_rate": 3.545461495755061e-05, "loss": 1.6849, "step": 10780 }, { "epoch": 0.6009141073518756, "grad_norm": 0.5494630932807922, "learning_rate": 3.54460960501658e-05, "loss": 1.7203, "step": 10781 }, { "epoch": 0.6009698456050387, "grad_norm": 0.5340242385864258, "learning_rate": 3.5437577604345964e-05, "loss": 1.4293, "step": 10782 }, { "epoch": 0.6010255838582019, "grad_norm": 0.5879470705986023, "learning_rate": 3.542905962036126e-05, "loss": 1.7313, "step": 10783 }, { "epoch": 0.601081322111365, "grad_norm": 0.5412936210632324, "learning_rate": 3.542054209848182e-05, "loss": 1.7252, "step": 10784 }, { "epoch": 0.6011370603645282, "grad_norm": 0.5653871893882751, "learning_rate": 3.5412025038977774e-05, "loss": 1.7057, "step": 10785 }, { "epoch": 0.6011927986176914, "grad_norm": 0.5855697989463806, "learning_rate": 3.540350844211927e-05, "loss": 1.813, "step": 10786 }, { "epoch": 0.6012485368708544, "grad_norm": 0.5583329796791077, "learning_rate": 3.539499230817634e-05, "loss": 1.765, "step": 10787 }, { "epoch": 0.6013042751240176, "grad_norm": 0.5965836644172668, "learning_rate": 3.538647663741913e-05, "loss": 1.6828, "step": 10788 }, { "epoch": 0.6013600133771808, "grad_norm": 0.6070554852485657, "learning_rate": 3.5377961430117665e-05, "loss": 1.8815, "step": 10789 }, { "epoch": 0.6014157516303439, "grad_norm": 0.6031116247177124, "learning_rate": 3.5369446686542016e-05, "loss": 1.8932, "step": 10790 }, { "epoch": 0.6014714898835071, "grad_norm": 0.6014582514762878, "learning_rate": 3.5360932406962214e-05, "loss": 1.8328, "step": 10791 }, { "epoch": 0.6015272281366701, "grad_norm": 0.5309771299362183, "learning_rate": 3.5352418591648285e-05, "loss": 1.4607, "step": 10792 }, { "epoch": 0.6015829663898333, "grad_norm": 0.5593672394752502, "learning_rate": 3.534390524087024e-05, "loss": 1.7778, "step": 10793 }, { "epoch": 0.6016387046429965, "grad_norm": 0.541955292224884, "learning_rate": 3.5335392354898055e-05, "loss": 1.6689, "step": 10794 }, { "epoch": 0.6016944428961596, "grad_norm": 0.5780551433563232, "learning_rate": 3.532687993400175e-05, "loss": 1.6137, "step": 10795 }, { "epoch": 0.6017501811493228, "grad_norm": 0.5650581121444702, "learning_rate": 3.5318367978451234e-05, "loss": 1.5538, "step": 10796 }, { "epoch": 0.601805919402486, "grad_norm": 0.5644901990890503, "learning_rate": 3.530985648851649e-05, "loss": 1.7932, "step": 10797 }, { "epoch": 0.601861657655649, "grad_norm": 0.5749223828315735, "learning_rate": 3.530134546446747e-05, "loss": 1.7293, "step": 10798 }, { "epoch": 0.6019173959088122, "grad_norm": 0.6107510328292847, "learning_rate": 3.529283490657406e-05, "loss": 1.6503, "step": 10799 }, { "epoch": 0.6019731341619754, "grad_norm": 0.6036887168884277, "learning_rate": 3.5284324815106184e-05, "loss": 1.7405, "step": 10800 }, { "epoch": 0.6020288724151385, "grad_norm": 0.6208280920982361, "learning_rate": 3.527581519033372e-05, "loss": 1.9516, "step": 10801 }, { "epoch": 0.6020846106683017, "grad_norm": 0.5419480204582214, "learning_rate": 3.5267306032526556e-05, "loss": 1.518, "step": 10802 }, { "epoch": 0.6021403489214648, "grad_norm": 0.594589352607727, "learning_rate": 3.525879734195453e-05, "loss": 1.9563, "step": 10803 }, { "epoch": 0.6021960871746279, "grad_norm": 0.553704023361206, "learning_rate": 3.525028911888751e-05, "loss": 1.4931, "step": 10804 }, { "epoch": 0.6022518254277911, "grad_norm": 0.5537152886390686, "learning_rate": 3.5241781363595344e-05, "loss": 1.6004, "step": 10805 }, { "epoch": 0.6023075636809543, "grad_norm": 0.5621123909950256, "learning_rate": 3.523327407634781e-05, "loss": 1.5503, "step": 10806 }, { "epoch": 0.6023633019341174, "grad_norm": 0.6001083254814148, "learning_rate": 3.5224767257414734e-05, "loss": 1.8868, "step": 10807 }, { "epoch": 0.6024190401872805, "grad_norm": 0.5669860243797302, "learning_rate": 3.521626090706589e-05, "loss": 1.7416, "step": 10808 }, { "epoch": 0.6024747784404437, "grad_norm": 0.5396543741226196, "learning_rate": 3.5207755025571066e-05, "loss": 1.6226, "step": 10809 }, { "epoch": 0.6025305166936068, "grad_norm": 0.5446880459785461, "learning_rate": 3.519924961319999e-05, "loss": 1.5139, "step": 10810 }, { "epoch": 0.60258625494677, "grad_norm": 0.5307889580726624, "learning_rate": 3.519074467022241e-05, "loss": 1.5466, "step": 10811 }, { "epoch": 0.6026419931999332, "grad_norm": 0.5402228832244873, "learning_rate": 3.518224019690811e-05, "loss": 1.5498, "step": 10812 }, { "epoch": 0.6026977314530962, "grad_norm": 0.5594295859336853, "learning_rate": 3.517373619352672e-05, "loss": 1.5064, "step": 10813 }, { "epoch": 0.6027534697062594, "grad_norm": 0.5983800292015076, "learning_rate": 3.516523266034799e-05, "loss": 1.9771, "step": 10814 }, { "epoch": 0.6028092079594225, "grad_norm": 0.5792510509490967, "learning_rate": 3.515672959764158e-05, "loss": 1.5936, "step": 10815 }, { "epoch": 0.6028649462125857, "grad_norm": 0.5335869789123535, "learning_rate": 3.514822700567718e-05, "loss": 1.7219, "step": 10816 }, { "epoch": 0.6029206844657489, "grad_norm": 0.5837699174880981, "learning_rate": 3.5139724884724406e-05, "loss": 1.6883, "step": 10817 }, { "epoch": 0.6029764227189119, "grad_norm": 0.5807821154594421, "learning_rate": 3.5131223235052927e-05, "loss": 1.8162, "step": 10818 }, { "epoch": 0.6030321609720751, "grad_norm": 0.5589340329170227, "learning_rate": 3.512272205693236e-05, "loss": 1.7268, "step": 10819 }, { "epoch": 0.6030878992252383, "grad_norm": 0.6129077672958374, "learning_rate": 3.511422135063229e-05, "loss": 1.7887, "step": 10820 }, { "epoch": 0.6031436374784014, "grad_norm": 0.5553536415100098, "learning_rate": 3.5105721116422364e-05, "loss": 1.5031, "step": 10821 }, { "epoch": 0.6031993757315646, "grad_norm": 0.6441968083381653, "learning_rate": 3.509722135457209e-05, "loss": 1.486, "step": 10822 }, { "epoch": 0.6032551139847278, "grad_norm": 0.5330159068107605, "learning_rate": 3.5088722065351074e-05, "loss": 1.6346, "step": 10823 }, { "epoch": 0.6033108522378908, "grad_norm": 0.5661835670471191, "learning_rate": 3.508022324902888e-05, "loss": 1.6415, "step": 10824 }, { "epoch": 0.603366590491054, "grad_norm": 0.6099451184272766, "learning_rate": 3.507172490587499e-05, "loss": 1.8652, "step": 10825 }, { "epoch": 0.6034223287442172, "grad_norm": 0.5531985759735107, "learning_rate": 3.5063227036158956e-05, "loss": 1.6726, "step": 10826 }, { "epoch": 0.6034780669973803, "grad_norm": 0.5670011639595032, "learning_rate": 3.5054729640150274e-05, "loss": 1.6772, "step": 10827 }, { "epoch": 0.6035338052505435, "grad_norm": 0.5749963521957397, "learning_rate": 3.504623271811843e-05, "loss": 1.714, "step": 10828 }, { "epoch": 0.6035895435037066, "grad_norm": 0.5688133835792542, "learning_rate": 3.5037736270332886e-05, "loss": 1.6752, "step": 10829 }, { "epoch": 0.6036452817568697, "grad_norm": 0.5335734486579895, "learning_rate": 3.502924029706312e-05, "loss": 1.3809, "step": 10830 }, { "epoch": 0.6037010200100329, "grad_norm": 0.5560937523841858, "learning_rate": 3.502074479857858e-05, "loss": 1.6847, "step": 10831 }, { "epoch": 0.6037567582631961, "grad_norm": 0.5606946349143982, "learning_rate": 3.501224977514867e-05, "loss": 1.7664, "step": 10832 }, { "epoch": 0.6038124965163592, "grad_norm": 0.5246312022209167, "learning_rate": 3.500375522704281e-05, "loss": 1.631, "step": 10833 }, { "epoch": 0.6038682347695223, "grad_norm": 0.5628395676612854, "learning_rate": 3.49952611545304e-05, "loss": 1.6952, "step": 10834 }, { "epoch": 0.6039239730226855, "grad_norm": 0.5548897385597229, "learning_rate": 3.498676755788083e-05, "loss": 1.5307, "step": 10835 }, { "epoch": 0.6039797112758486, "grad_norm": 0.5087965130805969, "learning_rate": 3.497827443736344e-05, "loss": 1.4216, "step": 10836 }, { "epoch": 0.6040354495290118, "grad_norm": 0.5709730982780457, "learning_rate": 3.496978179324761e-05, "loss": 1.7554, "step": 10837 }, { "epoch": 0.6040911877821749, "grad_norm": 0.5632007122039795, "learning_rate": 3.49612896258027e-05, "loss": 1.677, "step": 10838 }, { "epoch": 0.604146926035338, "grad_norm": 0.5664843320846558, "learning_rate": 3.4952797935297955e-05, "loss": 1.8422, "step": 10839 }, { "epoch": 0.6042026642885012, "grad_norm": 0.5658974051475525, "learning_rate": 3.494430672200276e-05, "loss": 1.6821, "step": 10840 }, { "epoch": 0.6042584025416643, "grad_norm": 0.5238000750541687, "learning_rate": 3.493581598618636e-05, "loss": 1.635, "step": 10841 }, { "epoch": 0.6043141407948275, "grad_norm": 0.5444875359535217, "learning_rate": 3.4927325728118055e-05, "loss": 1.7191, "step": 10842 }, { "epoch": 0.6043698790479907, "grad_norm": 0.5294889211654663, "learning_rate": 3.491883594806709e-05, "loss": 1.637, "step": 10843 }, { "epoch": 0.6044256173011537, "grad_norm": 0.5691200494766235, "learning_rate": 3.4910346646302716e-05, "loss": 1.6545, "step": 10844 }, { "epoch": 0.6044813555543169, "grad_norm": 0.6108242869377136, "learning_rate": 3.4901857823094184e-05, "loss": 1.7508, "step": 10845 }, { "epoch": 0.6045370938074801, "grad_norm": 0.5625608563423157, "learning_rate": 3.489336947871067e-05, "loss": 1.5451, "step": 10846 }, { "epoch": 0.6045928320606432, "grad_norm": 0.5418411493301392, "learning_rate": 3.488488161342143e-05, "loss": 1.4548, "step": 10847 }, { "epoch": 0.6046485703138064, "grad_norm": 0.6216863393783569, "learning_rate": 3.487639422749559e-05, "loss": 1.926, "step": 10848 }, { "epoch": 0.6047043085669696, "grad_norm": 0.6217368245124817, "learning_rate": 3.486790732120235e-05, "loss": 1.8622, "step": 10849 }, { "epoch": 0.6047600468201326, "grad_norm": 0.525329053401947, "learning_rate": 3.485942089481089e-05, "loss": 1.6005, "step": 10850 }, { "epoch": 0.6048157850732958, "grad_norm": 0.5369617938995361, "learning_rate": 3.4850934948590295e-05, "loss": 1.6028, "step": 10851 }, { "epoch": 0.604871523326459, "grad_norm": 0.584526538848877, "learning_rate": 3.484244948280974e-05, "loss": 1.8607, "step": 10852 }, { "epoch": 0.6049272615796221, "grad_norm": 0.5628748536109924, "learning_rate": 3.4833964497738305e-05, "loss": 1.6398, "step": 10853 }, { "epoch": 0.6049829998327853, "grad_norm": 0.5512056946754456, "learning_rate": 3.482547999364509e-05, "loss": 1.6174, "step": 10854 }, { "epoch": 0.6050387380859484, "grad_norm": 0.5609381794929504, "learning_rate": 3.481699597079916e-05, "loss": 1.742, "step": 10855 }, { "epoch": 0.6050944763391115, "grad_norm": 0.5258693099021912, "learning_rate": 3.480851242946961e-05, "loss": 1.626, "step": 10856 }, { "epoch": 0.6051502145922747, "grad_norm": 0.5588704347610474, "learning_rate": 3.4800029369925476e-05, "loss": 1.4834, "step": 10857 }, { "epoch": 0.6052059528454379, "grad_norm": 0.6147596836090088, "learning_rate": 3.4791546792435785e-05, "loss": 1.9472, "step": 10858 }, { "epoch": 0.605261691098601, "grad_norm": 0.5736888647079468, "learning_rate": 3.478306469726957e-05, "loss": 1.8025, "step": 10859 }, { "epoch": 0.6053174293517641, "grad_norm": 0.5551031231880188, "learning_rate": 3.4774583084695804e-05, "loss": 1.7116, "step": 10860 }, { "epoch": 0.6053731676049272, "grad_norm": 0.5682637095451355, "learning_rate": 3.476610195498351e-05, "loss": 1.5637, "step": 10861 }, { "epoch": 0.6054289058580904, "grad_norm": 0.5585805773735046, "learning_rate": 3.4757621308401625e-05, "loss": 1.7162, "step": 10862 }, { "epoch": 0.6054846441112536, "grad_norm": 0.7432271242141724, "learning_rate": 3.474914114521912e-05, "loss": 1.8017, "step": 10863 }, { "epoch": 0.6055403823644167, "grad_norm": 0.5110836029052734, "learning_rate": 3.474066146570496e-05, "loss": 1.5233, "step": 10864 }, { "epoch": 0.6055961206175798, "grad_norm": 0.5634505748748779, "learning_rate": 3.4732182270128026e-05, "loss": 1.7133, "step": 10865 }, { "epoch": 0.605651858870743, "grad_norm": 0.5704511404037476, "learning_rate": 3.472370355875727e-05, "loss": 1.6873, "step": 10866 }, { "epoch": 0.6057075971239061, "grad_norm": 0.5130916833877563, "learning_rate": 3.471522533186157e-05, "loss": 1.4978, "step": 10867 }, { "epoch": 0.6057633353770693, "grad_norm": 0.5763922333717346, "learning_rate": 3.470674758970981e-05, "loss": 1.7908, "step": 10868 }, { "epoch": 0.6058190736302325, "grad_norm": 0.5909677147865295, "learning_rate": 3.4698270332570835e-05, "loss": 1.8559, "step": 10869 }, { "epoch": 0.6058748118833955, "grad_norm": 0.5599098205566406, "learning_rate": 3.468979356071351e-05, "loss": 1.8091, "step": 10870 }, { "epoch": 0.6059305501365587, "grad_norm": 0.5690943598747253, "learning_rate": 3.468131727440669e-05, "loss": 1.6181, "step": 10871 }, { "epoch": 0.6059862883897219, "grad_norm": 0.5756849646568298, "learning_rate": 3.467284147391914e-05, "loss": 1.8721, "step": 10872 }, { "epoch": 0.606042026642885, "grad_norm": 0.5114251971244812, "learning_rate": 3.466436615951973e-05, "loss": 1.3427, "step": 10873 }, { "epoch": 0.6060977648960482, "grad_norm": 0.5647757053375244, "learning_rate": 3.465589133147718e-05, "loss": 1.6981, "step": 10874 }, { "epoch": 0.6061535031492113, "grad_norm": 0.6103023290634155, "learning_rate": 3.464741699006031e-05, "loss": 1.3025, "step": 10875 }, { "epoch": 0.6062092414023744, "grad_norm": 0.5083357095718384, "learning_rate": 3.4638943135537864e-05, "loss": 1.5597, "step": 10876 }, { "epoch": 0.6062649796555376, "grad_norm": 0.5760177373886108, "learning_rate": 3.463046976817857e-05, "loss": 1.6599, "step": 10877 }, { "epoch": 0.6063207179087008, "grad_norm": 0.5807527899742126, "learning_rate": 3.462199688825119e-05, "loss": 1.7096, "step": 10878 }, { "epoch": 0.6063764561618639, "grad_norm": 0.5611261129379272, "learning_rate": 3.461352449602439e-05, "loss": 1.6227, "step": 10879 }, { "epoch": 0.606432194415027, "grad_norm": 0.5763663053512573, "learning_rate": 3.4605052591766884e-05, "loss": 1.9214, "step": 10880 }, { "epoch": 0.6064879326681902, "grad_norm": 0.572849452495575, "learning_rate": 3.459658117574733e-05, "loss": 1.6082, "step": 10881 }, { "epoch": 0.6065436709213533, "grad_norm": 0.6065723896026611, "learning_rate": 3.458811024823444e-05, "loss": 1.5039, "step": 10882 }, { "epoch": 0.6065994091745165, "grad_norm": 0.5590231418609619, "learning_rate": 3.4579639809496835e-05, "loss": 1.5629, "step": 10883 }, { "epoch": 0.6066551474276796, "grad_norm": 0.535981297492981, "learning_rate": 3.4571169859803135e-05, "loss": 1.5471, "step": 10884 }, { "epoch": 0.6067108856808427, "grad_norm": 0.5847405195236206, "learning_rate": 3.4562700399421985e-05, "loss": 1.7089, "step": 10885 }, { "epoch": 0.6067666239340059, "grad_norm": 0.5974164605140686, "learning_rate": 3.455423142862196e-05, "loss": 1.7032, "step": 10886 }, { "epoch": 0.606822362187169, "grad_norm": 0.5608033537864685, "learning_rate": 3.4545762947671676e-05, "loss": 1.6528, "step": 10887 }, { "epoch": 0.6068781004403322, "grad_norm": 0.5671756863594055, "learning_rate": 3.453729495683967e-05, "loss": 1.641, "step": 10888 }, { "epoch": 0.6069338386934954, "grad_norm": 0.5884689688682556, "learning_rate": 3.4528827456394506e-05, "loss": 1.77, "step": 10889 }, { "epoch": 0.6069895769466584, "grad_norm": 0.5784920454025269, "learning_rate": 3.452036044660476e-05, "loss": 1.7991, "step": 10890 }, { "epoch": 0.6070453151998216, "grad_norm": 0.5790646076202393, "learning_rate": 3.451189392773891e-05, "loss": 1.592, "step": 10891 }, { "epoch": 0.6071010534529848, "grad_norm": 0.576714038848877, "learning_rate": 3.45034279000655e-05, "loss": 1.4394, "step": 10892 }, { "epoch": 0.6071567917061479, "grad_norm": 0.6155091524124146, "learning_rate": 3.449496236385298e-05, "loss": 1.7956, "step": 10893 }, { "epoch": 0.6072125299593111, "grad_norm": 0.571345329284668, "learning_rate": 3.448649731936988e-05, "loss": 1.6404, "step": 10894 }, { "epoch": 0.6072682682124743, "grad_norm": 0.5463574528694153, "learning_rate": 3.4478032766884615e-05, "loss": 1.6412, "step": 10895 }, { "epoch": 0.6073240064656373, "grad_norm": 0.5688880085945129, "learning_rate": 3.446956870666565e-05, "loss": 1.699, "step": 10896 }, { "epoch": 0.6073797447188005, "grad_norm": 0.5384257435798645, "learning_rate": 3.446110513898143e-05, "loss": 1.6323, "step": 10897 }, { "epoch": 0.6074354829719637, "grad_norm": 0.5217699408531189, "learning_rate": 3.445264206410034e-05, "loss": 1.727, "step": 10898 }, { "epoch": 0.6074912212251268, "grad_norm": 0.5378054976463318, "learning_rate": 3.444417948229083e-05, "loss": 1.4174, "step": 10899 }, { "epoch": 0.60754695947829, "grad_norm": 0.5939854383468628, "learning_rate": 3.443571739382121e-05, "loss": 1.7969, "step": 10900 }, { "epoch": 0.6076026977314531, "grad_norm": 0.6069276332855225, "learning_rate": 3.44272557989599e-05, "loss": 1.7629, "step": 10901 }, { "epoch": 0.6076584359846162, "grad_norm": 0.5685117244720459, "learning_rate": 3.4418794697975254e-05, "loss": 1.6894, "step": 10902 }, { "epoch": 0.6077141742377794, "grad_norm": 0.5635547041893005, "learning_rate": 3.4410334091135586e-05, "loss": 1.6697, "step": 10903 }, { "epoch": 0.6077699124909426, "grad_norm": 0.5866085290908813, "learning_rate": 3.440187397870923e-05, "loss": 1.7436, "step": 10904 }, { "epoch": 0.6078256507441057, "grad_norm": 0.599195659160614, "learning_rate": 3.4393414360964486e-05, "loss": 1.921, "step": 10905 }, { "epoch": 0.6078813889972688, "grad_norm": 0.5734924674034119, "learning_rate": 3.438495523816966e-05, "loss": 1.5432, "step": 10906 }, { "epoch": 0.6079371272504319, "grad_norm": 0.5457361340522766, "learning_rate": 3.437649661059298e-05, "loss": 1.4088, "step": 10907 }, { "epoch": 0.6079928655035951, "grad_norm": 0.6204760074615479, "learning_rate": 3.436803847850275e-05, "loss": 1.964, "step": 10908 }, { "epoch": 0.6080486037567583, "grad_norm": 0.5474656224250793, "learning_rate": 3.4359580842167205e-05, "loss": 1.8181, "step": 10909 }, { "epoch": 0.6081043420099214, "grad_norm": 0.5399399399757385, "learning_rate": 3.435112370185456e-05, "loss": 1.5749, "step": 10910 }, { "epoch": 0.6081600802630845, "grad_norm": 0.5384534001350403, "learning_rate": 3.434266705783305e-05, "loss": 1.5584, "step": 10911 }, { "epoch": 0.6082158185162477, "grad_norm": 0.5624092221260071, "learning_rate": 3.4334210910370833e-05, "loss": 1.4784, "step": 10912 }, { "epoch": 0.6082715567694108, "grad_norm": 0.5896902084350586, "learning_rate": 3.4325755259736114e-05, "loss": 1.748, "step": 10913 }, { "epoch": 0.608327295022574, "grad_norm": 0.5084818601608276, "learning_rate": 3.4317300106197045e-05, "loss": 1.4765, "step": 10914 }, { "epoch": 0.6083830332757372, "grad_norm": 0.5438110828399658, "learning_rate": 3.430884545002178e-05, "loss": 1.5289, "step": 10915 }, { "epoch": 0.6084387715289002, "grad_norm": 0.5773879885673523, "learning_rate": 3.430039129147846e-05, "loss": 1.7349, "step": 10916 }, { "epoch": 0.6084945097820634, "grad_norm": 0.5718106031417847, "learning_rate": 3.4291937630835184e-05, "loss": 1.8119, "step": 10917 }, { "epoch": 0.6085502480352266, "grad_norm": 0.5841074585914612, "learning_rate": 3.428348446836008e-05, "loss": 1.4698, "step": 10918 }, { "epoch": 0.6086059862883897, "grad_norm": 0.5340185165405273, "learning_rate": 3.42750318043212e-05, "loss": 1.7174, "step": 10919 }, { "epoch": 0.6086617245415529, "grad_norm": 0.5589284300804138, "learning_rate": 3.4266579638986637e-05, "loss": 1.7808, "step": 10920 }, { "epoch": 0.6087174627947161, "grad_norm": 0.5883926153182983, "learning_rate": 3.4258127972624423e-05, "loss": 1.5736, "step": 10921 }, { "epoch": 0.6087732010478791, "grad_norm": 0.525952160358429, "learning_rate": 3.424967680550261e-05, "loss": 1.3187, "step": 10922 }, { "epoch": 0.6088289393010423, "grad_norm": 0.6014953851699829, "learning_rate": 3.424122613788923e-05, "loss": 1.6827, "step": 10923 }, { "epoch": 0.6088846775542055, "grad_norm": 0.5605544447898865, "learning_rate": 3.423277597005226e-05, "loss": 1.548, "step": 10924 }, { "epoch": 0.6089404158073686, "grad_norm": 0.5829630494117737, "learning_rate": 3.422432630225974e-05, "loss": 1.7789, "step": 10925 }, { "epoch": 0.6089961540605318, "grad_norm": 0.5912023186683655, "learning_rate": 3.421587713477957e-05, "loss": 1.6069, "step": 10926 }, { "epoch": 0.6090518923136949, "grad_norm": 0.5478500127792358, "learning_rate": 3.4207428467879774e-05, "loss": 1.4861, "step": 10927 }, { "epoch": 0.609107630566858, "grad_norm": 0.5500767827033997, "learning_rate": 3.419898030182825e-05, "loss": 1.5054, "step": 10928 }, { "epoch": 0.6091633688200212, "grad_norm": 0.6093530654907227, "learning_rate": 3.4190532636892955e-05, "loss": 1.9566, "step": 10929 }, { "epoch": 0.6092191070731843, "grad_norm": 0.5310298204421997, "learning_rate": 3.418208547334181e-05, "loss": 1.4754, "step": 10930 }, { "epoch": 0.6092748453263475, "grad_norm": 0.5761799216270447, "learning_rate": 3.417363881144267e-05, "loss": 1.4885, "step": 10931 }, { "epoch": 0.6093305835795106, "grad_norm": 0.5826595425605774, "learning_rate": 3.416519265146343e-05, "loss": 1.5243, "step": 10932 }, { "epoch": 0.6093863218326737, "grad_norm": 0.5796806216239929, "learning_rate": 3.415674699367195e-05, "loss": 1.5245, "step": 10933 }, { "epoch": 0.6094420600858369, "grad_norm": 0.584348738193512, "learning_rate": 3.414830183833608e-05, "loss": 1.6732, "step": 10934 }, { "epoch": 0.6094977983390001, "grad_norm": 0.5404664874076843, "learning_rate": 3.413985718572368e-05, "loss": 1.6273, "step": 10935 }, { "epoch": 0.6095535365921632, "grad_norm": 0.5560908317565918, "learning_rate": 3.413141303610252e-05, "loss": 1.7216, "step": 10936 }, { "epoch": 0.6096092748453263, "grad_norm": 0.5780743956565857, "learning_rate": 3.412296938974043e-05, "loss": 1.6605, "step": 10937 }, { "epoch": 0.6096650130984895, "grad_norm": 0.5821511745452881, "learning_rate": 3.4114526246905176e-05, "loss": 1.6557, "step": 10938 }, { "epoch": 0.6097207513516526, "grad_norm": 0.6239771246910095, "learning_rate": 3.410608360786454e-05, "loss": 1.6936, "step": 10939 }, { "epoch": 0.6097764896048158, "grad_norm": 0.5467875003814697, "learning_rate": 3.4097641472886245e-05, "loss": 1.5873, "step": 10940 }, { "epoch": 0.609832227857979, "grad_norm": 0.5692501068115234, "learning_rate": 3.408919984223804e-05, "loss": 1.636, "step": 10941 }, { "epoch": 0.609887966111142, "grad_norm": 0.553924560546875, "learning_rate": 3.408075871618767e-05, "loss": 1.5813, "step": 10942 }, { "epoch": 0.6099437043643052, "grad_norm": 0.601086437702179, "learning_rate": 3.407231809500281e-05, "loss": 1.8607, "step": 10943 }, { "epoch": 0.6099994426174684, "grad_norm": 0.57811039686203, "learning_rate": 3.406387797895116e-05, "loss": 1.6621, "step": 10944 }, { "epoch": 0.6100551808706315, "grad_norm": 0.5589052438735962, "learning_rate": 3.405543836830038e-05, "loss": 1.8064, "step": 10945 }, { "epoch": 0.6101109191237947, "grad_norm": 0.5751338005065918, "learning_rate": 3.404699926331814e-05, "loss": 1.6508, "step": 10946 }, { "epoch": 0.6101666573769579, "grad_norm": 0.5896758437156677, "learning_rate": 3.403856066427207e-05, "loss": 1.8309, "step": 10947 }, { "epoch": 0.6102223956301209, "grad_norm": 0.5646328330039978, "learning_rate": 3.403012257142977e-05, "loss": 1.6079, "step": 10948 }, { "epoch": 0.6102781338832841, "grad_norm": 0.5370541214942932, "learning_rate": 3.4021684985058914e-05, "loss": 1.5691, "step": 10949 }, { "epoch": 0.6103338721364473, "grad_norm": 0.5964412093162537, "learning_rate": 3.4013247905427e-05, "loss": 1.7893, "step": 10950 }, { "epoch": 0.6103896103896104, "grad_norm": 0.5477089285850525, "learning_rate": 3.4004811332801705e-05, "loss": 1.708, "step": 10951 }, { "epoch": 0.6104453486427736, "grad_norm": 0.7580403089523315, "learning_rate": 3.39963752674505e-05, "loss": 1.9378, "step": 10952 }, { "epoch": 0.6105010868959366, "grad_norm": 0.5177296996116638, "learning_rate": 3.398793970964098e-05, "loss": 1.7007, "step": 10953 }, { "epoch": 0.6105568251490998, "grad_norm": 0.5849555134773254, "learning_rate": 3.397950465964065e-05, "loss": 1.6816, "step": 10954 }, { "epoch": 0.610612563402263, "grad_norm": 0.5685425996780396, "learning_rate": 3.3971070117717016e-05, "loss": 1.8814, "step": 10955 }, { "epoch": 0.6106683016554261, "grad_norm": 0.6520184278488159, "learning_rate": 3.39626360841376e-05, "loss": 1.7222, "step": 10956 }, { "epoch": 0.6107240399085893, "grad_norm": 0.579638659954071, "learning_rate": 3.395420255916986e-05, "loss": 1.6526, "step": 10957 }, { "epoch": 0.6107797781617524, "grad_norm": 0.5428391098976135, "learning_rate": 3.3945769543081264e-05, "loss": 1.5243, "step": 10958 }, { "epoch": 0.6108355164149155, "grad_norm": 0.5601480603218079, "learning_rate": 3.3937337036139236e-05, "loss": 1.6196, "step": 10959 }, { "epoch": 0.6108912546680787, "grad_norm": 0.6202312111854553, "learning_rate": 3.392890503861124e-05, "loss": 1.5272, "step": 10960 }, { "epoch": 0.6109469929212419, "grad_norm": 0.5722497701644897, "learning_rate": 3.3920473550764676e-05, "loss": 1.6407, "step": 10961 }, { "epoch": 0.611002731174405, "grad_norm": 0.5435575246810913, "learning_rate": 3.3912042572866934e-05, "loss": 1.618, "step": 10962 }, { "epoch": 0.6110584694275681, "grad_norm": 0.6046878695487976, "learning_rate": 3.390361210518542e-05, "loss": 1.7345, "step": 10963 }, { "epoch": 0.6111142076807313, "grad_norm": 0.5520839095115662, "learning_rate": 3.389518214798746e-05, "loss": 1.5911, "step": 10964 }, { "epoch": 0.6111699459338944, "grad_norm": 0.502733588218689, "learning_rate": 3.388675270154045e-05, "loss": 1.4994, "step": 10965 }, { "epoch": 0.6112256841870576, "grad_norm": 0.5726004242897034, "learning_rate": 3.3878323766111675e-05, "loss": 1.5398, "step": 10966 }, { "epoch": 0.6112814224402208, "grad_norm": 0.8424298763275146, "learning_rate": 3.3869895341968463e-05, "loss": 1.501, "step": 10967 }, { "epoch": 0.6113371606933838, "grad_norm": 0.5562017560005188, "learning_rate": 3.386146742937815e-05, "loss": 1.5948, "step": 10968 }, { "epoch": 0.611392898946547, "grad_norm": 0.5517137050628662, "learning_rate": 3.385304002860799e-05, "loss": 1.5442, "step": 10969 }, { "epoch": 0.6114486371997102, "grad_norm": 0.5374296307563782, "learning_rate": 3.384461313992526e-05, "loss": 1.743, "step": 10970 }, { "epoch": 0.6115043754528733, "grad_norm": 0.6051550507545471, "learning_rate": 3.38361867635972e-05, "loss": 1.8001, "step": 10971 }, { "epoch": 0.6115601137060365, "grad_norm": 0.5788960456848145, "learning_rate": 3.382776089989107e-05, "loss": 1.8217, "step": 10972 }, { "epoch": 0.6116158519591997, "grad_norm": 0.5100364089012146, "learning_rate": 3.3819335549074064e-05, "loss": 1.5457, "step": 10973 }, { "epoch": 0.6116715902123627, "grad_norm": 0.5339128971099854, "learning_rate": 3.3810910711413376e-05, "loss": 1.571, "step": 10974 }, { "epoch": 0.6117273284655259, "grad_norm": 0.5776057839393616, "learning_rate": 3.380248638717625e-05, "loss": 1.579, "step": 10975 }, { "epoch": 0.611783066718689, "grad_norm": 0.7491598725318909, "learning_rate": 3.379406257662977e-05, "loss": 1.5398, "step": 10976 }, { "epoch": 0.6118388049718522, "grad_norm": 0.5306689739227295, "learning_rate": 3.378563928004118e-05, "loss": 1.7963, "step": 10977 }, { "epoch": 0.6118945432250154, "grad_norm": 0.6286993026733398, "learning_rate": 3.377721649767755e-05, "loss": 1.7298, "step": 10978 }, { "epoch": 0.6119502814781784, "grad_norm": 0.5252998471260071, "learning_rate": 3.376879422980605e-05, "loss": 1.6033, "step": 10979 }, { "epoch": 0.6120060197313416, "grad_norm": 0.5619044899940491, "learning_rate": 3.3760372476693744e-05, "loss": 1.5339, "step": 10980 }, { "epoch": 0.6120617579845048, "grad_norm": 0.5480098128318787, "learning_rate": 3.375195123860774e-05, "loss": 1.4833, "step": 10981 }, { "epoch": 0.6121174962376679, "grad_norm": 0.5810719728469849, "learning_rate": 3.374353051581513e-05, "loss": 1.7522, "step": 10982 }, { "epoch": 0.612173234490831, "grad_norm": 0.6109387874603271, "learning_rate": 3.373511030858292e-05, "loss": 1.7102, "step": 10983 }, { "epoch": 0.6122289727439942, "grad_norm": 0.556450605392456, "learning_rate": 3.372669061717821e-05, "loss": 1.6045, "step": 10984 }, { "epoch": 0.6122847109971573, "grad_norm": 0.5535019636154175, "learning_rate": 3.3718271441867964e-05, "loss": 1.7434, "step": 10985 }, { "epoch": 0.6123404492503205, "grad_norm": 0.5736287832260132, "learning_rate": 3.370985278291923e-05, "loss": 1.5278, "step": 10986 }, { "epoch": 0.6123961875034837, "grad_norm": 0.5478652715682983, "learning_rate": 3.3701434640599e-05, "loss": 1.4593, "step": 10987 }, { "epoch": 0.6124519257566468, "grad_norm": 0.5607852935791016, "learning_rate": 3.369301701517422e-05, "loss": 1.616, "step": 10988 }, { "epoch": 0.6125076640098099, "grad_norm": 0.6062629222869873, "learning_rate": 3.3684599906911885e-05, "loss": 1.7739, "step": 10989 }, { "epoch": 0.6125634022629731, "grad_norm": 0.6138796806335449, "learning_rate": 3.36761833160789e-05, "loss": 1.7103, "step": 10990 }, { "epoch": 0.6126191405161362, "grad_norm": 0.5835701823234558, "learning_rate": 3.3667767242942215e-05, "loss": 1.6394, "step": 10991 }, { "epoch": 0.6126748787692994, "grad_norm": 0.5834316611289978, "learning_rate": 3.3659351687768714e-05, "loss": 1.6211, "step": 10992 }, { "epoch": 0.6127306170224626, "grad_norm": 0.5939779877662659, "learning_rate": 3.3650936650825305e-05, "loss": 1.9074, "step": 10993 }, { "epoch": 0.6127863552756256, "grad_norm": 0.5545198321342468, "learning_rate": 3.364252213237887e-05, "loss": 1.7556, "step": 10994 }, { "epoch": 0.6128420935287888, "grad_norm": 0.536385715007782, "learning_rate": 3.363410813269627e-05, "loss": 1.6704, "step": 10995 }, { "epoch": 0.612897831781952, "grad_norm": 0.5516625046730042, "learning_rate": 3.362569465204434e-05, "loss": 1.629, "step": 10996 }, { "epoch": 0.6129535700351151, "grad_norm": 0.5734841227531433, "learning_rate": 3.361728169068989e-05, "loss": 1.5543, "step": 10997 }, { "epoch": 0.6130093082882783, "grad_norm": 0.48854759335517883, "learning_rate": 3.360886924889977e-05, "loss": 1.484, "step": 10998 }, { "epoch": 0.6130650465414413, "grad_norm": 0.5883350372314453, "learning_rate": 3.360045732694074e-05, "loss": 1.5407, "step": 10999 }, { "epoch": 0.6131207847946045, "grad_norm": 0.5424591898918152, "learning_rate": 3.3592045925079575e-05, "loss": 1.801, "step": 11000 }, { "epoch": 0.6131765230477677, "grad_norm": 0.5398431420326233, "learning_rate": 3.3583635043583075e-05, "loss": 1.7198, "step": 11001 }, { "epoch": 0.6132322613009308, "grad_norm": 0.5736198425292969, "learning_rate": 3.357522468271793e-05, "loss": 1.8192, "step": 11002 }, { "epoch": 0.613287999554094, "grad_norm": 0.5277306437492371, "learning_rate": 3.356681484275091e-05, "loss": 1.5158, "step": 11003 }, { "epoch": 0.6133437378072571, "grad_norm": 0.539786696434021, "learning_rate": 3.3558405523948703e-05, "loss": 1.5025, "step": 11004 }, { "epoch": 0.6133994760604202, "grad_norm": 0.5373875498771667, "learning_rate": 3.354999672657802e-05, "loss": 1.4258, "step": 11005 }, { "epoch": 0.6134552143135834, "grad_norm": 0.6097363829612732, "learning_rate": 3.354158845090553e-05, "loss": 1.5761, "step": 11006 }, { "epoch": 0.6135109525667466, "grad_norm": 0.576100766658783, "learning_rate": 3.3533180697197886e-05, "loss": 1.8187, "step": 11007 }, { "epoch": 0.6135666908199097, "grad_norm": 0.5665978789329529, "learning_rate": 3.352477346572176e-05, "loss": 1.655, "step": 11008 }, { "epoch": 0.6136224290730729, "grad_norm": 0.5619946122169495, "learning_rate": 3.351636675674373e-05, "loss": 1.7595, "step": 11009 }, { "epoch": 0.613678167326236, "grad_norm": 0.5662100911140442, "learning_rate": 3.350796057053048e-05, "loss": 1.7159, "step": 11010 }, { "epoch": 0.6137339055793991, "grad_norm": 0.5565624237060547, "learning_rate": 3.349955490734854e-05, "loss": 1.6548, "step": 11011 }, { "epoch": 0.6137896438325623, "grad_norm": 0.5969928503036499, "learning_rate": 3.349114976746451e-05, "loss": 1.6303, "step": 11012 }, { "epoch": 0.6138453820857255, "grad_norm": 0.552580714225769, "learning_rate": 3.348274515114498e-05, "loss": 1.6347, "step": 11013 }, { "epoch": 0.6139011203388886, "grad_norm": 0.4925132393836975, "learning_rate": 3.3474341058656453e-05, "loss": 1.3515, "step": 11014 }, { "epoch": 0.6139568585920517, "grad_norm": 0.5885698795318604, "learning_rate": 3.346593749026549e-05, "loss": 1.8692, "step": 11015 }, { "epoch": 0.6140125968452149, "grad_norm": 0.5670897364616394, "learning_rate": 3.345753444623858e-05, "loss": 1.6248, "step": 11016 }, { "epoch": 0.614068335098378, "grad_norm": 0.5874050259590149, "learning_rate": 3.344913192684224e-05, "loss": 1.6746, "step": 11017 }, { "epoch": 0.6141240733515412, "grad_norm": 0.5586856603622437, "learning_rate": 3.344072993234292e-05, "loss": 1.6177, "step": 11018 }, { "epoch": 0.6141798116047044, "grad_norm": 0.5451282858848572, "learning_rate": 3.343232846300709e-05, "loss": 1.6588, "step": 11019 }, { "epoch": 0.6142355498578674, "grad_norm": 0.5075144171714783, "learning_rate": 3.3423927519101225e-05, "loss": 1.257, "step": 11020 }, { "epoch": 0.6142912881110306, "grad_norm": 0.569128692150116, "learning_rate": 3.3415527100891734e-05, "loss": 1.5108, "step": 11021 }, { "epoch": 0.6143470263641937, "grad_norm": 0.6079961061477661, "learning_rate": 3.3407127208645026e-05, "loss": 1.8459, "step": 11022 }, { "epoch": 0.6144027646173569, "grad_norm": 0.5876660943031311, "learning_rate": 3.33987278426275e-05, "loss": 1.9795, "step": 11023 }, { "epoch": 0.6144585028705201, "grad_norm": 0.5557812452316284, "learning_rate": 3.339032900310554e-05, "loss": 1.5239, "step": 11024 }, { "epoch": 0.6145142411236831, "grad_norm": 0.7021792531013489, "learning_rate": 3.338193069034549e-05, "loss": 2.0899, "step": 11025 }, { "epoch": 0.6145699793768463, "grad_norm": 0.5470578074455261, "learning_rate": 3.33735329046137e-05, "loss": 1.7312, "step": 11026 }, { "epoch": 0.6146257176300095, "grad_norm": 0.5664429664611816, "learning_rate": 3.336513564617654e-05, "loss": 1.5185, "step": 11027 }, { "epoch": 0.6146814558831726, "grad_norm": 0.6334248185157776, "learning_rate": 3.3356738915300255e-05, "loss": 1.8831, "step": 11028 }, { "epoch": 0.6147371941363358, "grad_norm": 0.5621626377105713, "learning_rate": 3.33483427122512e-05, "loss": 1.7854, "step": 11029 }, { "epoch": 0.614792932389499, "grad_norm": 0.5289725065231323, "learning_rate": 3.333994703729562e-05, "loss": 1.5749, "step": 11030 }, { "epoch": 0.614848670642662, "grad_norm": 0.5943875312805176, "learning_rate": 3.333155189069978e-05, "loss": 1.7339, "step": 11031 }, { "epoch": 0.6149044088958252, "grad_norm": 0.5255435109138489, "learning_rate": 3.332315727272994e-05, "loss": 1.6177, "step": 11032 }, { "epoch": 0.6149601471489884, "grad_norm": 0.5951310396194458, "learning_rate": 3.331476318365231e-05, "loss": 1.8444, "step": 11033 }, { "epoch": 0.6150158854021515, "grad_norm": 0.5913835763931274, "learning_rate": 3.330636962373312e-05, "loss": 1.5832, "step": 11034 }, { "epoch": 0.6150716236553146, "grad_norm": 0.5857747197151184, "learning_rate": 3.329797659323853e-05, "loss": 1.7523, "step": 11035 }, { "epoch": 0.6151273619084778, "grad_norm": 0.5293170213699341, "learning_rate": 3.3289584092434785e-05, "loss": 1.5468, "step": 11036 }, { "epoch": 0.6151831001616409, "grad_norm": 0.5425816178321838, "learning_rate": 3.328119212158797e-05, "loss": 1.6177, "step": 11037 }, { "epoch": 0.6152388384148041, "grad_norm": 0.53827965259552, "learning_rate": 3.327280068096429e-05, "loss": 1.585, "step": 11038 }, { "epoch": 0.6152945766679673, "grad_norm": 0.5500679612159729, "learning_rate": 3.326440977082981e-05, "loss": 1.5652, "step": 11039 }, { "epoch": 0.6153503149211303, "grad_norm": 0.5159964561462402, "learning_rate": 3.325601939145069e-05, "loss": 1.6, "step": 11040 }, { "epoch": 0.6154060531742935, "grad_norm": 0.5599196553230286, "learning_rate": 3.3247629543093025e-05, "loss": 1.7046, "step": 11041 }, { "epoch": 0.6154617914274567, "grad_norm": 0.5590962767601013, "learning_rate": 3.323924022602287e-05, "loss": 1.7124, "step": 11042 }, { "epoch": 0.6155175296806198, "grad_norm": 0.5326443910598755, "learning_rate": 3.32308514405063e-05, "loss": 1.4437, "step": 11043 }, { "epoch": 0.615573267933783, "grad_norm": 0.6075683832168579, "learning_rate": 3.322246318680934e-05, "loss": 1.809, "step": 11044 }, { "epoch": 0.615629006186946, "grad_norm": 0.5951778888702393, "learning_rate": 3.321407546519802e-05, "loss": 1.6689, "step": 11045 }, { "epoch": 0.6156847444401092, "grad_norm": 0.5833475589752197, "learning_rate": 3.320568827593837e-05, "loss": 1.7449, "step": 11046 }, { "epoch": 0.6157404826932724, "grad_norm": 0.562238872051239, "learning_rate": 3.319730161929637e-05, "loss": 1.6372, "step": 11047 }, { "epoch": 0.6157962209464355, "grad_norm": 0.5967754125595093, "learning_rate": 3.318891549553801e-05, "loss": 1.6837, "step": 11048 }, { "epoch": 0.6158519591995987, "grad_norm": 0.5980004072189331, "learning_rate": 3.318052990492921e-05, "loss": 1.7888, "step": 11049 }, { "epoch": 0.6159076974527619, "grad_norm": 0.53914874792099, "learning_rate": 3.317214484773596e-05, "loss": 1.6178, "step": 11050 }, { "epoch": 0.6159634357059249, "grad_norm": 0.606036365032196, "learning_rate": 3.316376032422415e-05, "loss": 1.9799, "step": 11051 }, { "epoch": 0.6160191739590881, "grad_norm": 0.6011738181114197, "learning_rate": 3.315537633465968e-05, "loss": 1.7318, "step": 11052 }, { "epoch": 0.6160749122122513, "grad_norm": 0.4829182028770447, "learning_rate": 3.3146992879308505e-05, "loss": 1.2948, "step": 11053 }, { "epoch": 0.6161306504654144, "grad_norm": 0.5873551368713379, "learning_rate": 3.3138609958436414e-05, "loss": 1.7205, "step": 11054 }, { "epoch": 0.6161863887185776, "grad_norm": 0.5033444762229919, "learning_rate": 3.3130227572309334e-05, "loss": 1.4926, "step": 11055 }, { "epoch": 0.6162421269717407, "grad_norm": 0.5444531440734863, "learning_rate": 3.3121845721193065e-05, "loss": 1.5604, "step": 11056 }, { "epoch": 0.6162978652249038, "grad_norm": 0.585561990737915, "learning_rate": 3.311346440535346e-05, "loss": 1.737, "step": 11057 }, { "epoch": 0.616353603478067, "grad_norm": 0.6604238748550415, "learning_rate": 3.31050836250563e-05, "loss": 1.2615, "step": 11058 }, { "epoch": 0.6164093417312302, "grad_norm": 0.5413322448730469, "learning_rate": 3.3096703380567376e-05, "loss": 1.6531, "step": 11059 }, { "epoch": 0.6164650799843933, "grad_norm": 0.5525966882705688, "learning_rate": 3.3088323672152474e-05, "loss": 1.7572, "step": 11060 }, { "epoch": 0.6165208182375564, "grad_norm": 0.62772136926651, "learning_rate": 3.307994450007733e-05, "loss": 1.7705, "step": 11061 }, { "epoch": 0.6165765564907196, "grad_norm": 0.5861853957176208, "learning_rate": 3.3071565864607723e-05, "loss": 1.7726, "step": 11062 }, { "epoch": 0.6166322947438827, "grad_norm": 0.5600711107254028, "learning_rate": 3.3063187766009316e-05, "loss": 1.4762, "step": 11063 }, { "epoch": 0.6166880329970459, "grad_norm": 0.5767860412597656, "learning_rate": 3.305481020454787e-05, "loss": 1.7204, "step": 11064 }, { "epoch": 0.6167437712502091, "grad_norm": 0.5620105862617493, "learning_rate": 3.304643318048903e-05, "loss": 1.5623, "step": 11065 }, { "epoch": 0.6167995095033721, "grad_norm": 0.5560240745544434, "learning_rate": 3.303805669409848e-05, "loss": 1.7162, "step": 11066 }, { "epoch": 0.6168552477565353, "grad_norm": 0.5894601345062256, "learning_rate": 3.30296807456419e-05, "loss": 1.666, "step": 11067 }, { "epoch": 0.6169109860096984, "grad_norm": 0.5420163869857788, "learning_rate": 3.3021305335384886e-05, "loss": 1.3855, "step": 11068 }, { "epoch": 0.6169667242628616, "grad_norm": 0.6015777587890625, "learning_rate": 3.3012930463593084e-05, "loss": 1.8732, "step": 11069 }, { "epoch": 0.6170224625160248, "grad_norm": 0.5890399217605591, "learning_rate": 3.300455613053207e-05, "loss": 1.5905, "step": 11070 }, { "epoch": 0.6170782007691878, "grad_norm": 0.5790014863014221, "learning_rate": 3.299618233646745e-05, "loss": 1.5319, "step": 11071 }, { "epoch": 0.617133939022351, "grad_norm": 0.5821947455406189, "learning_rate": 3.29878090816648e-05, "loss": 1.7873, "step": 11072 }, { "epoch": 0.6171896772755142, "grad_norm": 0.5517497658729553, "learning_rate": 3.297943636638965e-05, "loss": 1.8069, "step": 11073 }, { "epoch": 0.6172454155286773, "grad_norm": 0.509792149066925, "learning_rate": 3.297106419090754e-05, "loss": 1.3797, "step": 11074 }, { "epoch": 0.6173011537818405, "grad_norm": 0.5943223834037781, "learning_rate": 3.296269255548399e-05, "loss": 1.8426, "step": 11075 }, { "epoch": 0.6173568920350037, "grad_norm": 0.5859249830245972, "learning_rate": 3.2954321460384506e-05, "loss": 1.5177, "step": 11076 }, { "epoch": 0.6174126302881667, "grad_norm": 0.5453793406486511, "learning_rate": 3.294595090587455e-05, "loss": 1.641, "step": 11077 }, { "epoch": 0.6174683685413299, "grad_norm": 0.5777225494384766, "learning_rate": 3.293758089221959e-05, "loss": 1.6019, "step": 11078 }, { "epoch": 0.6175241067944931, "grad_norm": 0.6105305552482605, "learning_rate": 3.292921141968511e-05, "loss": 1.5064, "step": 11079 }, { "epoch": 0.6175798450476562, "grad_norm": 0.5501500964164734, "learning_rate": 3.292084248853649e-05, "loss": 1.7632, "step": 11080 }, { "epoch": 0.6176355833008194, "grad_norm": 0.546392560005188, "learning_rate": 3.291247409903919e-05, "loss": 1.5724, "step": 11081 }, { "epoch": 0.6176913215539825, "grad_norm": 0.6007018685340881, "learning_rate": 3.290410625145857e-05, "loss": 1.8628, "step": 11082 }, { "epoch": 0.6177470598071456, "grad_norm": 0.5350911617279053, "learning_rate": 3.289573894606003e-05, "loss": 1.5649, "step": 11083 }, { "epoch": 0.6178027980603088, "grad_norm": 0.5646401047706604, "learning_rate": 3.288737218310892e-05, "loss": 1.5843, "step": 11084 }, { "epoch": 0.617858536313472, "grad_norm": 0.5566853880882263, "learning_rate": 3.287900596287059e-05, "loss": 1.6214, "step": 11085 }, { "epoch": 0.6179142745666351, "grad_norm": 0.659518837928772, "learning_rate": 3.2870640285610375e-05, "loss": 1.9879, "step": 11086 }, { "epoch": 0.6179700128197982, "grad_norm": 0.5458612442016602, "learning_rate": 3.286227515159357e-05, "loss": 1.6907, "step": 11087 }, { "epoch": 0.6180257510729614, "grad_norm": 0.5440777540206909, "learning_rate": 3.28539105610855e-05, "loss": 1.7293, "step": 11088 }, { "epoch": 0.6180814893261245, "grad_norm": 0.5791444182395935, "learning_rate": 3.284554651435138e-05, "loss": 1.7966, "step": 11089 }, { "epoch": 0.6181372275792877, "grad_norm": 0.5879372358322144, "learning_rate": 3.2837183011656533e-05, "loss": 1.7938, "step": 11090 }, { "epoch": 0.6181929658324508, "grad_norm": 0.5607120394706726, "learning_rate": 3.2828820053266176e-05, "loss": 1.6818, "step": 11091 }, { "epoch": 0.6182487040856139, "grad_norm": 0.5742748379707336, "learning_rate": 3.2820457639445525e-05, "loss": 1.7102, "step": 11092 }, { "epoch": 0.6183044423387771, "grad_norm": 0.584244966506958, "learning_rate": 3.28120957704598e-05, "loss": 1.7051, "step": 11093 }, { "epoch": 0.6183601805919402, "grad_norm": 0.5676624178886414, "learning_rate": 3.280373444657417e-05, "loss": 1.6841, "step": 11094 }, { "epoch": 0.6184159188451034, "grad_norm": 0.6396898031234741, "learning_rate": 3.279537366805384e-05, "loss": 2.0282, "step": 11095 }, { "epoch": 0.6184716570982666, "grad_norm": 0.5480191111564636, "learning_rate": 3.278701343516393e-05, "loss": 1.6552, "step": 11096 }, { "epoch": 0.6185273953514296, "grad_norm": 0.5784115195274353, "learning_rate": 3.27786537481696e-05, "loss": 1.7273, "step": 11097 }, { "epoch": 0.6185831336045928, "grad_norm": 0.5261921286582947, "learning_rate": 3.277029460733598e-05, "loss": 1.6164, "step": 11098 }, { "epoch": 0.618638871857756, "grad_norm": 0.6365436315536499, "learning_rate": 3.276193601292815e-05, "loss": 1.9246, "step": 11099 }, { "epoch": 0.6186946101109191, "grad_norm": 0.539184033870697, "learning_rate": 3.275357796521121e-05, "loss": 1.5328, "step": 11100 }, { "epoch": 0.6187503483640823, "grad_norm": 0.5453305840492249, "learning_rate": 3.274522046445021e-05, "loss": 1.7782, "step": 11101 }, { "epoch": 0.6188060866172455, "grad_norm": 0.5231083035469055, "learning_rate": 3.273686351091023e-05, "loss": 1.5018, "step": 11102 }, { "epoch": 0.6188618248704085, "grad_norm": 0.5448489189147949, "learning_rate": 3.272850710485628e-05, "loss": 1.6596, "step": 11103 }, { "epoch": 0.6189175631235717, "grad_norm": 0.5277993083000183, "learning_rate": 3.2720151246553366e-05, "loss": 1.4884, "step": 11104 }, { "epoch": 0.6189733013767349, "grad_norm": 0.5456802845001221, "learning_rate": 3.271179593626654e-05, "loss": 1.5642, "step": 11105 }, { "epoch": 0.619029039629898, "grad_norm": 0.5369241833686829, "learning_rate": 3.270344117426073e-05, "loss": 1.7674, "step": 11106 }, { "epoch": 0.6190847778830612, "grad_norm": 0.5577540397644043, "learning_rate": 3.269508696080093e-05, "loss": 1.5923, "step": 11107 }, { "epoch": 0.6191405161362243, "grad_norm": 0.5356347560882568, "learning_rate": 3.268673329615207e-05, "loss": 1.5301, "step": 11108 }, { "epoch": 0.6191962543893874, "grad_norm": 0.5975854396820068, "learning_rate": 3.267838018057909e-05, "loss": 1.5344, "step": 11109 }, { "epoch": 0.6192519926425506, "grad_norm": 0.5814411044120789, "learning_rate": 3.2670027614346896e-05, "loss": 1.8432, "step": 11110 }, { "epoch": 0.6193077308957138, "grad_norm": 0.559992253780365, "learning_rate": 3.2661675597720384e-05, "loss": 1.6279, "step": 11111 }, { "epoch": 0.6193634691488769, "grad_norm": 0.5477424263954163, "learning_rate": 3.265332413096444e-05, "loss": 1.5426, "step": 11112 }, { "epoch": 0.61941920740204, "grad_norm": 0.5477100014686584, "learning_rate": 3.26449732143439e-05, "loss": 1.709, "step": 11113 }, { "epoch": 0.6194749456552031, "grad_norm": 0.5695294141769409, "learning_rate": 3.263662284812365e-05, "loss": 1.6154, "step": 11114 }, { "epoch": 0.6195306839083663, "grad_norm": 0.5602665543556213, "learning_rate": 3.262827303256846e-05, "loss": 1.4069, "step": 11115 }, { "epoch": 0.6195864221615295, "grad_norm": 0.5472245216369629, "learning_rate": 3.261992376794318e-05, "loss": 1.6916, "step": 11116 }, { "epoch": 0.6196421604146926, "grad_norm": 0.5716364979743958, "learning_rate": 3.2611575054512584e-05, "loss": 1.829, "step": 11117 }, { "epoch": 0.6196978986678557, "grad_norm": 0.5174562931060791, "learning_rate": 3.2603226892541437e-05, "loss": 1.4311, "step": 11118 }, { "epoch": 0.6197536369210189, "grad_norm": 0.5370128750801086, "learning_rate": 3.2594879282294524e-05, "loss": 1.7092, "step": 11119 }, { "epoch": 0.619809375174182, "grad_norm": 0.5618498921394348, "learning_rate": 3.258653222403654e-05, "loss": 1.5921, "step": 11120 }, { "epoch": 0.6198651134273452, "grad_norm": 0.54872065782547, "learning_rate": 3.257818571803224e-05, "loss": 1.6191, "step": 11121 }, { "epoch": 0.6199208516805084, "grad_norm": 0.5251935720443726, "learning_rate": 3.25698397645463e-05, "loss": 1.3492, "step": 11122 }, { "epoch": 0.6199765899336714, "grad_norm": 0.5758818984031677, "learning_rate": 3.2561494363843416e-05, "loss": 1.7222, "step": 11123 }, { "epoch": 0.6200323281868346, "grad_norm": 0.5772950649261475, "learning_rate": 3.255314951618827e-05, "loss": 1.5677, "step": 11124 }, { "epoch": 0.6200880664399978, "grad_norm": 0.5665372014045715, "learning_rate": 3.2544805221845485e-05, "loss": 1.4315, "step": 11125 }, { "epoch": 0.6201438046931609, "grad_norm": 0.5531750321388245, "learning_rate": 3.253646148107973e-05, "loss": 1.4994, "step": 11126 }, { "epoch": 0.6201995429463241, "grad_norm": 0.5572689771652222, "learning_rate": 3.2528118294155576e-05, "loss": 1.4227, "step": 11127 }, { "epoch": 0.6202552811994873, "grad_norm": 0.577793538570404, "learning_rate": 3.251977566133766e-05, "loss": 1.8407, "step": 11128 }, { "epoch": 0.6203110194526503, "grad_norm": 0.6016719341278076, "learning_rate": 3.251143358289053e-05, "loss": 1.8582, "step": 11129 }, { "epoch": 0.6203667577058135, "grad_norm": 0.5398997068405151, "learning_rate": 3.2503092059078754e-05, "loss": 1.6491, "step": 11130 }, { "epoch": 0.6204224959589767, "grad_norm": 0.5354841947555542, "learning_rate": 3.2494751090166907e-05, "loss": 1.6451, "step": 11131 }, { "epoch": 0.6204782342121398, "grad_norm": 0.5381180047988892, "learning_rate": 3.2486410676419467e-05, "loss": 1.563, "step": 11132 }, { "epoch": 0.620533972465303, "grad_norm": 0.5650672912597656, "learning_rate": 3.247807081810099e-05, "loss": 1.7636, "step": 11133 }, { "epoch": 0.6205897107184661, "grad_norm": 0.5720324516296387, "learning_rate": 3.246973151547594e-05, "loss": 1.6853, "step": 11134 }, { "epoch": 0.6206454489716292, "grad_norm": 0.6177263855934143, "learning_rate": 3.2461392768808796e-05, "loss": 1.7606, "step": 11135 }, { "epoch": 0.6207011872247924, "grad_norm": 0.547572672367096, "learning_rate": 3.245305457836402e-05, "loss": 1.6584, "step": 11136 }, { "epoch": 0.6207569254779555, "grad_norm": 0.5631645917892456, "learning_rate": 3.244471694440604e-05, "loss": 1.6822, "step": 11137 }, { "epoch": 0.6208126637311187, "grad_norm": 0.5759522914886475, "learning_rate": 3.243637986719929e-05, "loss": 1.7112, "step": 11138 }, { "epoch": 0.6208684019842818, "grad_norm": 0.557873547077179, "learning_rate": 3.2428043347008154e-05, "loss": 1.731, "step": 11139 }, { "epoch": 0.6209241402374449, "grad_norm": 0.5248095393180847, "learning_rate": 3.241970738409707e-05, "loss": 1.6321, "step": 11140 }, { "epoch": 0.6209798784906081, "grad_norm": 0.5478214025497437, "learning_rate": 3.241137197873032e-05, "loss": 1.5864, "step": 11141 }, { "epoch": 0.6210356167437713, "grad_norm": 0.6157545447349548, "learning_rate": 3.2403037131172324e-05, "loss": 1.8697, "step": 11142 }, { "epoch": 0.6210913549969344, "grad_norm": 0.5615748167037964, "learning_rate": 3.239470284168739e-05, "loss": 1.7243, "step": 11143 }, { "epoch": 0.6211470932500975, "grad_norm": 0.6518558859825134, "learning_rate": 3.238636911053984e-05, "loss": 1.953, "step": 11144 }, { "epoch": 0.6212028315032607, "grad_norm": 0.518277108669281, "learning_rate": 3.237803593799397e-05, "loss": 1.7371, "step": 11145 }, { "epoch": 0.6212585697564238, "grad_norm": 0.5324394106864929, "learning_rate": 3.2369703324314046e-05, "loss": 1.6465, "step": 11146 }, { "epoch": 0.621314308009587, "grad_norm": 0.5850804448127747, "learning_rate": 3.236137126976435e-05, "loss": 1.7146, "step": 11147 }, { "epoch": 0.6213700462627502, "grad_norm": 0.5877463221549988, "learning_rate": 3.23530397746091e-05, "loss": 1.8071, "step": 11148 }, { "epoch": 0.6214257845159132, "grad_norm": 0.582880973815918, "learning_rate": 3.234470883911255e-05, "loss": 1.6476, "step": 11149 }, { "epoch": 0.6214815227690764, "grad_norm": 0.5952877402305603, "learning_rate": 3.2336378463538907e-05, "loss": 1.7425, "step": 11150 }, { "epoch": 0.6215372610222396, "grad_norm": 0.5596649646759033, "learning_rate": 3.232804864815234e-05, "loss": 1.5382, "step": 11151 }, { "epoch": 0.6215929992754027, "grad_norm": 0.5372732281684875, "learning_rate": 3.2319719393217055e-05, "loss": 1.5486, "step": 11152 }, { "epoch": 0.6216487375285659, "grad_norm": 0.6073440909385681, "learning_rate": 3.231139069899717e-05, "loss": 1.8761, "step": 11153 }, { "epoch": 0.621704475781729, "grad_norm": 0.6037800312042236, "learning_rate": 3.230306256575685e-05, "loss": 1.8006, "step": 11154 }, { "epoch": 0.6217602140348921, "grad_norm": 0.5567789673805237, "learning_rate": 3.2294734993760196e-05, "loss": 1.681, "step": 11155 }, { "epoch": 0.6218159522880553, "grad_norm": 0.5184069275856018, "learning_rate": 3.228640798327131e-05, "loss": 1.6012, "step": 11156 }, { "epoch": 0.6218716905412185, "grad_norm": 0.5890794992446899, "learning_rate": 3.227808153455431e-05, "loss": 1.8304, "step": 11157 }, { "epoch": 0.6219274287943816, "grad_norm": 0.5692599415779114, "learning_rate": 3.226975564787322e-05, "loss": 1.699, "step": 11158 }, { "epoch": 0.6219831670475447, "grad_norm": 0.5787486433982849, "learning_rate": 3.226143032349211e-05, "loss": 1.5854, "step": 11159 }, { "epoch": 0.6220389053007078, "grad_norm": 0.6033738851547241, "learning_rate": 3.225310556167501e-05, "loss": 1.8011, "step": 11160 }, { "epoch": 0.622094643553871, "grad_norm": 0.5922840237617493, "learning_rate": 3.2244781362685937e-05, "loss": 1.8349, "step": 11161 }, { "epoch": 0.6221503818070342, "grad_norm": 0.5423793792724609, "learning_rate": 3.223645772678887e-05, "loss": 1.6352, "step": 11162 }, { "epoch": 0.6222061200601973, "grad_norm": 0.5888994336128235, "learning_rate": 3.2228134654247785e-05, "loss": 1.7301, "step": 11163 }, { "epoch": 0.6222618583133604, "grad_norm": 0.5580011010169983, "learning_rate": 3.2219812145326675e-05, "loss": 1.939, "step": 11164 }, { "epoch": 0.6223175965665236, "grad_norm": 0.5396100878715515, "learning_rate": 3.221149020028944e-05, "loss": 1.5153, "step": 11165 }, { "epoch": 0.6223733348196867, "grad_norm": 0.5858661532402039, "learning_rate": 3.2203168819400045e-05, "loss": 1.7389, "step": 11166 }, { "epoch": 0.6224290730728499, "grad_norm": 0.5977375507354736, "learning_rate": 3.219484800292234e-05, "loss": 1.6778, "step": 11167 }, { "epoch": 0.6224848113260131, "grad_norm": 0.5393698215484619, "learning_rate": 3.2186527751120264e-05, "loss": 1.5979, "step": 11168 }, { "epoch": 0.6225405495791762, "grad_norm": 0.5956501960754395, "learning_rate": 3.2178208064257666e-05, "loss": 1.6583, "step": 11169 }, { "epoch": 0.6225962878323393, "grad_norm": 0.5951047539710999, "learning_rate": 3.2169888942598395e-05, "loss": 1.8095, "step": 11170 }, { "epoch": 0.6226520260855025, "grad_norm": 0.5699685215950012, "learning_rate": 3.2161570386406305e-05, "loss": 1.7863, "step": 11171 }, { "epoch": 0.6227077643386656, "grad_norm": 0.5904039144515991, "learning_rate": 3.2153252395945176e-05, "loss": 1.5287, "step": 11172 }, { "epoch": 0.6227635025918288, "grad_norm": 0.5787484645843506, "learning_rate": 3.214493497147885e-05, "loss": 1.5626, "step": 11173 }, { "epoch": 0.622819240844992, "grad_norm": 0.5047122836112976, "learning_rate": 3.2136618113271055e-05, "loss": 1.3227, "step": 11174 }, { "epoch": 0.622874979098155, "grad_norm": 0.5570552349090576, "learning_rate": 3.2128301821585616e-05, "loss": 1.4615, "step": 11175 }, { "epoch": 0.6229307173513182, "grad_norm": 0.5824396014213562, "learning_rate": 3.2119986096686215e-05, "loss": 1.5484, "step": 11176 }, { "epoch": 0.6229864556044814, "grad_norm": 0.5634551048278809, "learning_rate": 3.211167093883661e-05, "loss": 1.3652, "step": 11177 }, { "epoch": 0.6230421938576445, "grad_norm": 0.5389364957809448, "learning_rate": 3.2103356348300525e-05, "loss": 1.7177, "step": 11178 }, { "epoch": 0.6230979321108077, "grad_norm": 0.5723541975021362, "learning_rate": 3.2095042325341626e-05, "loss": 1.6482, "step": 11179 }, { "epoch": 0.6231536703639708, "grad_norm": 0.5335341691970825, "learning_rate": 3.2086728870223594e-05, "loss": 1.6545, "step": 11180 }, { "epoch": 0.6232094086171339, "grad_norm": 0.6035029292106628, "learning_rate": 3.207841598321007e-05, "loss": 1.9482, "step": 11181 }, { "epoch": 0.6232651468702971, "grad_norm": 0.5398215055465698, "learning_rate": 3.207010366456469e-05, "loss": 1.4994, "step": 11182 }, { "epoch": 0.6233208851234602, "grad_norm": 0.5689934492111206, "learning_rate": 3.206179191455111e-05, "loss": 1.6828, "step": 11183 }, { "epoch": 0.6233766233766234, "grad_norm": 0.5845012068748474, "learning_rate": 3.2053480733432886e-05, "loss": 1.66, "step": 11184 }, { "epoch": 0.6234323616297865, "grad_norm": 0.6214088797569275, "learning_rate": 3.204517012147363e-05, "loss": 1.7498, "step": 11185 }, { "epoch": 0.6234880998829496, "grad_norm": 0.5731697678565979, "learning_rate": 3.2036860078936886e-05, "loss": 1.7147, "step": 11186 }, { "epoch": 0.6235438381361128, "grad_norm": 0.5301964282989502, "learning_rate": 3.2028550606086216e-05, "loss": 1.5002, "step": 11187 }, { "epoch": 0.623599576389276, "grad_norm": 0.5406346321105957, "learning_rate": 3.202024170318513e-05, "loss": 1.6626, "step": 11188 }, { "epoch": 0.6236553146424391, "grad_norm": 0.5429883599281311, "learning_rate": 3.201193337049714e-05, "loss": 1.6432, "step": 11189 }, { "epoch": 0.6237110528956022, "grad_norm": 0.5684347748756409, "learning_rate": 3.2003625608285776e-05, "loss": 1.6593, "step": 11190 }, { "epoch": 0.6237667911487654, "grad_norm": 0.6120270490646362, "learning_rate": 3.199531841681445e-05, "loss": 1.7227, "step": 11191 }, { "epoch": 0.6238225294019285, "grad_norm": 0.5948870778083801, "learning_rate": 3.198701179634668e-05, "loss": 1.663, "step": 11192 }, { "epoch": 0.6238782676550917, "grad_norm": 0.5670180320739746, "learning_rate": 3.197870574714584e-05, "loss": 1.3727, "step": 11193 }, { "epoch": 0.6239340059082549, "grad_norm": 0.5460881590843201, "learning_rate": 3.197040026947541e-05, "loss": 1.7794, "step": 11194 }, { "epoch": 0.623989744161418, "grad_norm": 0.5323398113250732, "learning_rate": 3.196209536359874e-05, "loss": 1.7328, "step": 11195 }, { "epoch": 0.6240454824145811, "grad_norm": 0.5424444675445557, "learning_rate": 3.195379102977925e-05, "loss": 1.621, "step": 11196 }, { "epoch": 0.6241012206677443, "grad_norm": 0.5800326466560364, "learning_rate": 3.19454872682803e-05, "loss": 1.6368, "step": 11197 }, { "epoch": 0.6241569589209074, "grad_norm": 0.5419188737869263, "learning_rate": 3.193718407936521e-05, "loss": 1.4724, "step": 11198 }, { "epoch": 0.6242126971740706, "grad_norm": 0.5642205476760864, "learning_rate": 3.192888146329734e-05, "loss": 1.5669, "step": 11199 }, { "epoch": 0.6242684354272338, "grad_norm": 0.6043959856033325, "learning_rate": 3.192057942033997e-05, "loss": 1.904, "step": 11200 }, { "epoch": 0.6243241736803968, "grad_norm": 0.5266156196594238, "learning_rate": 3.191227795075644e-05, "loss": 1.4223, "step": 11201 }, { "epoch": 0.62437991193356, "grad_norm": 0.5283826589584351, "learning_rate": 3.190397705480997e-05, "loss": 1.6111, "step": 11202 }, { "epoch": 0.6244356501867232, "grad_norm": 0.6343064308166504, "learning_rate": 3.189567673276385e-05, "loss": 2.0359, "step": 11203 }, { "epoch": 0.6244913884398863, "grad_norm": 0.5972675085067749, "learning_rate": 3.1887376984881315e-05, "loss": 1.6094, "step": 11204 }, { "epoch": 0.6245471266930495, "grad_norm": 0.5392424464225769, "learning_rate": 3.187907781142556e-05, "loss": 1.581, "step": 11205 }, { "epoch": 0.6246028649462125, "grad_norm": 0.5838165283203125, "learning_rate": 3.187077921265983e-05, "loss": 1.4707, "step": 11206 }, { "epoch": 0.6246586031993757, "grad_norm": 0.5730239152908325, "learning_rate": 3.186248118884726e-05, "loss": 1.6216, "step": 11207 }, { "epoch": 0.6247143414525389, "grad_norm": 0.5531439185142517, "learning_rate": 3.185418374025104e-05, "loss": 1.5112, "step": 11208 }, { "epoch": 0.624770079705702, "grad_norm": 0.5780851244926453, "learning_rate": 3.184588686713432e-05, "loss": 1.8131, "step": 11209 }, { "epoch": 0.6248258179588652, "grad_norm": 0.6342042684555054, "learning_rate": 3.183759056976022e-05, "loss": 1.6177, "step": 11210 }, { "epoch": 0.6248815562120283, "grad_norm": 0.5741910338401794, "learning_rate": 3.1829294848391867e-05, "loss": 1.8943, "step": 11211 }, { "epoch": 0.6249372944651914, "grad_norm": 0.5274877548217773, "learning_rate": 3.182099970329232e-05, "loss": 1.4587, "step": 11212 }, { "epoch": 0.6249930327183546, "grad_norm": 0.573377251625061, "learning_rate": 3.181270513472469e-05, "loss": 1.7161, "step": 11213 }, { "epoch": 0.6250487709715178, "grad_norm": 0.5359188318252563, "learning_rate": 3.1804411142952e-05, "loss": 1.6094, "step": 11214 }, { "epoch": 0.6251045092246809, "grad_norm": 0.5997651219367981, "learning_rate": 3.179611772823729e-05, "loss": 1.8517, "step": 11215 }, { "epoch": 0.625160247477844, "grad_norm": 0.536719024181366, "learning_rate": 3.178782489084362e-05, "loss": 1.4891, "step": 11216 }, { "epoch": 0.6252159857310072, "grad_norm": 0.5246587991714478, "learning_rate": 3.177953263103394e-05, "loss": 1.5387, "step": 11217 }, { "epoch": 0.6252717239841703, "grad_norm": 0.6258318424224854, "learning_rate": 3.177124094907127e-05, "loss": 1.8772, "step": 11218 }, { "epoch": 0.6253274622373335, "grad_norm": 0.5783872604370117, "learning_rate": 3.176294984521852e-05, "loss": 1.6118, "step": 11219 }, { "epoch": 0.6253832004904967, "grad_norm": 0.5509163737297058, "learning_rate": 3.175465931973871e-05, "loss": 1.5575, "step": 11220 }, { "epoch": 0.6254389387436597, "grad_norm": 0.5583782196044922, "learning_rate": 3.174636937289471e-05, "loss": 1.8273, "step": 11221 }, { "epoch": 0.6254946769968229, "grad_norm": 0.5733713507652283, "learning_rate": 3.173808000494944e-05, "loss": 1.7487, "step": 11222 }, { "epoch": 0.6255504152499861, "grad_norm": 0.5729860067367554, "learning_rate": 3.1729791216165814e-05, "loss": 1.7391, "step": 11223 }, { "epoch": 0.6256061535031492, "grad_norm": 0.6327353715896606, "learning_rate": 3.172150300680669e-05, "loss": 1.9217, "step": 11224 }, { "epoch": 0.6256618917563124, "grad_norm": 0.6311041116714478, "learning_rate": 3.171321537713491e-05, "loss": 1.9327, "step": 11225 }, { "epoch": 0.6257176300094756, "grad_norm": 0.5261319279670715, "learning_rate": 3.1704928327413307e-05, "loss": 1.6966, "step": 11226 }, { "epoch": 0.6257733682626386, "grad_norm": 0.583069384098053, "learning_rate": 3.169664185790474e-05, "loss": 1.664, "step": 11227 }, { "epoch": 0.6258291065158018, "grad_norm": 0.5649895668029785, "learning_rate": 3.1688355968871945e-05, "loss": 1.5542, "step": 11228 }, { "epoch": 0.6258848447689649, "grad_norm": 0.5580496191978455, "learning_rate": 3.1680070660577746e-05, "loss": 1.6896, "step": 11229 }, { "epoch": 0.6259405830221281, "grad_norm": 0.6010125875473022, "learning_rate": 3.1671785933284906e-05, "loss": 1.6811, "step": 11230 }, { "epoch": 0.6259963212752913, "grad_norm": 0.5710118412971497, "learning_rate": 3.166350178725615e-05, "loss": 1.7108, "step": 11231 }, { "epoch": 0.6260520595284543, "grad_norm": 0.5758123397827148, "learning_rate": 3.16552182227542e-05, "loss": 1.6537, "step": 11232 }, { "epoch": 0.6261077977816175, "grad_norm": 0.5612704753875732, "learning_rate": 3.164693524004178e-05, "loss": 1.5966, "step": 11233 }, { "epoch": 0.6261635360347807, "grad_norm": 0.5761590600013733, "learning_rate": 3.1638652839381544e-05, "loss": 1.8528, "step": 11234 }, { "epoch": 0.6262192742879438, "grad_norm": 0.5486272573471069, "learning_rate": 3.1630371021036214e-05, "loss": 1.7224, "step": 11235 }, { "epoch": 0.626275012541107, "grad_norm": 0.5285595059394836, "learning_rate": 3.16220897852684e-05, "loss": 1.5045, "step": 11236 }, { "epoch": 0.6263307507942701, "grad_norm": 0.533839225769043, "learning_rate": 3.1613809132340756e-05, "loss": 1.6119, "step": 11237 }, { "epoch": 0.6263864890474332, "grad_norm": 0.6031431555747986, "learning_rate": 3.1605529062515884e-05, "loss": 1.7967, "step": 11238 }, { "epoch": 0.6264422273005964, "grad_norm": 0.5195392370223999, "learning_rate": 3.1597249576056384e-05, "loss": 1.4543, "step": 11239 }, { "epoch": 0.6264979655537596, "grad_norm": 0.5455713868141174, "learning_rate": 3.1588970673224826e-05, "loss": 1.6086, "step": 11240 }, { "epoch": 0.6265537038069227, "grad_norm": 0.5660552382469177, "learning_rate": 3.158069235428377e-05, "loss": 1.8036, "step": 11241 }, { "epoch": 0.6266094420600858, "grad_norm": 0.5812910199165344, "learning_rate": 3.157241461949578e-05, "loss": 1.5898, "step": 11242 }, { "epoch": 0.626665180313249, "grad_norm": 0.556128978729248, "learning_rate": 3.1564137469123336e-05, "loss": 1.5166, "step": 11243 }, { "epoch": 0.6267209185664121, "grad_norm": 0.5934070348739624, "learning_rate": 3.155586090342898e-05, "loss": 1.9267, "step": 11244 }, { "epoch": 0.6267766568195753, "grad_norm": 0.5680559873580933, "learning_rate": 3.1547584922675163e-05, "loss": 1.7181, "step": 11245 }, { "epoch": 0.6268323950727385, "grad_norm": 0.6229578256607056, "learning_rate": 3.1539309527124394e-05, "loss": 1.7861, "step": 11246 }, { "epoch": 0.6268881333259015, "grad_norm": 0.549788236618042, "learning_rate": 3.153103471703907e-05, "loss": 1.7002, "step": 11247 }, { "epoch": 0.6269438715790647, "grad_norm": 0.5750143527984619, "learning_rate": 3.1522760492681647e-05, "loss": 1.6348, "step": 11248 }, { "epoch": 0.6269996098322279, "grad_norm": 0.5771127939224243, "learning_rate": 3.151448685431454e-05, "loss": 1.6681, "step": 11249 }, { "epoch": 0.627055348085391, "grad_norm": 0.5842124223709106, "learning_rate": 3.150621380220011e-05, "loss": 1.6242, "step": 11250 }, { "epoch": 0.6271110863385542, "grad_norm": 0.5503714084625244, "learning_rate": 3.149794133660079e-05, "loss": 1.5461, "step": 11251 }, { "epoch": 0.6271668245917172, "grad_norm": 0.5330989956855774, "learning_rate": 3.148966945777886e-05, "loss": 1.6535, "step": 11252 }, { "epoch": 0.6272225628448804, "grad_norm": 0.5222387909889221, "learning_rate": 3.148139816599672e-05, "loss": 1.5138, "step": 11253 }, { "epoch": 0.6272783010980436, "grad_norm": 0.5381698608398438, "learning_rate": 3.147312746151664e-05, "loss": 1.546, "step": 11254 }, { "epoch": 0.6273340393512067, "grad_norm": 0.6186235547065735, "learning_rate": 3.1464857344600935e-05, "loss": 1.6739, "step": 11255 }, { "epoch": 0.6273897776043699, "grad_norm": 0.5606537461280823, "learning_rate": 3.145658781551191e-05, "loss": 1.5668, "step": 11256 }, { "epoch": 0.627445515857533, "grad_norm": 0.5646564364433289, "learning_rate": 3.144831887451178e-05, "loss": 1.545, "step": 11257 }, { "epoch": 0.6275012541106961, "grad_norm": 0.595557689666748, "learning_rate": 3.144005052186283e-05, "loss": 1.6925, "step": 11258 }, { "epoch": 0.6275569923638593, "grad_norm": 0.5523800253868103, "learning_rate": 3.1431782757827256e-05, "loss": 1.5535, "step": 11259 }, { "epoch": 0.6276127306170225, "grad_norm": 0.559516191482544, "learning_rate": 3.142351558266726e-05, "loss": 1.5023, "step": 11260 }, { "epoch": 0.6276684688701856, "grad_norm": 0.5421967506408691, "learning_rate": 3.1415248996645056e-05, "loss": 1.4871, "step": 11261 }, { "epoch": 0.6277242071233488, "grad_norm": 0.6028934717178345, "learning_rate": 3.1406983000022795e-05, "loss": 1.79, "step": 11262 }, { "epoch": 0.6277799453765119, "grad_norm": 0.5425928235054016, "learning_rate": 3.1398717593062635e-05, "loss": 1.7054, "step": 11263 }, { "epoch": 0.627835683629675, "grad_norm": 0.6035993099212646, "learning_rate": 3.139045277602669e-05, "loss": 1.7227, "step": 11264 }, { "epoch": 0.6278914218828382, "grad_norm": 0.6094220876693726, "learning_rate": 3.138218854917709e-05, "loss": 1.7799, "step": 11265 }, { "epoch": 0.6279471601360014, "grad_norm": 0.6169310808181763, "learning_rate": 3.137392491277592e-05, "loss": 1.8094, "step": 11266 }, { "epoch": 0.6280028983891645, "grad_norm": 0.47607964277267456, "learning_rate": 3.1365661867085236e-05, "loss": 1.2856, "step": 11267 }, { "epoch": 0.6280586366423276, "grad_norm": 0.6014509797096252, "learning_rate": 3.135739941236714e-05, "loss": 1.8226, "step": 11268 }, { "epoch": 0.6281143748954908, "grad_norm": 0.5377684235572815, "learning_rate": 3.134913754888362e-05, "loss": 1.4785, "step": 11269 }, { "epoch": 0.6281701131486539, "grad_norm": 0.5159873366355896, "learning_rate": 3.134087627689672e-05, "loss": 1.7147, "step": 11270 }, { "epoch": 0.6282258514018171, "grad_norm": 0.523975133895874, "learning_rate": 3.133261559666843e-05, "loss": 1.6772, "step": 11271 }, { "epoch": 0.6282815896549803, "grad_norm": 0.5608593821525574, "learning_rate": 3.132435550846076e-05, "loss": 1.5868, "step": 11272 }, { "epoch": 0.6283373279081433, "grad_norm": 0.6289455890655518, "learning_rate": 3.1316096012535626e-05, "loss": 1.8946, "step": 11273 }, { "epoch": 0.6283930661613065, "grad_norm": 0.5686862468719482, "learning_rate": 3.130783710915498e-05, "loss": 1.6757, "step": 11274 }, { "epoch": 0.6284488044144696, "grad_norm": 0.55696702003479, "learning_rate": 3.129957879858078e-05, "loss": 1.4474, "step": 11275 }, { "epoch": 0.6285045426676328, "grad_norm": 0.5241310596466064, "learning_rate": 3.1291321081074884e-05, "loss": 1.5844, "step": 11276 }, { "epoch": 0.628560280920796, "grad_norm": 0.5064421892166138, "learning_rate": 3.1283063956899244e-05, "loss": 1.4043, "step": 11277 }, { "epoch": 0.628616019173959, "grad_norm": 0.5911862850189209, "learning_rate": 3.127480742631565e-05, "loss": 1.7536, "step": 11278 }, { "epoch": 0.6286717574271222, "grad_norm": 0.619687557220459, "learning_rate": 3.126655148958602e-05, "loss": 2.0976, "step": 11279 }, { "epoch": 0.6287274956802854, "grad_norm": 0.573886513710022, "learning_rate": 3.125829614697213e-05, "loss": 1.6716, "step": 11280 }, { "epoch": 0.6287832339334485, "grad_norm": 0.5493733882904053, "learning_rate": 3.125004139873582e-05, "loss": 1.7497, "step": 11281 }, { "epoch": 0.6288389721866117, "grad_norm": 0.5901930332183838, "learning_rate": 3.1241787245138884e-05, "loss": 1.7312, "step": 11282 }, { "epoch": 0.6288947104397749, "grad_norm": 0.5369457602500916, "learning_rate": 3.123353368644307e-05, "loss": 1.5331, "step": 11283 }, { "epoch": 0.6289504486929379, "grad_norm": 0.5471475124359131, "learning_rate": 3.1225280722910175e-05, "loss": 1.6681, "step": 11284 }, { "epoch": 0.6290061869461011, "grad_norm": 0.6188231706619263, "learning_rate": 3.12170283548019e-05, "loss": 1.7812, "step": 11285 }, { "epoch": 0.6290619251992643, "grad_norm": 0.5695561170578003, "learning_rate": 3.120877658237998e-05, "loss": 1.7624, "step": 11286 }, { "epoch": 0.6291176634524274, "grad_norm": 0.5495535135269165, "learning_rate": 3.120052540590609e-05, "loss": 1.5895, "step": 11287 }, { "epoch": 0.6291734017055906, "grad_norm": 0.5273025035858154, "learning_rate": 3.1192274825641935e-05, "loss": 1.5756, "step": 11288 }, { "epoch": 0.6292291399587537, "grad_norm": 0.6200233697891235, "learning_rate": 3.118402484184917e-05, "loss": 1.9689, "step": 11289 }, { "epoch": 0.6292848782119168, "grad_norm": 0.527696967124939, "learning_rate": 3.1175775454789424e-05, "loss": 1.422, "step": 11290 }, { "epoch": 0.62934061646508, "grad_norm": 0.5265816450119019, "learning_rate": 3.1167526664724346e-05, "loss": 1.5129, "step": 11291 }, { "epoch": 0.6293963547182432, "grad_norm": 0.5591835379600525, "learning_rate": 3.11592784719155e-05, "loss": 1.5274, "step": 11292 }, { "epoch": 0.6294520929714063, "grad_norm": 0.5992676615715027, "learning_rate": 3.1151030876624486e-05, "loss": 1.7378, "step": 11293 }, { "epoch": 0.6295078312245694, "grad_norm": 0.5295802354812622, "learning_rate": 3.1142783879112914e-05, "loss": 1.6353, "step": 11294 }, { "epoch": 0.6295635694777326, "grad_norm": 0.5344937443733215, "learning_rate": 3.113453747964225e-05, "loss": 1.6101, "step": 11295 }, { "epoch": 0.6296193077308957, "grad_norm": 0.5788000822067261, "learning_rate": 3.112629167847409e-05, "loss": 1.6695, "step": 11296 }, { "epoch": 0.6296750459840589, "grad_norm": 0.5490555763244629, "learning_rate": 3.11180464758699e-05, "loss": 1.4184, "step": 11297 }, { "epoch": 0.629730784237222, "grad_norm": 0.5981817841529846, "learning_rate": 3.1109801872091205e-05, "loss": 1.7076, "step": 11298 }, { "epoch": 0.6297865224903851, "grad_norm": 0.5663672685623169, "learning_rate": 3.1101557867399444e-05, "loss": 1.8046, "step": 11299 }, { "epoch": 0.6298422607435483, "grad_norm": 0.5466318726539612, "learning_rate": 3.109331446205608e-05, "loss": 1.6712, "step": 11300 }, { "epoch": 0.6298979989967114, "grad_norm": 0.5660746693611145, "learning_rate": 3.108507165632256e-05, "loss": 1.7935, "step": 11301 }, { "epoch": 0.6299537372498746, "grad_norm": 0.539685070514679, "learning_rate": 3.1076829450460266e-05, "loss": 1.4931, "step": 11302 }, { "epoch": 0.6300094755030378, "grad_norm": 0.561718761920929, "learning_rate": 3.106858784473064e-05, "loss": 1.5466, "step": 11303 }, { "epoch": 0.6300652137562008, "grad_norm": 0.5737816095352173, "learning_rate": 3.1060346839395e-05, "loss": 1.5863, "step": 11304 }, { "epoch": 0.630120952009364, "grad_norm": 0.5613131523132324, "learning_rate": 3.105210643471476e-05, "loss": 1.5272, "step": 11305 }, { "epoch": 0.6301766902625272, "grad_norm": 0.571135938167572, "learning_rate": 3.10438666309512e-05, "loss": 1.6595, "step": 11306 }, { "epoch": 0.6302324285156903, "grad_norm": 0.5821939706802368, "learning_rate": 3.103562742836569e-05, "loss": 1.7253, "step": 11307 }, { "epoch": 0.6302881667688535, "grad_norm": 0.5542194843292236, "learning_rate": 3.1027388827219506e-05, "loss": 1.7749, "step": 11308 }, { "epoch": 0.6303439050220166, "grad_norm": 0.5321241021156311, "learning_rate": 3.1019150827773925e-05, "loss": 1.6484, "step": 11309 }, { "epoch": 0.6303996432751797, "grad_norm": 0.5949715971946716, "learning_rate": 3.1010913430290224e-05, "loss": 1.6021, "step": 11310 }, { "epoch": 0.6304553815283429, "grad_norm": 0.550311267375946, "learning_rate": 3.100267663502962e-05, "loss": 1.3745, "step": 11311 }, { "epoch": 0.6305111197815061, "grad_norm": 0.5823655724525452, "learning_rate": 3.099444044225336e-05, "loss": 1.6346, "step": 11312 }, { "epoch": 0.6305668580346692, "grad_norm": 0.5521398186683655, "learning_rate": 3.0986204852222626e-05, "loss": 1.7061, "step": 11313 }, { "epoch": 0.6306225962878323, "grad_norm": 0.5518872737884521, "learning_rate": 3.097796986519863e-05, "loss": 1.7118, "step": 11314 }, { "epoch": 0.6306783345409955, "grad_norm": 0.6041616797447205, "learning_rate": 3.096973548144252e-05, "loss": 1.5219, "step": 11315 }, { "epoch": 0.6307340727941586, "grad_norm": 0.632793128490448, "learning_rate": 3.096150170121545e-05, "loss": 1.8662, "step": 11316 }, { "epoch": 0.6307898110473218, "grad_norm": 0.5445522665977478, "learning_rate": 3.0953268524778544e-05, "loss": 1.6059, "step": 11317 }, { "epoch": 0.630845549300485, "grad_norm": 0.5500385165214539, "learning_rate": 3.09450359523929e-05, "loss": 1.7426, "step": 11318 }, { "epoch": 0.630901287553648, "grad_norm": 0.5449601411819458, "learning_rate": 3.093680398431962e-05, "loss": 1.6988, "step": 11319 }, { "epoch": 0.6309570258068112, "grad_norm": 0.5738338232040405, "learning_rate": 3.0928572620819786e-05, "loss": 1.6672, "step": 11320 }, { "epoch": 0.6310127640599743, "grad_norm": 0.5188368558883667, "learning_rate": 3.092034186215441e-05, "loss": 1.3523, "step": 11321 }, { "epoch": 0.6310685023131375, "grad_norm": 0.5617424845695496, "learning_rate": 3.091211170858457e-05, "loss": 1.6872, "step": 11322 }, { "epoch": 0.6311242405663007, "grad_norm": 0.5671687722206116, "learning_rate": 3.0903882160371246e-05, "loss": 1.7043, "step": 11323 }, { "epoch": 0.6311799788194637, "grad_norm": 0.5423902869224548, "learning_rate": 3.089565321777546e-05, "loss": 1.7498, "step": 11324 }, { "epoch": 0.6312357170726269, "grad_norm": 0.5642695426940918, "learning_rate": 3.088742488105814e-05, "loss": 1.6361, "step": 11325 }, { "epoch": 0.6312914553257901, "grad_norm": 0.5880917906761169, "learning_rate": 3.0879197150480274e-05, "loss": 1.707, "step": 11326 }, { "epoch": 0.6313471935789532, "grad_norm": 0.6160138249397278, "learning_rate": 3.0870970026302813e-05, "loss": 1.8144, "step": 11327 }, { "epoch": 0.6314029318321164, "grad_norm": 0.5911991000175476, "learning_rate": 3.0862743508786626e-05, "loss": 1.8036, "step": 11328 }, { "epoch": 0.6314586700852796, "grad_norm": 0.5307081937789917, "learning_rate": 3.0854517598192666e-05, "loss": 1.666, "step": 11329 }, { "epoch": 0.6315144083384426, "grad_norm": 0.5666818618774414, "learning_rate": 3.084629229478175e-05, "loss": 1.6637, "step": 11330 }, { "epoch": 0.6315701465916058, "grad_norm": 0.5223289728164673, "learning_rate": 3.083806759881479e-05, "loss": 1.597, "step": 11331 }, { "epoch": 0.631625884844769, "grad_norm": 0.5430996417999268, "learning_rate": 3.0829843510552604e-05, "loss": 1.6464, "step": 11332 }, { "epoch": 0.6316816230979321, "grad_norm": 0.555894672870636, "learning_rate": 3.0821620030256e-05, "loss": 1.5438, "step": 11333 }, { "epoch": 0.6317373613510953, "grad_norm": 0.5556870698928833, "learning_rate": 3.0813397158185806e-05, "loss": 1.7269, "step": 11334 }, { "epoch": 0.6317930996042584, "grad_norm": 0.5671871900558472, "learning_rate": 3.0805174894602775e-05, "loss": 1.6349, "step": 11335 }, { "epoch": 0.6318488378574215, "grad_norm": 0.5389631986618042, "learning_rate": 3.0796953239767693e-05, "loss": 1.6013, "step": 11336 }, { "epoch": 0.6319045761105847, "grad_norm": 0.543947160243988, "learning_rate": 3.078873219394127e-05, "loss": 1.7542, "step": 11337 }, { "epoch": 0.6319603143637479, "grad_norm": 0.587973415851593, "learning_rate": 3.078051175738429e-05, "loss": 1.8181, "step": 11338 }, { "epoch": 0.632016052616911, "grad_norm": 0.5861559510231018, "learning_rate": 3.0772291930357386e-05, "loss": 1.6423, "step": 11339 }, { "epoch": 0.6320717908700741, "grad_norm": 0.5492725968360901, "learning_rate": 3.076407271312129e-05, "loss": 1.5643, "step": 11340 }, { "epoch": 0.6321275291232373, "grad_norm": 0.5426955819129944, "learning_rate": 3.075585410593666e-05, "loss": 1.609, "step": 11341 }, { "epoch": 0.6321832673764004, "grad_norm": 0.5526770353317261, "learning_rate": 3.074763610906413e-05, "loss": 1.9504, "step": 11342 }, { "epoch": 0.6322390056295636, "grad_norm": 0.6021462082862854, "learning_rate": 3.073941872276434e-05, "loss": 1.704, "step": 11343 }, { "epoch": 0.6322947438827267, "grad_norm": 0.6182892322540283, "learning_rate": 3.073120194729788e-05, "loss": 1.8544, "step": 11344 }, { "epoch": 0.6323504821358898, "grad_norm": 0.5577238202095032, "learning_rate": 3.072298578292534e-05, "loss": 1.6185, "step": 11345 }, { "epoch": 0.632406220389053, "grad_norm": 0.5607499480247498, "learning_rate": 3.071477022990734e-05, "loss": 1.764, "step": 11346 }, { "epoch": 0.6324619586422161, "grad_norm": 0.5537651777267456, "learning_rate": 3.070655528850435e-05, "loss": 1.6142, "step": 11347 }, { "epoch": 0.6325176968953793, "grad_norm": 0.5657694935798645, "learning_rate": 3.0698340958976943e-05, "loss": 1.6187, "step": 11348 }, { "epoch": 0.6325734351485425, "grad_norm": 0.551733672618866, "learning_rate": 3.069012724158563e-05, "loss": 1.5745, "step": 11349 }, { "epoch": 0.6326291734017055, "grad_norm": 0.5896459221839905, "learning_rate": 3.068191413659091e-05, "loss": 1.9964, "step": 11350 }, { "epoch": 0.6326849116548687, "grad_norm": 0.5522114634513855, "learning_rate": 3.067370164425322e-05, "loss": 1.5707, "step": 11351 }, { "epoch": 0.6327406499080319, "grad_norm": 0.6097670793533325, "learning_rate": 3.066548976483304e-05, "loss": 1.8577, "step": 11352 }, { "epoch": 0.632796388161195, "grad_norm": 0.6086198091506958, "learning_rate": 3.06572784985908e-05, "loss": 1.6431, "step": 11353 }, { "epoch": 0.6328521264143582, "grad_norm": 0.5401943325996399, "learning_rate": 3.0649067845786895e-05, "loss": 1.5172, "step": 11354 }, { "epoch": 0.6329078646675214, "grad_norm": 0.5912280678749084, "learning_rate": 3.0640857806681764e-05, "loss": 1.8141, "step": 11355 }, { "epoch": 0.6329636029206844, "grad_norm": 0.577514111995697, "learning_rate": 3.0632648381535725e-05, "loss": 1.679, "step": 11356 }, { "epoch": 0.6330193411738476, "grad_norm": 0.5429527163505554, "learning_rate": 3.062443957060918e-05, "loss": 1.5311, "step": 11357 }, { "epoch": 0.6330750794270108, "grad_norm": 0.5673772096633911, "learning_rate": 3.061623137416243e-05, "loss": 1.6186, "step": 11358 }, { "epoch": 0.6331308176801739, "grad_norm": 0.5661761164665222, "learning_rate": 3.060802379245581e-05, "loss": 1.7088, "step": 11359 }, { "epoch": 0.6331865559333371, "grad_norm": 0.5400183796882629, "learning_rate": 3.059981682574961e-05, "loss": 1.5318, "step": 11360 }, { "epoch": 0.6332422941865002, "grad_norm": 0.5342452526092529, "learning_rate": 3.059161047430411e-05, "loss": 1.5, "step": 11361 }, { "epoch": 0.6332980324396633, "grad_norm": 0.5756200551986694, "learning_rate": 3.058340473837958e-05, "loss": 1.6703, "step": 11362 }, { "epoch": 0.6333537706928265, "grad_norm": 0.5399934649467468, "learning_rate": 3.057519961823622e-05, "loss": 1.42, "step": 11363 }, { "epoch": 0.6334095089459897, "grad_norm": 0.6465393304824829, "learning_rate": 3.05669951141343e-05, "loss": 1.8826, "step": 11364 }, { "epoch": 0.6334652471991528, "grad_norm": 0.6080984473228455, "learning_rate": 3.055879122633397e-05, "loss": 1.7671, "step": 11365 }, { "epoch": 0.6335209854523159, "grad_norm": 0.6007773280143738, "learning_rate": 3.055058795509544e-05, "loss": 1.7308, "step": 11366 }, { "epoch": 0.633576723705479, "grad_norm": 0.6644430756568909, "learning_rate": 3.0542385300678875e-05, "loss": 1.8272, "step": 11367 }, { "epoch": 0.6336324619586422, "grad_norm": 0.4975641369819641, "learning_rate": 3.053418326334438e-05, "loss": 1.3861, "step": 11368 }, { "epoch": 0.6336882002118054, "grad_norm": 0.6592350006103516, "learning_rate": 3.052598184335212e-05, "loss": 2.0965, "step": 11369 }, { "epoch": 0.6337439384649685, "grad_norm": 0.4995543360710144, "learning_rate": 3.0517781040962163e-05, "loss": 1.5337, "step": 11370 }, { "epoch": 0.6337996767181316, "grad_norm": 0.548430860042572, "learning_rate": 3.0509580856434595e-05, "loss": 1.6827, "step": 11371 }, { "epoch": 0.6338554149712948, "grad_norm": 0.5827524662017822, "learning_rate": 3.0501381290029506e-05, "loss": 1.6712, "step": 11372 }, { "epoch": 0.6339111532244579, "grad_norm": 0.5682417750358582, "learning_rate": 3.049318234200689e-05, "loss": 1.7436, "step": 11373 }, { "epoch": 0.6339668914776211, "grad_norm": 0.5213860273361206, "learning_rate": 3.048498401262683e-05, "loss": 1.5253, "step": 11374 }, { "epoch": 0.6340226297307843, "grad_norm": 0.5296808481216431, "learning_rate": 3.0476786302149274e-05, "loss": 1.6458, "step": 11375 }, { "epoch": 0.6340783679839473, "grad_norm": 0.5561826229095459, "learning_rate": 3.0468589210834237e-05, "loss": 1.8774, "step": 11376 }, { "epoch": 0.6341341062371105, "grad_norm": 0.619141697883606, "learning_rate": 3.046039273894167e-05, "loss": 1.7102, "step": 11377 }, { "epoch": 0.6341898444902737, "grad_norm": 0.5776034593582153, "learning_rate": 3.045219688673152e-05, "loss": 1.8709, "step": 11378 }, { "epoch": 0.6342455827434368, "grad_norm": 0.5861794948577881, "learning_rate": 3.044400165446372e-05, "loss": 1.6028, "step": 11379 }, { "epoch": 0.6343013209966, "grad_norm": 0.5809794068336487, "learning_rate": 3.043580704239815e-05, "loss": 1.6706, "step": 11380 }, { "epoch": 0.6343570592497632, "grad_norm": 0.4941370189189911, "learning_rate": 3.0427613050794745e-05, "loss": 1.3136, "step": 11381 }, { "epoch": 0.6344127975029262, "grad_norm": 0.5624459385871887, "learning_rate": 3.0419419679913307e-05, "loss": 1.7069, "step": 11382 }, { "epoch": 0.6344685357560894, "grad_norm": 0.6081975698471069, "learning_rate": 3.0411226930013735e-05, "loss": 1.9564, "step": 11383 }, { "epoch": 0.6345242740092526, "grad_norm": 0.5885438919067383, "learning_rate": 3.0403034801355828e-05, "loss": 1.655, "step": 11384 }, { "epoch": 0.6345800122624157, "grad_norm": 0.5063520073890686, "learning_rate": 3.0394843294199395e-05, "loss": 1.4787, "step": 11385 }, { "epoch": 0.6346357505155789, "grad_norm": 0.5593530535697937, "learning_rate": 3.038665240880424e-05, "loss": 1.6266, "step": 11386 }, { "epoch": 0.634691488768742, "grad_norm": 0.5777801275253296, "learning_rate": 3.0378462145430102e-05, "loss": 1.8088, "step": 11387 }, { "epoch": 0.6347472270219051, "grad_norm": 0.5216872692108154, "learning_rate": 3.0370272504336762e-05, "loss": 1.5096, "step": 11388 }, { "epoch": 0.6348029652750683, "grad_norm": 0.5875978469848633, "learning_rate": 3.0362083485783897e-05, "loss": 1.9104, "step": 11389 }, { "epoch": 0.6348587035282314, "grad_norm": 0.6326603889465332, "learning_rate": 3.035389509003128e-05, "loss": 1.8119, "step": 11390 }, { "epoch": 0.6349144417813946, "grad_norm": 0.5737482309341431, "learning_rate": 3.0345707317338545e-05, "loss": 1.7255, "step": 11391 }, { "epoch": 0.6349701800345577, "grad_norm": 0.5858984589576721, "learning_rate": 3.0337520167965383e-05, "loss": 1.6893, "step": 11392 }, { "epoch": 0.6350259182877208, "grad_norm": 0.5381850600242615, "learning_rate": 3.0329333642171454e-05, "loss": 1.6162, "step": 11393 }, { "epoch": 0.635081656540884, "grad_norm": 0.597037672996521, "learning_rate": 3.032114774021636e-05, "loss": 1.6565, "step": 11394 }, { "epoch": 0.6351373947940472, "grad_norm": 0.5514940023422241, "learning_rate": 3.031296246235974e-05, "loss": 1.6259, "step": 11395 }, { "epoch": 0.6351931330472103, "grad_norm": 0.5908069014549255, "learning_rate": 3.0304777808861152e-05, "loss": 1.8429, "step": 11396 }, { "epoch": 0.6352488713003734, "grad_norm": 0.5402722954750061, "learning_rate": 3.0296593779980177e-05, "loss": 1.5404, "step": 11397 }, { "epoch": 0.6353046095535366, "grad_norm": 0.6599807739257812, "learning_rate": 3.028841037597639e-05, "loss": 1.6664, "step": 11398 }, { "epoch": 0.6353603478066997, "grad_norm": 0.5322889685630798, "learning_rate": 3.0280227597109272e-05, "loss": 1.4491, "step": 11399 }, { "epoch": 0.6354160860598629, "grad_norm": 0.5631368160247803, "learning_rate": 3.0272045443638386e-05, "loss": 1.7172, "step": 11400 }, { "epoch": 0.6354718243130261, "grad_norm": 0.5631436705589294, "learning_rate": 3.0263863915823182e-05, "loss": 1.6882, "step": 11401 }, { "epoch": 0.6355275625661891, "grad_norm": 0.5329986810684204, "learning_rate": 3.0255683013923154e-05, "loss": 1.5914, "step": 11402 }, { "epoch": 0.6355833008193523, "grad_norm": 0.5338492393493652, "learning_rate": 3.0247502738197735e-05, "loss": 1.5801, "step": 11403 }, { "epoch": 0.6356390390725155, "grad_norm": 0.6237903237342834, "learning_rate": 3.0239323088906357e-05, "loss": 1.6125, "step": 11404 }, { "epoch": 0.6356947773256786, "grad_norm": 0.5942304134368896, "learning_rate": 3.0231144066308463e-05, "loss": 1.8681, "step": 11405 }, { "epoch": 0.6357505155788418, "grad_norm": 0.5691540241241455, "learning_rate": 3.0222965670663394e-05, "loss": 1.6304, "step": 11406 }, { "epoch": 0.635806253832005, "grad_norm": 0.5843005776405334, "learning_rate": 3.021478790223057e-05, "loss": 1.6737, "step": 11407 }, { "epoch": 0.635861992085168, "grad_norm": 0.5377256274223328, "learning_rate": 3.0206610761269293e-05, "loss": 1.3904, "step": 11408 }, { "epoch": 0.6359177303383312, "grad_norm": 0.5730248093605042, "learning_rate": 3.0198434248038933e-05, "loss": 1.5715, "step": 11409 }, { "epoch": 0.6359734685914944, "grad_norm": 0.5647515654563904, "learning_rate": 3.0190258362798783e-05, "loss": 1.5383, "step": 11410 }, { "epoch": 0.6360292068446575, "grad_norm": 0.5440086722373962, "learning_rate": 3.0182083105808134e-05, "loss": 1.4719, "step": 11411 }, { "epoch": 0.6360849450978207, "grad_norm": 0.5645092725753784, "learning_rate": 3.017390847732628e-05, "loss": 1.7294, "step": 11412 }, { "epoch": 0.6361406833509837, "grad_norm": 0.5475958585739136, "learning_rate": 3.016573447761244e-05, "loss": 1.6529, "step": 11413 }, { "epoch": 0.6361964216041469, "grad_norm": 0.5536510944366455, "learning_rate": 3.015756110692587e-05, "loss": 1.3404, "step": 11414 }, { "epoch": 0.6362521598573101, "grad_norm": 0.6730042099952698, "learning_rate": 3.0149388365525756e-05, "loss": 1.726, "step": 11415 }, { "epoch": 0.6363078981104732, "grad_norm": 0.5912729501724243, "learning_rate": 3.0141216253671334e-05, "loss": 1.7075, "step": 11416 }, { "epoch": 0.6363636363636364, "grad_norm": 0.5724123120307922, "learning_rate": 3.013304477162171e-05, "loss": 1.4312, "step": 11417 }, { "epoch": 0.6364193746167995, "grad_norm": 0.558233380317688, "learning_rate": 3.0124873919636098e-05, "loss": 1.6381, "step": 11418 }, { "epoch": 0.6364751128699626, "grad_norm": 0.5994689464569092, "learning_rate": 3.0116703697973604e-05, "loss": 1.669, "step": 11419 }, { "epoch": 0.6365308511231258, "grad_norm": 0.5841171741485596, "learning_rate": 3.0108534106893336e-05, "loss": 1.7477, "step": 11420 }, { "epoch": 0.636586589376289, "grad_norm": 0.5982357263565063, "learning_rate": 3.01003651466544e-05, "loss": 1.6329, "step": 11421 }, { "epoch": 0.636642327629452, "grad_norm": 0.5657229423522949, "learning_rate": 3.009219681751585e-05, "loss": 1.65, "step": 11422 }, { "epoch": 0.6366980658826152, "grad_norm": 0.5730195045471191, "learning_rate": 3.008402911973675e-05, "loss": 1.6053, "step": 11423 }, { "epoch": 0.6367538041357784, "grad_norm": 0.5259706974029541, "learning_rate": 3.0075862053576115e-05, "loss": 1.4455, "step": 11424 }, { "epoch": 0.6368095423889415, "grad_norm": 0.5402256846427917, "learning_rate": 3.006769561929298e-05, "loss": 1.6209, "step": 11425 }, { "epoch": 0.6368652806421047, "grad_norm": 0.5747603178024292, "learning_rate": 3.005952981714633e-05, "loss": 1.5629, "step": 11426 }, { "epoch": 0.6369210188952679, "grad_norm": 0.5934039354324341, "learning_rate": 3.0051364647395132e-05, "loss": 1.6286, "step": 11427 }, { "epoch": 0.6369767571484309, "grad_norm": 0.5477046370506287, "learning_rate": 3.004320011029834e-05, "loss": 1.3619, "step": 11428 }, { "epoch": 0.6370324954015941, "grad_norm": 0.6017884016036987, "learning_rate": 3.003503620611488e-05, "loss": 1.8033, "step": 11429 }, { "epoch": 0.6370882336547573, "grad_norm": 0.5695460438728333, "learning_rate": 3.0026872935103658e-05, "loss": 1.7917, "step": 11430 }, { "epoch": 0.6371439719079204, "grad_norm": 0.5401207208633423, "learning_rate": 3.00187102975236e-05, "loss": 1.5718, "step": 11431 }, { "epoch": 0.6371997101610836, "grad_norm": 0.5520731806755066, "learning_rate": 3.0010548293633527e-05, "loss": 1.6192, "step": 11432 }, { "epoch": 0.6372554484142468, "grad_norm": 0.5545134544372559, "learning_rate": 3.0002386923692344e-05, "loss": 1.7251, "step": 11433 }, { "epoch": 0.6373111866674098, "grad_norm": 0.5655757784843445, "learning_rate": 2.9994226187958823e-05, "loss": 1.6958, "step": 11434 }, { "epoch": 0.637366924920573, "grad_norm": 0.7290306091308594, "learning_rate": 2.9986066086691828e-05, "loss": 1.688, "step": 11435 }, { "epoch": 0.6374226631737361, "grad_norm": 0.5409923195838928, "learning_rate": 2.997790662015012e-05, "loss": 1.4879, "step": 11436 }, { "epoch": 0.6374784014268993, "grad_norm": 0.5542522668838501, "learning_rate": 2.9969747788592472e-05, "loss": 1.7174, "step": 11437 }, { "epoch": 0.6375341396800625, "grad_norm": 0.5595596432685852, "learning_rate": 2.996158959227765e-05, "loss": 1.6473, "step": 11438 }, { "epoch": 0.6375898779332255, "grad_norm": 0.5823233127593994, "learning_rate": 2.995343203146436e-05, "loss": 1.8093, "step": 11439 }, { "epoch": 0.6376456161863887, "grad_norm": 0.5893445014953613, "learning_rate": 2.994527510641134e-05, "loss": 1.5227, "step": 11440 }, { "epoch": 0.6377013544395519, "grad_norm": 0.5489947199821472, "learning_rate": 2.993711881737725e-05, "loss": 1.5738, "step": 11441 }, { "epoch": 0.637757092692715, "grad_norm": 0.5400989055633545, "learning_rate": 2.9928963164620806e-05, "loss": 1.6712, "step": 11442 }, { "epoch": 0.6378128309458782, "grad_norm": 0.5572450160980225, "learning_rate": 2.992080814840059e-05, "loss": 1.3923, "step": 11443 }, { "epoch": 0.6378685691990413, "grad_norm": 0.6122034192085266, "learning_rate": 2.991265376897529e-05, "loss": 1.7614, "step": 11444 }, { "epoch": 0.6379243074522044, "grad_norm": 0.5644296407699585, "learning_rate": 2.99045000266035e-05, "loss": 1.6199, "step": 11445 }, { "epoch": 0.6379800457053676, "grad_norm": 0.5942640900611877, "learning_rate": 2.9896346921543796e-05, "loss": 1.9717, "step": 11446 }, { "epoch": 0.6380357839585308, "grad_norm": 0.6068518757820129, "learning_rate": 2.9888194454054762e-05, "loss": 1.7399, "step": 11447 }, { "epoch": 0.6380915222116939, "grad_norm": 0.6198694705963135, "learning_rate": 2.988004262439493e-05, "loss": 1.7467, "step": 11448 }, { "epoch": 0.638147260464857, "grad_norm": 0.6115100383758545, "learning_rate": 2.9871891432822858e-05, "loss": 1.8546, "step": 11449 }, { "epoch": 0.6382029987180202, "grad_norm": 0.540266752243042, "learning_rate": 2.9863740879597006e-05, "loss": 1.5476, "step": 11450 }, { "epoch": 0.6382587369711833, "grad_norm": 0.5491194725036621, "learning_rate": 2.98555909649759e-05, "loss": 1.5871, "step": 11451 }, { "epoch": 0.6383144752243465, "grad_norm": 0.5302134156227112, "learning_rate": 2.9847441689218014e-05, "loss": 1.592, "step": 11452 }, { "epoch": 0.6383702134775097, "grad_norm": 0.5229141116142273, "learning_rate": 2.9839293052581767e-05, "loss": 1.4722, "step": 11453 }, { "epoch": 0.6384259517306727, "grad_norm": 0.5786263942718506, "learning_rate": 2.983114505532561e-05, "loss": 1.5706, "step": 11454 }, { "epoch": 0.6384816899838359, "grad_norm": 0.5235024690628052, "learning_rate": 2.9822997697707927e-05, "loss": 1.6502, "step": 11455 }, { "epoch": 0.6385374282369991, "grad_norm": 0.5675355792045593, "learning_rate": 2.981485097998711e-05, "loss": 1.7896, "step": 11456 }, { "epoch": 0.6385931664901622, "grad_norm": 0.5436956882476807, "learning_rate": 2.9806704902421555e-05, "loss": 1.5343, "step": 11457 }, { "epoch": 0.6386489047433254, "grad_norm": 0.5603477358818054, "learning_rate": 2.9798559465269564e-05, "loss": 1.6853, "step": 11458 }, { "epoch": 0.6387046429964884, "grad_norm": 0.5103815197944641, "learning_rate": 2.9790414668789514e-05, "loss": 1.3626, "step": 11459 }, { "epoch": 0.6387603812496516, "grad_norm": 0.6653990149497986, "learning_rate": 2.978227051323965e-05, "loss": 1.7424, "step": 11460 }, { "epoch": 0.6388161195028148, "grad_norm": 0.5415511131286621, "learning_rate": 2.9774126998878305e-05, "loss": 1.586, "step": 11461 }, { "epoch": 0.6388718577559779, "grad_norm": 0.5623989701271057, "learning_rate": 2.976598412596372e-05, "loss": 1.7159, "step": 11462 }, { "epoch": 0.6389275960091411, "grad_norm": 0.47628477215766907, "learning_rate": 2.975784189475414e-05, "loss": 1.1537, "step": 11463 }, { "epoch": 0.6389833342623042, "grad_norm": 0.6199833750724792, "learning_rate": 2.974970030550781e-05, "loss": 1.8564, "step": 11464 }, { "epoch": 0.6390390725154673, "grad_norm": 0.5712960362434387, "learning_rate": 2.9741559358482908e-05, "loss": 1.474, "step": 11465 }, { "epoch": 0.6390948107686305, "grad_norm": 0.5492451190948486, "learning_rate": 2.973341905393764e-05, "loss": 1.6722, "step": 11466 }, { "epoch": 0.6391505490217937, "grad_norm": 0.5948915481567383, "learning_rate": 2.972527939213013e-05, "loss": 1.499, "step": 11467 }, { "epoch": 0.6392062872749568, "grad_norm": 0.5698557496070862, "learning_rate": 2.9717140373318587e-05, "loss": 1.7276, "step": 11468 }, { "epoch": 0.63926202552812, "grad_norm": 0.5909801721572876, "learning_rate": 2.9709001997761053e-05, "loss": 1.7654, "step": 11469 }, { "epoch": 0.6393177637812831, "grad_norm": 0.5476438999176025, "learning_rate": 2.970086426571569e-05, "loss": 1.634, "step": 11470 }, { "epoch": 0.6393735020344462, "grad_norm": 0.5444782972335815, "learning_rate": 2.969272717744057e-05, "loss": 1.6631, "step": 11471 }, { "epoch": 0.6394292402876094, "grad_norm": 0.5920711159706116, "learning_rate": 2.9684590733193728e-05, "loss": 1.9441, "step": 11472 }, { "epoch": 0.6394849785407726, "grad_norm": 0.5431917309761047, "learning_rate": 2.9676454933233238e-05, "loss": 1.5584, "step": 11473 }, { "epoch": 0.6395407167939356, "grad_norm": 0.5913118720054626, "learning_rate": 2.9668319777817088e-05, "loss": 1.6103, "step": 11474 }, { "epoch": 0.6395964550470988, "grad_norm": 0.5852524042129517, "learning_rate": 2.966018526720331e-05, "loss": 1.7739, "step": 11475 }, { "epoch": 0.639652193300262, "grad_norm": 0.5839758515357971, "learning_rate": 2.965205140164984e-05, "loss": 1.8033, "step": 11476 }, { "epoch": 0.6397079315534251, "grad_norm": 0.5499829649925232, "learning_rate": 2.9643918181414676e-05, "loss": 1.5886, "step": 11477 }, { "epoch": 0.6397636698065883, "grad_norm": 0.5322203636169434, "learning_rate": 2.963578560675575e-05, "loss": 1.3883, "step": 11478 }, { "epoch": 0.6398194080597515, "grad_norm": 0.5608605146408081, "learning_rate": 2.962765367793096e-05, "loss": 1.764, "step": 11479 }, { "epoch": 0.6398751463129145, "grad_norm": 0.4977636933326721, "learning_rate": 2.9619522395198228e-05, "loss": 1.2644, "step": 11480 }, { "epoch": 0.6399308845660777, "grad_norm": 0.580485999584198, "learning_rate": 2.9611391758815416e-05, "loss": 1.6284, "step": 11481 }, { "epoch": 0.6399866228192408, "grad_norm": 0.5771294832229614, "learning_rate": 2.9603261769040368e-05, "loss": 1.5244, "step": 11482 }, { "epoch": 0.640042361072404, "grad_norm": 0.5638933777809143, "learning_rate": 2.9595132426130968e-05, "loss": 1.662, "step": 11483 }, { "epoch": 0.6400980993255672, "grad_norm": 0.590519368648529, "learning_rate": 2.9587003730344965e-05, "loss": 1.7329, "step": 11484 }, { "epoch": 0.6401538375787302, "grad_norm": 0.5339858531951904, "learning_rate": 2.9578875681940223e-05, "loss": 1.5648, "step": 11485 }, { "epoch": 0.6402095758318934, "grad_norm": 0.5530378818511963, "learning_rate": 2.9570748281174443e-05, "loss": 1.639, "step": 11486 }, { "epoch": 0.6402653140850566, "grad_norm": 0.5428372621536255, "learning_rate": 2.9562621528305445e-05, "loss": 1.5573, "step": 11487 }, { "epoch": 0.6403210523382197, "grad_norm": 0.5383026003837585, "learning_rate": 2.9554495423590924e-05, "loss": 1.6015, "step": 11488 }, { "epoch": 0.6403767905913829, "grad_norm": 0.5547573566436768, "learning_rate": 2.9546369967288594e-05, "loss": 1.6447, "step": 11489 }, { "epoch": 0.640432528844546, "grad_norm": 0.5519043207168579, "learning_rate": 2.9538245159656174e-05, "loss": 1.5088, "step": 11490 }, { "epoch": 0.6404882670977091, "grad_norm": 0.5677748322486877, "learning_rate": 2.9530121000951294e-05, "loss": 1.6811, "step": 11491 }, { "epoch": 0.6405440053508723, "grad_norm": 0.5701718926429749, "learning_rate": 2.952199749143165e-05, "loss": 1.5924, "step": 11492 }, { "epoch": 0.6405997436040355, "grad_norm": 0.5985507369041443, "learning_rate": 2.9513874631354833e-05, "loss": 1.5902, "step": 11493 }, { "epoch": 0.6406554818571986, "grad_norm": 0.5509946346282959, "learning_rate": 2.9505752420978495e-05, "loss": 1.4045, "step": 11494 }, { "epoch": 0.6407112201103617, "grad_norm": 0.5370048880577087, "learning_rate": 2.9497630860560178e-05, "loss": 1.4327, "step": 11495 }, { "epoch": 0.6407669583635249, "grad_norm": 0.5271995663642883, "learning_rate": 2.9489509950357476e-05, "loss": 1.5244, "step": 11496 }, { "epoch": 0.640822696616688, "grad_norm": 0.5988388061523438, "learning_rate": 2.9481389690627943e-05, "loss": 1.7219, "step": 11497 }, { "epoch": 0.6408784348698512, "grad_norm": 0.5371741652488708, "learning_rate": 2.947327008162909e-05, "loss": 1.5458, "step": 11498 }, { "epoch": 0.6409341731230144, "grad_norm": 0.6004182696342468, "learning_rate": 2.946515112361844e-05, "loss": 1.8704, "step": 11499 }, { "epoch": 0.6409899113761774, "grad_norm": 0.5374881029129028, "learning_rate": 2.945703281685346e-05, "loss": 1.6822, "step": 11500 }, { "epoch": 0.6410456496293406, "grad_norm": 0.571029007434845, "learning_rate": 2.944891516159163e-05, "loss": 1.7695, "step": 11501 }, { "epoch": 0.6411013878825038, "grad_norm": 0.5365399122238159, "learning_rate": 2.9440798158090377e-05, "loss": 1.4151, "step": 11502 }, { "epoch": 0.6411571261356669, "grad_norm": 0.574303388595581, "learning_rate": 2.9432681806607143e-05, "loss": 1.7826, "step": 11503 }, { "epoch": 0.6412128643888301, "grad_norm": 0.571097195148468, "learning_rate": 2.9424566107399342e-05, "loss": 1.6817, "step": 11504 }, { "epoch": 0.6412686026419931, "grad_norm": 0.5556220412254333, "learning_rate": 2.9416451060724325e-05, "loss": 1.7257, "step": 11505 }, { "epoch": 0.6413243408951563, "grad_norm": 0.5598143935203552, "learning_rate": 2.9408336666839488e-05, "loss": 1.5757, "step": 11506 }, { "epoch": 0.6413800791483195, "grad_norm": 0.6592652201652527, "learning_rate": 2.940022292600213e-05, "loss": 1.8345, "step": 11507 }, { "epoch": 0.6414358174014826, "grad_norm": 0.5990983843803406, "learning_rate": 2.9392109838469594e-05, "loss": 1.7728, "step": 11508 }, { "epoch": 0.6414915556546458, "grad_norm": 0.5551927089691162, "learning_rate": 2.938399740449922e-05, "loss": 1.4375, "step": 11509 }, { "epoch": 0.641547293907809, "grad_norm": 0.5568867325782776, "learning_rate": 2.937588562434821e-05, "loss": 1.6464, "step": 11510 }, { "epoch": 0.641603032160972, "grad_norm": 0.569223940372467, "learning_rate": 2.936777449827388e-05, "loss": 1.8679, "step": 11511 }, { "epoch": 0.6416587704141352, "grad_norm": 0.6105400323867798, "learning_rate": 2.935966402653344e-05, "loss": 1.8918, "step": 11512 }, { "epoch": 0.6417145086672984, "grad_norm": 0.5622360706329346, "learning_rate": 2.9351554209384125e-05, "loss": 1.6716, "step": 11513 }, { "epoch": 0.6417702469204615, "grad_norm": 0.5488218069076538, "learning_rate": 2.9343445047083117e-05, "loss": 1.7824, "step": 11514 }, { "epoch": 0.6418259851736247, "grad_norm": 0.547471284866333, "learning_rate": 2.933533653988759e-05, "loss": 1.557, "step": 11515 }, { "epoch": 0.6418817234267878, "grad_norm": 0.5255625247955322, "learning_rate": 2.932722868805472e-05, "loss": 1.498, "step": 11516 }, { "epoch": 0.6419374616799509, "grad_norm": 0.5560644268989563, "learning_rate": 2.93191214918416e-05, "loss": 1.628, "step": 11517 }, { "epoch": 0.6419931999331141, "grad_norm": 0.5956060886383057, "learning_rate": 2.93110149515054e-05, "loss": 1.5985, "step": 11518 }, { "epoch": 0.6420489381862773, "grad_norm": 0.5386204719543457, "learning_rate": 2.9302909067303152e-05, "loss": 1.5679, "step": 11519 }, { "epoch": 0.6421046764394404, "grad_norm": 0.5531619787216187, "learning_rate": 2.929480383949198e-05, "loss": 1.8117, "step": 11520 }, { "epoch": 0.6421604146926035, "grad_norm": 0.5731996297836304, "learning_rate": 2.9286699268328887e-05, "loss": 1.6363, "step": 11521 }, { "epoch": 0.6422161529457667, "grad_norm": 0.5966832041740417, "learning_rate": 2.9278595354070937e-05, "loss": 1.7658, "step": 11522 }, { "epoch": 0.6422718911989298, "grad_norm": 0.5194985866546631, "learning_rate": 2.9270492096975134e-05, "loss": 1.4656, "step": 11523 }, { "epoch": 0.642327629452093, "grad_norm": 0.5613593459129333, "learning_rate": 2.9262389497298454e-05, "loss": 1.5639, "step": 11524 }, { "epoch": 0.6423833677052562, "grad_norm": 0.6349266171455383, "learning_rate": 2.9254287555297876e-05, "loss": 1.9102, "step": 11525 }, { "epoch": 0.6424391059584192, "grad_norm": 0.523147702217102, "learning_rate": 2.9246186271230337e-05, "loss": 1.72, "step": 11526 }, { "epoch": 0.6424948442115824, "grad_norm": 0.65010005235672, "learning_rate": 2.9238085645352776e-05, "loss": 1.729, "step": 11527 }, { "epoch": 0.6425505824647455, "grad_norm": 0.5469678640365601, "learning_rate": 2.9229985677922062e-05, "loss": 1.6568, "step": 11528 }, { "epoch": 0.6426063207179087, "grad_norm": 0.5617852210998535, "learning_rate": 2.9221886369195116e-05, "loss": 1.4062, "step": 11529 }, { "epoch": 0.6426620589710719, "grad_norm": 0.500699520111084, "learning_rate": 2.9213787719428805e-05, "loss": 1.5071, "step": 11530 }, { "epoch": 0.6427177972242349, "grad_norm": 0.5502698421478271, "learning_rate": 2.9205689728879936e-05, "loss": 1.572, "step": 11531 }, { "epoch": 0.6427735354773981, "grad_norm": 0.6290270090103149, "learning_rate": 2.919759239780537e-05, "loss": 1.3922, "step": 11532 }, { "epoch": 0.6428292737305613, "grad_norm": 0.5990520715713501, "learning_rate": 2.9189495726461868e-05, "loss": 1.6264, "step": 11533 }, { "epoch": 0.6428850119837244, "grad_norm": 0.5172905921936035, "learning_rate": 2.918139971510624e-05, "loss": 1.4878, "step": 11534 }, { "epoch": 0.6429407502368876, "grad_norm": 0.5496329069137573, "learning_rate": 2.917330436399522e-05, "loss": 1.6821, "step": 11535 }, { "epoch": 0.6429964884900508, "grad_norm": 0.5884643793106079, "learning_rate": 2.9165209673385563e-05, "loss": 1.8271, "step": 11536 }, { "epoch": 0.6430522267432138, "grad_norm": 0.5393579602241516, "learning_rate": 2.9157115643533993e-05, "loss": 1.4832, "step": 11537 }, { "epoch": 0.643107964996377, "grad_norm": 0.5137203335762024, "learning_rate": 2.914902227469718e-05, "loss": 1.5079, "step": 11538 }, { "epoch": 0.6431637032495402, "grad_norm": 0.5596064329147339, "learning_rate": 2.9140929567131815e-05, "loss": 1.5945, "step": 11539 }, { "epoch": 0.6432194415027033, "grad_norm": 0.5552858114242554, "learning_rate": 2.9132837521094535e-05, "loss": 1.598, "step": 11540 }, { "epoch": 0.6432751797558665, "grad_norm": 0.5611785650253296, "learning_rate": 2.9124746136841996e-05, "loss": 1.6551, "step": 11541 }, { "epoch": 0.6433309180090296, "grad_norm": 0.601570188999176, "learning_rate": 2.911665541463079e-05, "loss": 1.5584, "step": 11542 }, { "epoch": 0.6433866562621927, "grad_norm": 0.5507310032844543, "learning_rate": 2.9108565354717522e-05, "loss": 1.7545, "step": 11543 }, { "epoch": 0.6434423945153559, "grad_norm": 0.5532577633857727, "learning_rate": 2.910047595735877e-05, "loss": 1.5668, "step": 11544 }, { "epoch": 0.6434981327685191, "grad_norm": 0.5722034573554993, "learning_rate": 2.9092387222811045e-05, "loss": 1.7969, "step": 11545 }, { "epoch": 0.6435538710216822, "grad_norm": 0.5793879628181458, "learning_rate": 2.9084299151330906e-05, "loss": 1.7327, "step": 11546 }, { "epoch": 0.6436096092748453, "grad_norm": 0.6248428821563721, "learning_rate": 2.9076211743174854e-05, "loss": 1.7837, "step": 11547 }, { "epoch": 0.6436653475280085, "grad_norm": 0.5645349621772766, "learning_rate": 2.9068124998599362e-05, "loss": 1.6744, "step": 11548 }, { "epoch": 0.6437210857811716, "grad_norm": 0.5263849496841431, "learning_rate": 2.9060038917860928e-05, "loss": 1.6584, "step": 11549 }, { "epoch": 0.6437768240343348, "grad_norm": 0.5999687910079956, "learning_rate": 2.9051953501215928e-05, "loss": 1.6557, "step": 11550 }, { "epoch": 0.6438325622874979, "grad_norm": 0.5746318101882935, "learning_rate": 2.9043868748920868e-05, "loss": 1.7061, "step": 11551 }, { "epoch": 0.643888300540661, "grad_norm": 0.553269624710083, "learning_rate": 2.903578466123209e-05, "loss": 1.7217, "step": 11552 }, { "epoch": 0.6439440387938242, "grad_norm": 0.5399090647697449, "learning_rate": 2.902770123840599e-05, "loss": 1.6482, "step": 11553 }, { "epoch": 0.6439997770469873, "grad_norm": 0.5580053925514221, "learning_rate": 2.901961848069894e-05, "loss": 1.5762, "step": 11554 }, { "epoch": 0.6440555153001505, "grad_norm": 0.5592229962348938, "learning_rate": 2.9011536388367256e-05, "loss": 1.5193, "step": 11555 }, { "epoch": 0.6441112535533137, "grad_norm": 0.5572046041488647, "learning_rate": 2.900345496166729e-05, "loss": 1.6022, "step": 11556 }, { "epoch": 0.6441669918064767, "grad_norm": 0.6023865938186646, "learning_rate": 2.8995374200855275e-05, "loss": 1.748, "step": 11557 }, { "epoch": 0.6442227300596399, "grad_norm": 0.5624969005584717, "learning_rate": 2.8987294106187567e-05, "loss": 1.4763, "step": 11558 }, { "epoch": 0.6442784683128031, "grad_norm": 0.55375736951828, "learning_rate": 2.8979214677920353e-05, "loss": 1.6101, "step": 11559 }, { "epoch": 0.6443342065659662, "grad_norm": 0.5634573101997375, "learning_rate": 2.8971135916309895e-05, "loss": 1.7446, "step": 11560 }, { "epoch": 0.6443899448191294, "grad_norm": 0.6170838475227356, "learning_rate": 2.8963057821612394e-05, "loss": 1.8012, "step": 11561 }, { "epoch": 0.6444456830722926, "grad_norm": 0.5855227112770081, "learning_rate": 2.8954980394084046e-05, "loss": 1.4746, "step": 11562 }, { "epoch": 0.6445014213254556, "grad_norm": 0.5456474423408508, "learning_rate": 2.8946903633981038e-05, "loss": 1.5389, "step": 11563 }, { "epoch": 0.6445571595786188, "grad_norm": 0.6640143394470215, "learning_rate": 2.8938827541559482e-05, "loss": 1.9017, "step": 11564 }, { "epoch": 0.644612897831782, "grad_norm": 0.5690329074859619, "learning_rate": 2.893075211707552e-05, "loss": 1.668, "step": 11565 }, { "epoch": 0.6446686360849451, "grad_norm": 0.6500377655029297, "learning_rate": 2.8922677360785255e-05, "loss": 1.9912, "step": 11566 }, { "epoch": 0.6447243743381083, "grad_norm": 0.532332181930542, "learning_rate": 2.8914603272944784e-05, "loss": 1.4963, "step": 11567 }, { "epoch": 0.6447801125912714, "grad_norm": 0.5918958187103271, "learning_rate": 2.890652985381015e-05, "loss": 1.6432, "step": 11568 }, { "epoch": 0.6448358508444345, "grad_norm": 0.6097450256347656, "learning_rate": 2.8898457103637412e-05, "loss": 1.65, "step": 11569 }, { "epoch": 0.6448915890975977, "grad_norm": 0.5675815939903259, "learning_rate": 2.8890385022682603e-05, "loss": 1.6351, "step": 11570 }, { "epoch": 0.6449473273507609, "grad_norm": 0.6037099957466125, "learning_rate": 2.8882313611201684e-05, "loss": 2.0205, "step": 11571 }, { "epoch": 0.645003065603924, "grad_norm": 0.6402329206466675, "learning_rate": 2.887424286945065e-05, "loss": 1.8547, "step": 11572 }, { "epoch": 0.6450588038570871, "grad_norm": 0.5856971740722656, "learning_rate": 2.8866172797685463e-05, "loss": 1.733, "step": 11573 }, { "epoch": 0.6451145421102502, "grad_norm": 0.613845705986023, "learning_rate": 2.8858103396162055e-05, "loss": 1.7774, "step": 11574 }, { "epoch": 0.6451702803634134, "grad_norm": 0.5045792460441589, "learning_rate": 2.8850034665136345e-05, "loss": 1.4179, "step": 11575 }, { "epoch": 0.6452260186165766, "grad_norm": 0.589607834815979, "learning_rate": 2.8841966604864218e-05, "loss": 1.7132, "step": 11576 }, { "epoch": 0.6452817568697397, "grad_norm": 0.5298007726669312, "learning_rate": 2.8833899215601567e-05, "loss": 1.6576, "step": 11577 }, { "epoch": 0.6453374951229028, "grad_norm": 0.5419639348983765, "learning_rate": 2.8825832497604215e-05, "loss": 1.5273, "step": 11578 }, { "epoch": 0.645393233376066, "grad_norm": 0.503090500831604, "learning_rate": 2.8817766451127997e-05, "loss": 1.3098, "step": 11579 }, { "epoch": 0.6454489716292291, "grad_norm": 0.5862603187561035, "learning_rate": 2.880970107642873e-05, "loss": 1.7311, "step": 11580 }, { "epoch": 0.6455047098823923, "grad_norm": 0.5508575439453125, "learning_rate": 2.8801636373762193e-05, "loss": 1.5223, "step": 11581 }, { "epoch": 0.6455604481355555, "grad_norm": 0.5836048126220703, "learning_rate": 2.879357234338418e-05, "loss": 1.7109, "step": 11582 }, { "epoch": 0.6456161863887185, "grad_norm": 0.665833592414856, "learning_rate": 2.878550898555036e-05, "loss": 1.811, "step": 11583 }, { "epoch": 0.6456719246418817, "grad_norm": 0.5850480794906616, "learning_rate": 2.8777446300516552e-05, "loss": 1.6823, "step": 11584 }, { "epoch": 0.6457276628950449, "grad_norm": 0.5332453846931458, "learning_rate": 2.876938428853839e-05, "loss": 1.576, "step": 11585 }, { "epoch": 0.645783401148208, "grad_norm": 0.5579224228858948, "learning_rate": 2.8761322949871582e-05, "loss": 1.6211, "step": 11586 }, { "epoch": 0.6458391394013712, "grad_norm": 0.6172266602516174, "learning_rate": 2.8753262284771776e-05, "loss": 1.8955, "step": 11587 }, { "epoch": 0.6458948776545343, "grad_norm": 0.5603342652320862, "learning_rate": 2.874520229349461e-05, "loss": 1.6045, "step": 11588 }, { "epoch": 0.6459506159076974, "grad_norm": 0.596922755241394, "learning_rate": 2.8737142976295723e-05, "loss": 1.3921, "step": 11589 }, { "epoch": 0.6460063541608606, "grad_norm": 0.5799021124839783, "learning_rate": 2.8729084333430673e-05, "loss": 1.6893, "step": 11590 }, { "epoch": 0.6460620924140238, "grad_norm": 0.6304532289505005, "learning_rate": 2.8721026365155046e-05, "loss": 1.6962, "step": 11591 }, { "epoch": 0.6461178306671869, "grad_norm": 0.5298161506652832, "learning_rate": 2.8712969071724405e-05, "loss": 1.5756, "step": 11592 }, { "epoch": 0.64617356892035, "grad_norm": 0.5427919030189514, "learning_rate": 2.8704912453394266e-05, "loss": 1.5104, "step": 11593 }, { "epoch": 0.6462293071735132, "grad_norm": 0.5470585823059082, "learning_rate": 2.8696856510420146e-05, "loss": 1.6283, "step": 11594 }, { "epoch": 0.6462850454266763, "grad_norm": 0.55455082654953, "learning_rate": 2.8688801243057532e-05, "loss": 1.7311, "step": 11595 }, { "epoch": 0.6463407836798395, "grad_norm": 0.5554984211921692, "learning_rate": 2.868074665156191e-05, "loss": 1.695, "step": 11596 }, { "epoch": 0.6463965219330026, "grad_norm": 0.5633205771446228, "learning_rate": 2.867269273618869e-05, "loss": 1.6918, "step": 11597 }, { "epoch": 0.6464522601861658, "grad_norm": 0.5851027965545654, "learning_rate": 2.8664639497193303e-05, "loss": 1.865, "step": 11598 }, { "epoch": 0.6465079984393289, "grad_norm": 0.526494026184082, "learning_rate": 2.865658693483116e-05, "loss": 1.5144, "step": 11599 }, { "epoch": 0.646563736692492, "grad_norm": 0.5389431118965149, "learning_rate": 2.8648535049357637e-05, "loss": 1.5973, "step": 11600 }, { "epoch": 0.6466194749456552, "grad_norm": 0.5754119753837585, "learning_rate": 2.86404838410281e-05, "loss": 1.763, "step": 11601 }, { "epoch": 0.6466752131988184, "grad_norm": 0.5695751309394836, "learning_rate": 2.863243331009787e-05, "loss": 1.5489, "step": 11602 }, { "epoch": 0.6467309514519815, "grad_norm": 0.5716252326965332, "learning_rate": 2.86243834568223e-05, "loss": 1.8196, "step": 11603 }, { "epoch": 0.6467866897051446, "grad_norm": 0.5450440049171448, "learning_rate": 2.8616334281456643e-05, "loss": 1.7187, "step": 11604 }, { "epoch": 0.6468424279583078, "grad_norm": 0.5670022964477539, "learning_rate": 2.8608285784256182e-05, "loss": 1.6422, "step": 11605 }, { "epoch": 0.6468981662114709, "grad_norm": 0.5809680819511414, "learning_rate": 2.8600237965476172e-05, "loss": 1.6867, "step": 11606 }, { "epoch": 0.6469539044646341, "grad_norm": 0.5372865796089172, "learning_rate": 2.8592190825371845e-05, "loss": 1.561, "step": 11607 }, { "epoch": 0.6470096427177973, "grad_norm": 0.557598888874054, "learning_rate": 2.8584144364198428e-05, "loss": 1.7218, "step": 11608 }, { "epoch": 0.6470653809709603, "grad_norm": 0.57530277967453, "learning_rate": 2.8576098582211054e-05, "loss": 1.5977, "step": 11609 }, { "epoch": 0.6471211192241235, "grad_norm": 0.5730135440826416, "learning_rate": 2.856805347966496e-05, "loss": 1.6856, "step": 11610 }, { "epoch": 0.6471768574772867, "grad_norm": 0.5707370042800903, "learning_rate": 2.8560009056815235e-05, "loss": 1.6719, "step": 11611 }, { "epoch": 0.6472325957304498, "grad_norm": 0.5852611064910889, "learning_rate": 2.855196531391702e-05, "loss": 1.5255, "step": 11612 }, { "epoch": 0.647288333983613, "grad_norm": 0.5563340783119202, "learning_rate": 2.8543922251225408e-05, "loss": 1.543, "step": 11613 }, { "epoch": 0.6473440722367761, "grad_norm": 0.6254847645759583, "learning_rate": 2.8535879868995487e-05, "loss": 1.9824, "step": 11614 }, { "epoch": 0.6473998104899392, "grad_norm": 0.5794035196304321, "learning_rate": 2.8527838167482336e-05, "loss": 1.9843, "step": 11615 }, { "epoch": 0.6474555487431024, "grad_norm": 0.565477728843689, "learning_rate": 2.851979714694094e-05, "loss": 1.6633, "step": 11616 }, { "epoch": 0.6475112869962656, "grad_norm": 0.5635069608688354, "learning_rate": 2.8511756807626345e-05, "loss": 1.7438, "step": 11617 }, { "epoch": 0.6475670252494287, "grad_norm": 0.5621711015701294, "learning_rate": 2.850371714979354e-05, "loss": 1.4934, "step": 11618 }, { "epoch": 0.6476227635025918, "grad_norm": 0.5810402035713196, "learning_rate": 2.8495678173697494e-05, "loss": 1.7621, "step": 11619 }, { "epoch": 0.6476785017557549, "grad_norm": 0.5873593091964722, "learning_rate": 2.8487639879593153e-05, "loss": 1.6058, "step": 11620 }, { "epoch": 0.6477342400089181, "grad_norm": 0.604656994342804, "learning_rate": 2.847960226773545e-05, "loss": 1.8631, "step": 11621 }, { "epoch": 0.6477899782620813, "grad_norm": 0.5500726699829102, "learning_rate": 2.8471565338379313e-05, "loss": 1.5498, "step": 11622 }, { "epoch": 0.6478457165152444, "grad_norm": 0.5837782621383667, "learning_rate": 2.8463529091779583e-05, "loss": 1.5045, "step": 11623 }, { "epoch": 0.6479014547684075, "grad_norm": 0.5583280920982361, "learning_rate": 2.8455493528191145e-05, "loss": 1.606, "step": 11624 }, { "epoch": 0.6479571930215707, "grad_norm": 0.5514859557151794, "learning_rate": 2.844745864786884e-05, "loss": 1.7629, "step": 11625 }, { "epoch": 0.6480129312747338, "grad_norm": 0.5777311325073242, "learning_rate": 2.8439424451067487e-05, "loss": 1.644, "step": 11626 }, { "epoch": 0.648068669527897, "grad_norm": 0.5456623435020447, "learning_rate": 2.843139093804188e-05, "loss": 1.6939, "step": 11627 }, { "epoch": 0.6481244077810602, "grad_norm": 0.5963661074638367, "learning_rate": 2.8423358109046806e-05, "loss": 1.7068, "step": 11628 }, { "epoch": 0.6481801460342232, "grad_norm": 0.5352113246917725, "learning_rate": 2.8415325964337026e-05, "loss": 1.6281, "step": 11629 }, { "epoch": 0.6482358842873864, "grad_norm": 0.5343273878097534, "learning_rate": 2.8407294504167236e-05, "loss": 1.4476, "step": 11630 }, { "epoch": 0.6482916225405496, "grad_norm": 0.5758916139602661, "learning_rate": 2.839926372879218e-05, "loss": 1.7469, "step": 11631 }, { "epoch": 0.6483473607937127, "grad_norm": 0.5887387990951538, "learning_rate": 2.839123363846653e-05, "loss": 1.635, "step": 11632 }, { "epoch": 0.6484030990468759, "grad_norm": 0.5245947241783142, "learning_rate": 2.838320423344496e-05, "loss": 1.6089, "step": 11633 }, { "epoch": 0.6484588373000391, "grad_norm": 0.579623281955719, "learning_rate": 2.8375175513982144e-05, "loss": 1.6684, "step": 11634 }, { "epoch": 0.6485145755532021, "grad_norm": 0.5545833110809326, "learning_rate": 2.8367147480332635e-05, "loss": 1.6369, "step": 11635 }, { "epoch": 0.6485703138063653, "grad_norm": 0.5932074785232544, "learning_rate": 2.8359120132751116e-05, "loss": 1.7348, "step": 11636 }, { "epoch": 0.6486260520595285, "grad_norm": 0.5304184556007385, "learning_rate": 2.835109347149212e-05, "loss": 1.5308, "step": 11637 }, { "epoch": 0.6486817903126916, "grad_norm": 0.5450805425643921, "learning_rate": 2.834306749681021e-05, "loss": 1.6735, "step": 11638 }, { "epoch": 0.6487375285658548, "grad_norm": 0.5163072347640991, "learning_rate": 2.8335042208959932e-05, "loss": 1.3315, "step": 11639 }, { "epoch": 0.6487932668190179, "grad_norm": 0.5149058103561401, "learning_rate": 2.8327017608195804e-05, "loss": 1.4162, "step": 11640 }, { "epoch": 0.648849005072181, "grad_norm": 0.5630050897598267, "learning_rate": 2.831899369477233e-05, "loss": 1.407, "step": 11641 }, { "epoch": 0.6489047433253442, "grad_norm": 0.5516093969345093, "learning_rate": 2.8310970468943947e-05, "loss": 1.4329, "step": 11642 }, { "epoch": 0.6489604815785073, "grad_norm": 0.5966786742210388, "learning_rate": 2.830294793096513e-05, "loss": 1.7362, "step": 11643 }, { "epoch": 0.6490162198316705, "grad_norm": 0.5372209548950195, "learning_rate": 2.8294926081090296e-05, "loss": 1.8945, "step": 11644 }, { "epoch": 0.6490719580848336, "grad_norm": 0.5281509160995483, "learning_rate": 2.8286904919573858e-05, "loss": 1.6381, "step": 11645 }, { "epoch": 0.6491276963379967, "grad_norm": 0.5646560788154602, "learning_rate": 2.8278884446670205e-05, "loss": 1.5749, "step": 11646 }, { "epoch": 0.6491834345911599, "grad_norm": 0.5708281993865967, "learning_rate": 2.827086466263369e-05, "loss": 1.6901, "step": 11647 }, { "epoch": 0.6492391728443231, "grad_norm": 0.5605478882789612, "learning_rate": 2.8262845567718676e-05, "loss": 1.588, "step": 11648 }, { "epoch": 0.6492949110974862, "grad_norm": 0.5626661777496338, "learning_rate": 2.8254827162179453e-05, "loss": 1.5874, "step": 11649 }, { "epoch": 0.6493506493506493, "grad_norm": 0.5522568225860596, "learning_rate": 2.8246809446270323e-05, "loss": 1.5423, "step": 11650 }, { "epoch": 0.6494063876038125, "grad_norm": 0.5682557225227356, "learning_rate": 2.8238792420245564e-05, "loss": 1.4861, "step": 11651 }, { "epoch": 0.6494621258569756, "grad_norm": 0.5430989265441895, "learning_rate": 2.823077608435944e-05, "loss": 1.4421, "step": 11652 }, { "epoch": 0.6495178641101388, "grad_norm": 0.5792363286018372, "learning_rate": 2.822276043886617e-05, "loss": 1.5598, "step": 11653 }, { "epoch": 0.649573602363302, "grad_norm": 0.6064366102218628, "learning_rate": 2.8214745484019972e-05, "loss": 1.485, "step": 11654 }, { "epoch": 0.649629340616465, "grad_norm": 0.5442171692848206, "learning_rate": 2.820673122007505e-05, "loss": 1.4355, "step": 11655 }, { "epoch": 0.6496850788696282, "grad_norm": 0.6010521054267883, "learning_rate": 2.8198717647285534e-05, "loss": 1.6034, "step": 11656 }, { "epoch": 0.6497408171227914, "grad_norm": 0.5923758745193481, "learning_rate": 2.8190704765905573e-05, "loss": 1.7005, "step": 11657 }, { "epoch": 0.6497965553759545, "grad_norm": 0.5728440284729004, "learning_rate": 2.8182692576189306e-05, "loss": 1.6823, "step": 11658 }, { "epoch": 0.6498522936291177, "grad_norm": 0.5419698357582092, "learning_rate": 2.817468107839083e-05, "loss": 1.6961, "step": 11659 }, { "epoch": 0.6499080318822809, "grad_norm": 0.5518703460693359, "learning_rate": 2.816667027276424e-05, "loss": 1.6161, "step": 11660 }, { "epoch": 0.6499637701354439, "grad_norm": 0.5387782454490662, "learning_rate": 2.8158660159563527e-05, "loss": 1.6064, "step": 11661 }, { "epoch": 0.6500195083886071, "grad_norm": 0.5789139866828918, "learning_rate": 2.815065073904281e-05, "loss": 1.6869, "step": 11662 }, { "epoch": 0.6500752466417703, "grad_norm": 0.5399966239929199, "learning_rate": 2.8142642011456045e-05, "loss": 1.6542, "step": 11663 }, { "epoch": 0.6501309848949334, "grad_norm": 0.5608077645301819, "learning_rate": 2.8134633977057235e-05, "loss": 1.6167, "step": 11664 }, { "epoch": 0.6501867231480966, "grad_norm": 0.5759866237640381, "learning_rate": 2.812662663610035e-05, "loss": 1.6651, "step": 11665 }, { "epoch": 0.6502424614012596, "grad_norm": 0.5594897270202637, "learning_rate": 2.8118619988839338e-05, "loss": 1.3785, "step": 11666 }, { "epoch": 0.6502981996544228, "grad_norm": 0.53223717212677, "learning_rate": 2.811061403552815e-05, "loss": 1.5593, "step": 11667 }, { "epoch": 0.650353937907586, "grad_norm": 0.579182505607605, "learning_rate": 2.8102608776420614e-05, "loss": 1.7187, "step": 11668 }, { "epoch": 0.6504096761607491, "grad_norm": 0.5530314445495605, "learning_rate": 2.8094604211770693e-05, "loss": 1.5669, "step": 11669 }, { "epoch": 0.6504654144139123, "grad_norm": 0.6045119166374207, "learning_rate": 2.8086600341832197e-05, "loss": 1.7421, "step": 11670 }, { "epoch": 0.6505211526670754, "grad_norm": 0.509285032749176, "learning_rate": 2.807859716685899e-05, "loss": 1.4496, "step": 11671 }, { "epoch": 0.6505768909202385, "grad_norm": 0.5628162622451782, "learning_rate": 2.8070594687104835e-05, "loss": 1.5671, "step": 11672 }, { "epoch": 0.6506326291734017, "grad_norm": 0.5559877157211304, "learning_rate": 2.806259290282357e-05, "loss": 1.6659, "step": 11673 }, { "epoch": 0.6506883674265649, "grad_norm": 0.5760934352874756, "learning_rate": 2.8054591814268984e-05, "loss": 1.5014, "step": 11674 }, { "epoch": 0.650744105679728, "grad_norm": 0.5483234524726868, "learning_rate": 2.804659142169477e-05, "loss": 1.7134, "step": 11675 }, { "epoch": 0.6507998439328911, "grad_norm": 0.6183010339736938, "learning_rate": 2.803859172535468e-05, "loss": 1.8295, "step": 11676 }, { "epoch": 0.6508555821860543, "grad_norm": 0.5524032711982727, "learning_rate": 2.8030592725502412e-05, "loss": 1.553, "step": 11677 }, { "epoch": 0.6509113204392174, "grad_norm": 0.5912196040153503, "learning_rate": 2.8022594422391663e-05, "loss": 1.5796, "step": 11678 }, { "epoch": 0.6509670586923806, "grad_norm": 0.5911765098571777, "learning_rate": 2.8014596816276073e-05, "loss": 1.6964, "step": 11679 }, { "epoch": 0.6510227969455438, "grad_norm": 0.5506945848464966, "learning_rate": 2.800659990740929e-05, "loss": 1.523, "step": 11680 }, { "epoch": 0.6510785351987068, "grad_norm": 0.5004502534866333, "learning_rate": 2.7998603696044952e-05, "loss": 1.385, "step": 11681 }, { "epoch": 0.65113427345187, "grad_norm": 0.5972052216529846, "learning_rate": 2.79906081824366e-05, "loss": 1.4586, "step": 11682 }, { "epoch": 0.6511900117050332, "grad_norm": 0.5466043949127197, "learning_rate": 2.798261336683784e-05, "loss": 1.5598, "step": 11683 }, { "epoch": 0.6512457499581963, "grad_norm": 0.5743733644485474, "learning_rate": 2.7974619249502208e-05, "loss": 1.6281, "step": 11684 }, { "epoch": 0.6513014882113595, "grad_norm": 0.5645943284034729, "learning_rate": 2.7966625830683235e-05, "loss": 1.6654, "step": 11685 }, { "epoch": 0.6513572264645227, "grad_norm": 0.6178829073905945, "learning_rate": 2.7958633110634457e-05, "loss": 1.6944, "step": 11686 }, { "epoch": 0.6514129647176857, "grad_norm": 0.5012251734733582, "learning_rate": 2.7950641089609274e-05, "loss": 1.3509, "step": 11687 }, { "epoch": 0.6514687029708489, "grad_norm": 0.6008442640304565, "learning_rate": 2.7942649767861252e-05, "loss": 1.8239, "step": 11688 }, { "epoch": 0.651524441224012, "grad_norm": 0.562760055065155, "learning_rate": 2.7934659145643747e-05, "loss": 1.7763, "step": 11689 }, { "epoch": 0.6515801794771752, "grad_norm": 0.572251558303833, "learning_rate": 2.792666922321021e-05, "loss": 1.5611, "step": 11690 }, { "epoch": 0.6516359177303384, "grad_norm": 0.5346998572349548, "learning_rate": 2.7918680000814025e-05, "loss": 1.6913, "step": 11691 }, { "epoch": 0.6516916559835014, "grad_norm": 0.571090817451477, "learning_rate": 2.7910691478708567e-05, "loss": 1.7309, "step": 11692 }, { "epoch": 0.6517473942366646, "grad_norm": 0.6278156042098999, "learning_rate": 2.7902703657147206e-05, "loss": 1.9031, "step": 11693 }, { "epoch": 0.6518031324898278, "grad_norm": 0.5592220425605774, "learning_rate": 2.789471653638321e-05, "loss": 1.6664, "step": 11694 }, { "epoch": 0.6518588707429909, "grad_norm": 0.5627824664115906, "learning_rate": 2.7886730116669963e-05, "loss": 1.7134, "step": 11695 }, { "epoch": 0.651914608996154, "grad_norm": 0.5618991851806641, "learning_rate": 2.787874439826068e-05, "loss": 1.6886, "step": 11696 }, { "epoch": 0.6519703472493172, "grad_norm": 0.5403565168380737, "learning_rate": 2.7870759381408686e-05, "loss": 1.5477, "step": 11697 }, { "epoch": 0.6520260855024803, "grad_norm": 0.5511575937271118, "learning_rate": 2.7862775066367124e-05, "loss": 1.5624, "step": 11698 }, { "epoch": 0.6520818237556435, "grad_norm": 0.6034442782402039, "learning_rate": 2.7854791453389295e-05, "loss": 1.8036, "step": 11699 }, { "epoch": 0.6521375620088067, "grad_norm": 0.5496557950973511, "learning_rate": 2.7846808542728386e-05, "loss": 1.639, "step": 11700 }, { "epoch": 0.6521933002619698, "grad_norm": 0.5528457760810852, "learning_rate": 2.783882633463753e-05, "loss": 1.553, "step": 11701 }, { "epoch": 0.6522490385151329, "grad_norm": 0.49106788635253906, "learning_rate": 2.7830844829369896e-05, "loss": 1.2438, "step": 11702 }, { "epoch": 0.6523047767682961, "grad_norm": 0.5681769251823425, "learning_rate": 2.7822864027178596e-05, "loss": 1.5023, "step": 11703 }, { "epoch": 0.6523605150214592, "grad_norm": 0.6085399985313416, "learning_rate": 2.781488392831676e-05, "loss": 1.5908, "step": 11704 }, { "epoch": 0.6524162532746224, "grad_norm": 0.5347082614898682, "learning_rate": 2.7806904533037455e-05, "loss": 1.6314, "step": 11705 }, { "epoch": 0.6524719915277856, "grad_norm": 0.5232277512550354, "learning_rate": 2.7798925841593743e-05, "loss": 1.4394, "step": 11706 }, { "epoch": 0.6525277297809486, "grad_norm": 0.5716795921325684, "learning_rate": 2.779094785423868e-05, "loss": 1.8412, "step": 11707 }, { "epoch": 0.6525834680341118, "grad_norm": 0.5647847056388855, "learning_rate": 2.7782970571225243e-05, "loss": 1.592, "step": 11708 }, { "epoch": 0.652639206287275, "grad_norm": 0.5854530930519104, "learning_rate": 2.777499399280645e-05, "loss": 1.8748, "step": 11709 }, { "epoch": 0.6526949445404381, "grad_norm": 0.5178894400596619, "learning_rate": 2.7767018119235262e-05, "loss": 1.5548, "step": 11710 }, { "epoch": 0.6527506827936013, "grad_norm": 0.5811799168586731, "learning_rate": 2.7759042950764635e-05, "loss": 1.6619, "step": 11711 }, { "epoch": 0.6528064210467643, "grad_norm": 0.5538857579231262, "learning_rate": 2.7751068487647508e-05, "loss": 1.5367, "step": 11712 }, { "epoch": 0.6528621592999275, "grad_norm": 0.5820496082305908, "learning_rate": 2.774309473013673e-05, "loss": 1.7765, "step": 11713 }, { "epoch": 0.6529178975530907, "grad_norm": 0.5591105222702026, "learning_rate": 2.7735121678485265e-05, "loss": 1.445, "step": 11714 }, { "epoch": 0.6529736358062538, "grad_norm": 0.5089815855026245, "learning_rate": 2.7727149332945902e-05, "loss": 1.4578, "step": 11715 }, { "epoch": 0.653029374059417, "grad_norm": 0.5578038096427917, "learning_rate": 2.7719177693771505e-05, "loss": 1.2704, "step": 11716 }, { "epoch": 0.6530851123125802, "grad_norm": 0.5787779092788696, "learning_rate": 2.7711206761214882e-05, "loss": 1.5699, "step": 11717 }, { "epoch": 0.6531408505657432, "grad_norm": 0.6014275550842285, "learning_rate": 2.770323653552883e-05, "loss": 1.8262, "step": 11718 }, { "epoch": 0.6531965888189064, "grad_norm": 0.5510164499282837, "learning_rate": 2.769526701696613e-05, "loss": 1.6857, "step": 11719 }, { "epoch": 0.6532523270720696, "grad_norm": 0.5538983345031738, "learning_rate": 2.7687298205779488e-05, "loss": 1.6479, "step": 11720 }, { "epoch": 0.6533080653252327, "grad_norm": 0.5758739709854126, "learning_rate": 2.7679330102221684e-05, "loss": 1.5931, "step": 11721 }, { "epoch": 0.6533638035783959, "grad_norm": 0.5778799057006836, "learning_rate": 2.7671362706545377e-05, "loss": 1.6227, "step": 11722 }, { "epoch": 0.653419541831559, "grad_norm": 0.5738372206687927, "learning_rate": 2.7663396019003275e-05, "loss": 1.6291, "step": 11723 }, { "epoch": 0.6534752800847221, "grad_norm": 0.5328623652458191, "learning_rate": 2.7655430039847986e-05, "loss": 1.3945, "step": 11724 }, { "epoch": 0.6535310183378853, "grad_norm": 0.574098527431488, "learning_rate": 2.7647464769332186e-05, "loss": 1.6416, "step": 11725 }, { "epoch": 0.6535867565910485, "grad_norm": 0.5825911164283752, "learning_rate": 2.7639500207708513e-05, "loss": 1.5359, "step": 11726 }, { "epoch": 0.6536424948442116, "grad_norm": 0.5495928525924683, "learning_rate": 2.7631536355229494e-05, "loss": 1.8041, "step": 11727 }, { "epoch": 0.6536982330973747, "grad_norm": 0.5315799117088318, "learning_rate": 2.7623573212147723e-05, "loss": 1.3771, "step": 11728 }, { "epoch": 0.6537539713505379, "grad_norm": 0.5464669466018677, "learning_rate": 2.7615610778715757e-05, "loss": 1.5428, "step": 11729 }, { "epoch": 0.653809709603701, "grad_norm": 0.5899285674095154, "learning_rate": 2.76076490551861e-05, "loss": 1.7209, "step": 11730 }, { "epoch": 0.6538654478568642, "grad_norm": 0.544582188129425, "learning_rate": 2.7599688041811257e-05, "loss": 1.7189, "step": 11731 }, { "epoch": 0.6539211861100274, "grad_norm": 0.5615865588188171, "learning_rate": 2.759172773884371e-05, "loss": 1.6597, "step": 11732 }, { "epoch": 0.6539769243631904, "grad_norm": 0.5366857051849365, "learning_rate": 2.7583768146535925e-05, "loss": 1.5292, "step": 11733 }, { "epoch": 0.6540326626163536, "grad_norm": 0.563675582408905, "learning_rate": 2.7575809265140305e-05, "loss": 1.6176, "step": 11734 }, { "epoch": 0.6540884008695167, "grad_norm": 0.5570533275604248, "learning_rate": 2.756785109490927e-05, "loss": 1.7055, "step": 11735 }, { "epoch": 0.6541441391226799, "grad_norm": 0.548004150390625, "learning_rate": 2.7559893636095212e-05, "loss": 1.5861, "step": 11736 }, { "epoch": 0.6541998773758431, "grad_norm": 0.5964729189872742, "learning_rate": 2.755193688895049e-05, "loss": 1.8638, "step": 11737 }, { "epoch": 0.6542556156290061, "grad_norm": 0.5480014085769653, "learning_rate": 2.754398085372747e-05, "loss": 1.4608, "step": 11738 }, { "epoch": 0.6543113538821693, "grad_norm": 0.5882792472839355, "learning_rate": 2.7536025530678407e-05, "loss": 1.7498, "step": 11739 }, { "epoch": 0.6543670921353325, "grad_norm": 0.5270636081695557, "learning_rate": 2.752807092005568e-05, "loss": 1.4097, "step": 11740 }, { "epoch": 0.6544228303884956, "grad_norm": 0.5782089829444885, "learning_rate": 2.75201170221115e-05, "loss": 1.6588, "step": 11741 }, { "epoch": 0.6544785686416588, "grad_norm": 0.6418783664703369, "learning_rate": 2.7512163837098155e-05, "loss": 1.8219, "step": 11742 }, { "epoch": 0.654534306894822, "grad_norm": 0.5354815125465393, "learning_rate": 2.750421136526785e-05, "loss": 1.4547, "step": 11743 }, { "epoch": 0.654590045147985, "grad_norm": 0.566822350025177, "learning_rate": 2.74962596068728e-05, "loss": 1.6586, "step": 11744 }, { "epoch": 0.6546457834011482, "grad_norm": 0.6059299111366272, "learning_rate": 2.748830856216521e-05, "loss": 1.8293, "step": 11745 }, { "epoch": 0.6547015216543114, "grad_norm": 0.5582495331764221, "learning_rate": 2.7480358231397184e-05, "loss": 1.651, "step": 11746 }, { "epoch": 0.6547572599074745, "grad_norm": 0.6115778088569641, "learning_rate": 2.747240861482093e-05, "loss": 1.4241, "step": 11747 }, { "epoch": 0.6548129981606376, "grad_norm": 0.6121757626533508, "learning_rate": 2.7464459712688516e-05, "loss": 1.8573, "step": 11748 }, { "epoch": 0.6548687364138008, "grad_norm": 0.5520625114440918, "learning_rate": 2.745651152525205e-05, "loss": 1.7483, "step": 11749 }, { "epoch": 0.6549244746669639, "grad_norm": 0.5481840968132019, "learning_rate": 2.744856405276359e-05, "loss": 1.5745, "step": 11750 }, { "epoch": 0.6549802129201271, "grad_norm": 0.575197696685791, "learning_rate": 2.744061729547521e-05, "loss": 1.6733, "step": 11751 }, { "epoch": 0.6550359511732903, "grad_norm": 0.5682066082954407, "learning_rate": 2.743267125363893e-05, "loss": 1.6021, "step": 11752 }, { "epoch": 0.6550916894264533, "grad_norm": 0.6352496147155762, "learning_rate": 2.7424725927506722e-05, "loss": 1.5939, "step": 11753 }, { "epoch": 0.6551474276796165, "grad_norm": 0.5162918567657471, "learning_rate": 2.741678131733058e-05, "loss": 1.5034, "step": 11754 }, { "epoch": 0.6552031659327797, "grad_norm": 0.5746167302131653, "learning_rate": 2.740883742336247e-05, "loss": 1.6453, "step": 11755 }, { "epoch": 0.6552589041859428, "grad_norm": 0.5538302063941956, "learning_rate": 2.7400894245854326e-05, "loss": 1.7042, "step": 11756 }, { "epoch": 0.655314642439106, "grad_norm": 0.5114599466323853, "learning_rate": 2.7392951785058046e-05, "loss": 1.5452, "step": 11757 }, { "epoch": 0.655370380692269, "grad_norm": 0.5948389768600464, "learning_rate": 2.7385010041225534e-05, "loss": 1.7493, "step": 11758 }, { "epoch": 0.6554261189454322, "grad_norm": 0.48783570528030396, "learning_rate": 2.737706901460866e-05, "loss": 1.3269, "step": 11759 }, { "epoch": 0.6554818571985954, "grad_norm": 0.5941017270088196, "learning_rate": 2.7369128705459246e-05, "loss": 1.7478, "step": 11760 }, { "epoch": 0.6555375954517585, "grad_norm": 0.5333529710769653, "learning_rate": 2.736118911402912e-05, "loss": 1.6844, "step": 11761 }, { "epoch": 0.6555933337049217, "grad_norm": 0.6064323782920837, "learning_rate": 2.735325024057007e-05, "loss": 1.8216, "step": 11762 }, { "epoch": 0.6556490719580849, "grad_norm": 0.6015443205833435, "learning_rate": 2.7345312085333897e-05, "loss": 1.7156, "step": 11763 }, { "epoch": 0.6557048102112479, "grad_norm": 0.5998244285583496, "learning_rate": 2.7337374648572354e-05, "loss": 1.7078, "step": 11764 }, { "epoch": 0.6557605484644111, "grad_norm": 0.5969440937042236, "learning_rate": 2.7329437930537115e-05, "loss": 1.798, "step": 11765 }, { "epoch": 0.6558162867175743, "grad_norm": 0.48716482520103455, "learning_rate": 2.7321501931479966e-05, "loss": 1.3555, "step": 11766 }, { "epoch": 0.6558720249707374, "grad_norm": 0.5743377208709717, "learning_rate": 2.7313566651652532e-05, "loss": 1.4993, "step": 11767 }, { "epoch": 0.6559277632239006, "grad_norm": 0.5310671329498291, "learning_rate": 2.7305632091306488e-05, "loss": 1.6846, "step": 11768 }, { "epoch": 0.6559835014770637, "grad_norm": 0.5764484405517578, "learning_rate": 2.729769825069348e-05, "loss": 1.7928, "step": 11769 }, { "epoch": 0.6560392397302268, "grad_norm": 0.5921671986579895, "learning_rate": 2.7289765130065126e-05, "loss": 1.6052, "step": 11770 }, { "epoch": 0.65609497798339, "grad_norm": 0.5272278785705566, "learning_rate": 2.728183272967303e-05, "loss": 1.6509, "step": 11771 }, { "epoch": 0.6561507162365532, "grad_norm": 0.5755944848060608, "learning_rate": 2.7273901049768697e-05, "loss": 1.7614, "step": 11772 }, { "epoch": 0.6562064544897163, "grad_norm": 0.5172051787376404, "learning_rate": 2.7265970090603764e-05, "loss": 1.441, "step": 11773 }, { "epoch": 0.6562621927428794, "grad_norm": 0.6681126356124878, "learning_rate": 2.725803985242969e-05, "loss": 1.7769, "step": 11774 }, { "epoch": 0.6563179309960426, "grad_norm": 0.5718099474906921, "learning_rate": 2.7250110335497996e-05, "loss": 1.7925, "step": 11775 }, { "epoch": 0.6563736692492057, "grad_norm": 0.6293430924415588, "learning_rate": 2.7242181540060163e-05, "loss": 1.8628, "step": 11776 }, { "epoch": 0.6564294075023689, "grad_norm": 0.5449262857437134, "learning_rate": 2.7234253466367643e-05, "loss": 1.5689, "step": 11777 }, { "epoch": 0.6564851457555321, "grad_norm": 0.5448122620582581, "learning_rate": 2.7226326114671895e-05, "loss": 1.5135, "step": 11778 }, { "epoch": 0.6565408840086951, "grad_norm": 0.5253317356109619, "learning_rate": 2.7218399485224278e-05, "loss": 1.5069, "step": 11779 }, { "epoch": 0.6565966222618583, "grad_norm": 0.5427688956260681, "learning_rate": 2.721047357827621e-05, "loss": 1.6105, "step": 11780 }, { "epoch": 0.6566523605150214, "grad_norm": 0.5660583972930908, "learning_rate": 2.7202548394079037e-05, "loss": 1.7517, "step": 11781 }, { "epoch": 0.6567080987681846, "grad_norm": 0.5620884895324707, "learning_rate": 2.7194623932884133e-05, "loss": 1.5322, "step": 11782 }, { "epoch": 0.6567638370213478, "grad_norm": 0.6195741891860962, "learning_rate": 2.718670019494276e-05, "loss": 1.6235, "step": 11783 }, { "epoch": 0.6568195752745108, "grad_norm": 0.578835666179657, "learning_rate": 2.717877718050626e-05, "loss": 1.6345, "step": 11784 }, { "epoch": 0.656875313527674, "grad_norm": 0.561337411403656, "learning_rate": 2.7170854889825914e-05, "loss": 1.7114, "step": 11785 }, { "epoch": 0.6569310517808372, "grad_norm": 0.5934423804283142, "learning_rate": 2.716293332315293e-05, "loss": 1.7212, "step": 11786 }, { "epoch": 0.6569867900340003, "grad_norm": 0.5505562424659729, "learning_rate": 2.7155012480738546e-05, "loss": 1.6627, "step": 11787 }, { "epoch": 0.6570425282871635, "grad_norm": 0.5922462940216064, "learning_rate": 2.714709236283397e-05, "loss": 1.7468, "step": 11788 }, { "epoch": 0.6570982665403267, "grad_norm": 0.5288242101669312, "learning_rate": 2.7139172969690385e-05, "loss": 1.5715, "step": 11789 }, { "epoch": 0.6571540047934897, "grad_norm": 0.5881549715995789, "learning_rate": 2.7131254301558965e-05, "loss": 1.6476, "step": 11790 }, { "epoch": 0.6572097430466529, "grad_norm": 0.5649522542953491, "learning_rate": 2.7123336358690786e-05, "loss": 1.5052, "step": 11791 }, { "epoch": 0.6572654812998161, "grad_norm": 0.6090741157531738, "learning_rate": 2.711541914133704e-05, "loss": 1.673, "step": 11792 }, { "epoch": 0.6573212195529792, "grad_norm": 0.5936515927314758, "learning_rate": 2.710750264974875e-05, "loss": 1.7664, "step": 11793 }, { "epoch": 0.6573769578061424, "grad_norm": 0.5199238657951355, "learning_rate": 2.7099586884177004e-05, "loss": 1.6937, "step": 11794 }, { "epoch": 0.6574326960593055, "grad_norm": 0.5748278498649597, "learning_rate": 2.709167184487284e-05, "loss": 1.6415, "step": 11795 }, { "epoch": 0.6574884343124686, "grad_norm": 0.6542965173721313, "learning_rate": 2.7083757532087285e-05, "loss": 2.0278, "step": 11796 }, { "epoch": 0.6575441725656318, "grad_norm": 0.5692175626754761, "learning_rate": 2.7075843946071343e-05, "loss": 1.7187, "step": 11797 }, { "epoch": 0.657599910818795, "grad_norm": 0.5671214461326599, "learning_rate": 2.7067931087075936e-05, "loss": 1.5879, "step": 11798 }, { "epoch": 0.6576556490719581, "grad_norm": 0.5661264657974243, "learning_rate": 2.7060018955352094e-05, "loss": 1.6877, "step": 11799 }, { "epoch": 0.6577113873251212, "grad_norm": 0.5358115434646606, "learning_rate": 2.7052107551150685e-05, "loss": 1.5547, "step": 11800 }, { "epoch": 0.6577671255782844, "grad_norm": 0.6027678847312927, "learning_rate": 2.7044196874722627e-05, "loss": 1.5204, "step": 11801 }, { "epoch": 0.6578228638314475, "grad_norm": 0.5381132364273071, "learning_rate": 2.7036286926318798e-05, "loss": 1.7442, "step": 11802 }, { "epoch": 0.6578786020846107, "grad_norm": 0.5434688329696655, "learning_rate": 2.7028377706190068e-05, "loss": 1.4083, "step": 11803 }, { "epoch": 0.6579343403377738, "grad_norm": 0.5840322375297546, "learning_rate": 2.7020469214587274e-05, "loss": 1.8607, "step": 11804 }, { "epoch": 0.6579900785909369, "grad_norm": 0.554832935333252, "learning_rate": 2.7012561451761203e-05, "loss": 1.6151, "step": 11805 }, { "epoch": 0.6580458168441001, "grad_norm": 0.564648449420929, "learning_rate": 2.700465441796266e-05, "loss": 1.6476, "step": 11806 }, { "epoch": 0.6581015550972632, "grad_norm": 0.5735161900520325, "learning_rate": 2.6996748113442394e-05, "loss": 1.3972, "step": 11807 }, { "epoch": 0.6581572933504264, "grad_norm": 0.5288743376731873, "learning_rate": 2.6988842538451187e-05, "loss": 1.597, "step": 11808 }, { "epoch": 0.6582130316035896, "grad_norm": 0.5484468936920166, "learning_rate": 2.6980937693239695e-05, "loss": 1.5462, "step": 11809 }, { "epoch": 0.6582687698567526, "grad_norm": 0.5598819851875305, "learning_rate": 2.697303357805866e-05, "loss": 1.6007, "step": 11810 }, { "epoch": 0.6583245081099158, "grad_norm": 0.5698304772377014, "learning_rate": 2.696513019315877e-05, "loss": 1.6685, "step": 11811 }, { "epoch": 0.658380246363079, "grad_norm": 0.5363532900810242, "learning_rate": 2.6957227538790625e-05, "loss": 1.4725, "step": 11812 }, { "epoch": 0.6584359846162421, "grad_norm": 0.5554176568984985, "learning_rate": 2.694932561520488e-05, "loss": 1.4386, "step": 11813 }, { "epoch": 0.6584917228694053, "grad_norm": 0.5538124442100525, "learning_rate": 2.694142442265212e-05, "loss": 1.8516, "step": 11814 }, { "epoch": 0.6585474611225685, "grad_norm": 0.5798441171646118, "learning_rate": 2.6933523961382946e-05, "loss": 1.7817, "step": 11815 }, { "epoch": 0.6586031993757315, "grad_norm": 0.5579670667648315, "learning_rate": 2.6925624231647916e-05, "loss": 1.6708, "step": 11816 }, { "epoch": 0.6586589376288947, "grad_norm": 0.6054978966712952, "learning_rate": 2.691772523369752e-05, "loss": 1.6924, "step": 11817 }, { "epoch": 0.6587146758820579, "grad_norm": 0.5623806118965149, "learning_rate": 2.6909826967782338e-05, "loss": 1.5783, "step": 11818 }, { "epoch": 0.658770414135221, "grad_norm": 0.5320571660995483, "learning_rate": 2.6901929434152802e-05, "loss": 1.5426, "step": 11819 }, { "epoch": 0.6588261523883842, "grad_norm": 0.6053674817085266, "learning_rate": 2.6894032633059392e-05, "loss": 1.67, "step": 11820 }, { "epoch": 0.6588818906415473, "grad_norm": 0.5375155210494995, "learning_rate": 2.688613656475255e-05, "loss": 1.4324, "step": 11821 }, { "epoch": 0.6589376288947104, "grad_norm": 0.5696715116500854, "learning_rate": 2.687824122948269e-05, "loss": 1.5658, "step": 11822 }, { "epoch": 0.6589933671478736, "grad_norm": 0.5656219124794006, "learning_rate": 2.687034662750023e-05, "loss": 1.7141, "step": 11823 }, { "epoch": 0.6590491054010368, "grad_norm": 0.5286223292350769, "learning_rate": 2.6862452759055478e-05, "loss": 1.51, "step": 11824 }, { "epoch": 0.6591048436541999, "grad_norm": 0.5871027708053589, "learning_rate": 2.6854559624398868e-05, "loss": 1.7183, "step": 11825 }, { "epoch": 0.659160581907363, "grad_norm": 0.5437431931495667, "learning_rate": 2.6846667223780658e-05, "loss": 1.6043, "step": 11826 }, { "epoch": 0.6592163201605261, "grad_norm": 0.5970200300216675, "learning_rate": 2.683877555745117e-05, "loss": 1.6279, "step": 11827 }, { "epoch": 0.6592720584136893, "grad_norm": 0.5312990546226501, "learning_rate": 2.683088462566068e-05, "loss": 1.5773, "step": 11828 }, { "epoch": 0.6593277966668525, "grad_norm": 0.5490882992744446, "learning_rate": 2.682299442865945e-05, "loss": 1.5818, "step": 11829 }, { "epoch": 0.6593835349200156, "grad_norm": 0.5834240913391113, "learning_rate": 2.6815104966697718e-05, "loss": 1.7962, "step": 11830 }, { "epoch": 0.6594392731731787, "grad_norm": 0.5726290345191956, "learning_rate": 2.680721624002566e-05, "loss": 1.6222, "step": 11831 }, { "epoch": 0.6594950114263419, "grad_norm": 0.5791205167770386, "learning_rate": 2.6799328248893485e-05, "loss": 1.781, "step": 11832 }, { "epoch": 0.659550749679505, "grad_norm": 0.5636252164840698, "learning_rate": 2.6791440993551343e-05, "loss": 1.5622, "step": 11833 }, { "epoch": 0.6596064879326682, "grad_norm": 0.5404438972473145, "learning_rate": 2.6783554474249407e-05, "loss": 1.6885, "step": 11834 }, { "epoch": 0.6596622261858314, "grad_norm": 0.5758154988288879, "learning_rate": 2.677566869123771e-05, "loss": 1.7058, "step": 11835 }, { "epoch": 0.6597179644389944, "grad_norm": 0.5790137648582458, "learning_rate": 2.676778364476642e-05, "loss": 1.6399, "step": 11836 }, { "epoch": 0.6597737026921576, "grad_norm": 0.548075258731842, "learning_rate": 2.6759899335085602e-05, "loss": 1.6838, "step": 11837 }, { "epoch": 0.6598294409453208, "grad_norm": 0.60141521692276, "learning_rate": 2.6752015762445254e-05, "loss": 1.7896, "step": 11838 }, { "epoch": 0.6598851791984839, "grad_norm": 0.4708482623100281, "learning_rate": 2.674413292709542e-05, "loss": 1.2567, "step": 11839 }, { "epoch": 0.6599409174516471, "grad_norm": 0.5700356960296631, "learning_rate": 2.6736250829286103e-05, "loss": 1.5649, "step": 11840 }, { "epoch": 0.6599966557048103, "grad_norm": 0.5416743755340576, "learning_rate": 2.6728369469267266e-05, "loss": 1.5569, "step": 11841 }, { "epoch": 0.6600523939579733, "grad_norm": 0.5961542129516602, "learning_rate": 2.6720488847288873e-05, "loss": 1.534, "step": 11842 }, { "epoch": 0.6601081322111365, "grad_norm": 0.5858752727508545, "learning_rate": 2.6712608963600843e-05, "loss": 1.722, "step": 11843 }, { "epoch": 0.6601638704642997, "grad_norm": 0.564729630947113, "learning_rate": 2.67047298184531e-05, "loss": 1.5489, "step": 11844 }, { "epoch": 0.6602196087174628, "grad_norm": 0.6010147929191589, "learning_rate": 2.6696851412095492e-05, "loss": 1.8072, "step": 11845 }, { "epoch": 0.660275346970626, "grad_norm": 0.5355246663093567, "learning_rate": 2.6688973744777884e-05, "loss": 1.454, "step": 11846 }, { "epoch": 0.6603310852237891, "grad_norm": 0.5350586175918579, "learning_rate": 2.6681096816750117e-05, "loss": 1.663, "step": 11847 }, { "epoch": 0.6603868234769522, "grad_norm": 0.5176247358322144, "learning_rate": 2.6673220628262e-05, "loss": 1.4612, "step": 11848 }, { "epoch": 0.6604425617301154, "grad_norm": 0.5709378123283386, "learning_rate": 2.6665345179563345e-05, "loss": 1.5046, "step": 11849 }, { "epoch": 0.6604982999832785, "grad_norm": 0.5571310520172119, "learning_rate": 2.6657470470903845e-05, "loss": 1.7277, "step": 11850 }, { "epoch": 0.6605540382364417, "grad_norm": 0.5337514281272888, "learning_rate": 2.6649596502533323e-05, "loss": 1.6117, "step": 11851 }, { "epoch": 0.6606097764896048, "grad_norm": 0.5532556772232056, "learning_rate": 2.6641723274701447e-05, "loss": 1.7025, "step": 11852 }, { "epoch": 0.6606655147427679, "grad_norm": 0.49335777759552, "learning_rate": 2.6633850787657915e-05, "loss": 1.3906, "step": 11853 }, { "epoch": 0.6607212529959311, "grad_norm": 0.5451174974441528, "learning_rate": 2.6625979041652406e-05, "loss": 1.721, "step": 11854 }, { "epoch": 0.6607769912490943, "grad_norm": 0.5025902986526489, "learning_rate": 2.6618108036934554e-05, "loss": 1.4738, "step": 11855 }, { "epoch": 0.6608327295022574, "grad_norm": 0.5471557974815369, "learning_rate": 2.6610237773754015e-05, "loss": 1.6252, "step": 11856 }, { "epoch": 0.6608884677554205, "grad_norm": 0.57525235414505, "learning_rate": 2.6602368252360345e-05, "loss": 1.6125, "step": 11857 }, { "epoch": 0.6609442060085837, "grad_norm": 0.5472937226295471, "learning_rate": 2.6594499473003125e-05, "loss": 1.6452, "step": 11858 }, { "epoch": 0.6609999442617468, "grad_norm": 0.5961613655090332, "learning_rate": 2.658663143593193e-05, "loss": 1.5554, "step": 11859 }, { "epoch": 0.66105568251491, "grad_norm": 0.543452799320221, "learning_rate": 2.6578764141396282e-05, "loss": 1.6729, "step": 11860 }, { "epoch": 0.6611114207680732, "grad_norm": 0.5469802021980286, "learning_rate": 2.6570897589645654e-05, "loss": 1.6435, "step": 11861 }, { "epoch": 0.6611671590212362, "grad_norm": 0.5711193680763245, "learning_rate": 2.656303178092957e-05, "loss": 1.6829, "step": 11862 }, { "epoch": 0.6612228972743994, "grad_norm": 0.5289133787155151, "learning_rate": 2.6555166715497492e-05, "loss": 1.6514, "step": 11863 }, { "epoch": 0.6612786355275626, "grad_norm": 0.5423325896263123, "learning_rate": 2.6547302393598804e-05, "loss": 1.6465, "step": 11864 }, { "epoch": 0.6613343737807257, "grad_norm": 0.5673478245735168, "learning_rate": 2.6539438815482955e-05, "loss": 1.6402, "step": 11865 }, { "epoch": 0.6613901120338889, "grad_norm": 0.5566899180412292, "learning_rate": 2.653157598139932e-05, "loss": 1.6084, "step": 11866 }, { "epoch": 0.661445850287052, "grad_norm": 0.5686150193214417, "learning_rate": 2.652371389159727e-05, "loss": 1.846, "step": 11867 }, { "epoch": 0.6615015885402151, "grad_norm": 0.5833027362823486, "learning_rate": 2.6515852546326124e-05, "loss": 1.8253, "step": 11868 }, { "epoch": 0.6615573267933783, "grad_norm": 0.5765178799629211, "learning_rate": 2.6507991945835227e-05, "loss": 1.7235, "step": 11869 }, { "epoch": 0.6616130650465415, "grad_norm": 0.558733344078064, "learning_rate": 2.6500132090373873e-05, "loss": 1.6533, "step": 11870 }, { "epoch": 0.6616688032997046, "grad_norm": 0.5753670334815979, "learning_rate": 2.649227298019129e-05, "loss": 1.711, "step": 11871 }, { "epoch": 0.6617245415528678, "grad_norm": 0.5563479065895081, "learning_rate": 2.6484414615536757e-05, "loss": 1.6361, "step": 11872 }, { "epoch": 0.6617802798060308, "grad_norm": 0.5799263715744019, "learning_rate": 2.6476556996659484e-05, "loss": 1.6981, "step": 11873 }, { "epoch": 0.661836018059194, "grad_norm": 0.5460615754127502, "learning_rate": 2.646870012380867e-05, "loss": 1.5684, "step": 11874 }, { "epoch": 0.6618917563123572, "grad_norm": 0.5564395189285278, "learning_rate": 2.646084399723351e-05, "loss": 1.5542, "step": 11875 }, { "epoch": 0.6619474945655203, "grad_norm": 0.5553494691848755, "learning_rate": 2.6452988617183106e-05, "loss": 1.7904, "step": 11876 }, { "epoch": 0.6620032328186835, "grad_norm": 0.557140052318573, "learning_rate": 2.6445133983906643e-05, "loss": 1.607, "step": 11877 }, { "epoch": 0.6620589710718466, "grad_norm": 0.63083416223526, "learning_rate": 2.643728009765318e-05, "loss": 1.6927, "step": 11878 }, { "epoch": 0.6621147093250097, "grad_norm": 0.5608395934104919, "learning_rate": 2.642942695867181e-05, "loss": 1.7663, "step": 11879 }, { "epoch": 0.6621704475781729, "grad_norm": 0.603378176689148, "learning_rate": 2.6421574567211595e-05, "loss": 1.7458, "step": 11880 }, { "epoch": 0.6622261858313361, "grad_norm": 0.5721191167831421, "learning_rate": 2.6413722923521555e-05, "loss": 1.6295, "step": 11881 }, { "epoch": 0.6622819240844992, "grad_norm": 0.5742847919464111, "learning_rate": 2.6405872027850732e-05, "loss": 1.5865, "step": 11882 }, { "epoch": 0.6623376623376623, "grad_norm": 0.593904972076416, "learning_rate": 2.6398021880448066e-05, "loss": 1.6666, "step": 11883 }, { "epoch": 0.6623934005908255, "grad_norm": 0.5501095056533813, "learning_rate": 2.6390172481562537e-05, "loss": 1.7405, "step": 11884 }, { "epoch": 0.6624491388439886, "grad_norm": 0.5565217733383179, "learning_rate": 2.6382323831443078e-05, "loss": 1.6778, "step": 11885 }, { "epoch": 0.6625048770971518, "grad_norm": 0.5937246680259705, "learning_rate": 2.6374475930338625e-05, "loss": 1.6827, "step": 11886 }, { "epoch": 0.662560615350315, "grad_norm": 0.6125547289848328, "learning_rate": 2.6366628778498017e-05, "loss": 1.8122, "step": 11887 }, { "epoch": 0.662616353603478, "grad_norm": 0.568310022354126, "learning_rate": 2.6358782376170164e-05, "loss": 1.724, "step": 11888 }, { "epoch": 0.6626720918566412, "grad_norm": 0.5387205481529236, "learning_rate": 2.6350936723603923e-05, "loss": 1.5148, "step": 11889 }, { "epoch": 0.6627278301098044, "grad_norm": 0.5455870032310486, "learning_rate": 2.634309182104806e-05, "loss": 1.7194, "step": 11890 }, { "epoch": 0.6627835683629675, "grad_norm": 0.5351374745368958, "learning_rate": 2.63352476687514e-05, "loss": 1.667, "step": 11891 }, { "epoch": 0.6628393066161307, "grad_norm": 0.5790825486183167, "learning_rate": 2.6327404266962707e-05, "loss": 1.6988, "step": 11892 }, { "epoch": 0.6628950448692938, "grad_norm": 0.5976444482803345, "learning_rate": 2.6319561615930732e-05, "loss": 1.4099, "step": 11893 }, { "epoch": 0.6629507831224569, "grad_norm": 0.5867549180984497, "learning_rate": 2.6311719715904193e-05, "loss": 1.7522, "step": 11894 }, { "epoch": 0.6630065213756201, "grad_norm": 0.5726478099822998, "learning_rate": 2.630387856713179e-05, "loss": 1.5786, "step": 11895 }, { "epoch": 0.6630622596287832, "grad_norm": 0.5656641721725464, "learning_rate": 2.6296038169862226e-05, "loss": 1.5653, "step": 11896 }, { "epoch": 0.6631179978819464, "grad_norm": 0.5478425025939941, "learning_rate": 2.6288198524344106e-05, "loss": 1.6283, "step": 11897 }, { "epoch": 0.6631737361351095, "grad_norm": 0.6097078919410706, "learning_rate": 2.6280359630826078e-05, "loss": 1.6296, "step": 11898 }, { "epoch": 0.6632294743882726, "grad_norm": 0.5414223074913025, "learning_rate": 2.6272521489556746e-05, "loss": 1.5074, "step": 11899 }, { "epoch": 0.6632852126414358, "grad_norm": 0.5565434694290161, "learning_rate": 2.62646841007847e-05, "loss": 1.4944, "step": 11900 }, { "epoch": 0.663340950894599, "grad_norm": 0.5831652879714966, "learning_rate": 2.6256847464758495e-05, "loss": 1.7095, "step": 11901 }, { "epoch": 0.6633966891477621, "grad_norm": 0.5691834092140198, "learning_rate": 2.6249011581726625e-05, "loss": 1.6111, "step": 11902 }, { "epoch": 0.6634524274009252, "grad_norm": 0.5959990620613098, "learning_rate": 2.6241176451937664e-05, "loss": 1.7611, "step": 11903 }, { "epoch": 0.6635081656540884, "grad_norm": 0.5408827662467957, "learning_rate": 2.623334207564005e-05, "loss": 1.6979, "step": 11904 }, { "epoch": 0.6635639039072515, "grad_norm": 0.5845001935958862, "learning_rate": 2.6225508453082247e-05, "loss": 1.8504, "step": 11905 }, { "epoch": 0.6636196421604147, "grad_norm": 0.5302397012710571, "learning_rate": 2.6217675584512713e-05, "loss": 1.4962, "step": 11906 }, { "epoch": 0.6636753804135779, "grad_norm": 0.5679298043251038, "learning_rate": 2.620984347017984e-05, "loss": 1.5897, "step": 11907 }, { "epoch": 0.663731118666741, "grad_norm": 0.5457496643066406, "learning_rate": 2.6202012110332046e-05, "loss": 1.565, "step": 11908 }, { "epoch": 0.6637868569199041, "grad_norm": 0.5777455568313599, "learning_rate": 2.619418150521766e-05, "loss": 1.8616, "step": 11909 }, { "epoch": 0.6638425951730673, "grad_norm": 0.5581088066101074, "learning_rate": 2.6186351655085036e-05, "loss": 1.7539, "step": 11910 }, { "epoch": 0.6638983334262304, "grad_norm": 0.5956060886383057, "learning_rate": 2.6178522560182496e-05, "loss": 1.5453, "step": 11911 }, { "epoch": 0.6639540716793936, "grad_norm": 0.5325225591659546, "learning_rate": 2.6170694220758352e-05, "loss": 1.6533, "step": 11912 }, { "epoch": 0.6640098099325568, "grad_norm": 0.523888885974884, "learning_rate": 2.6162866637060812e-05, "loss": 1.5123, "step": 11913 }, { "epoch": 0.6640655481857198, "grad_norm": 0.5633407235145569, "learning_rate": 2.6155039809338178e-05, "loss": 1.7104, "step": 11914 }, { "epoch": 0.664121286438883, "grad_norm": 0.5729668736457825, "learning_rate": 2.6147213737838682e-05, "loss": 1.6117, "step": 11915 }, { "epoch": 0.6641770246920462, "grad_norm": 0.6129339933395386, "learning_rate": 2.6139388422810468e-05, "loss": 1.9683, "step": 11916 }, { "epoch": 0.6642327629452093, "grad_norm": 0.5193238854408264, "learning_rate": 2.613156386450174e-05, "loss": 1.5635, "step": 11917 }, { "epoch": 0.6642885011983725, "grad_norm": 0.5500721335411072, "learning_rate": 2.6123740063160646e-05, "loss": 1.6013, "step": 11918 }, { "epoch": 0.6643442394515355, "grad_norm": 0.5810501575469971, "learning_rate": 2.6115917019035326e-05, "loss": 1.6931, "step": 11919 }, { "epoch": 0.6643999777046987, "grad_norm": 0.5327263474464417, "learning_rate": 2.6108094732373823e-05, "loss": 1.397, "step": 11920 }, { "epoch": 0.6644557159578619, "grad_norm": 0.5560159087181091, "learning_rate": 2.610027320342428e-05, "loss": 1.5599, "step": 11921 }, { "epoch": 0.664511454211025, "grad_norm": 0.5466412901878357, "learning_rate": 2.6092452432434745e-05, "loss": 1.5155, "step": 11922 }, { "epoch": 0.6645671924641882, "grad_norm": 0.5591964721679688, "learning_rate": 2.6084632419653206e-05, "loss": 1.6088, "step": 11923 }, { "epoch": 0.6646229307173513, "grad_norm": 0.5819532871246338, "learning_rate": 2.6076813165327692e-05, "loss": 1.6295, "step": 11924 }, { "epoch": 0.6646786689705144, "grad_norm": 0.5858190059661865, "learning_rate": 2.606899466970618e-05, "loss": 1.7028, "step": 11925 }, { "epoch": 0.6647344072236776, "grad_norm": 0.593717634677887, "learning_rate": 2.6061176933036636e-05, "loss": 1.7271, "step": 11926 }, { "epoch": 0.6647901454768408, "grad_norm": 0.5807863473892212, "learning_rate": 2.6053359955566997e-05, "loss": 1.6711, "step": 11927 }, { "epoch": 0.6648458837300039, "grad_norm": 0.5966163873672485, "learning_rate": 2.604554373754513e-05, "loss": 1.6863, "step": 11928 }, { "epoch": 0.664901621983167, "grad_norm": 0.6047829389572144, "learning_rate": 2.6037728279218986e-05, "loss": 1.6041, "step": 11929 }, { "epoch": 0.6649573602363302, "grad_norm": 0.5634847283363342, "learning_rate": 2.6029913580836372e-05, "loss": 1.5647, "step": 11930 }, { "epoch": 0.6650130984894933, "grad_norm": 0.5834475159645081, "learning_rate": 2.6022099642645147e-05, "loss": 1.489, "step": 11931 }, { "epoch": 0.6650688367426565, "grad_norm": 0.6043350100517273, "learning_rate": 2.6014286464893116e-05, "loss": 1.7557, "step": 11932 }, { "epoch": 0.6651245749958197, "grad_norm": 0.5589107871055603, "learning_rate": 2.600647404782808e-05, "loss": 1.5685, "step": 11933 }, { "epoch": 0.6651803132489827, "grad_norm": 0.5879253149032593, "learning_rate": 2.5998662391697805e-05, "loss": 1.8277, "step": 11934 }, { "epoch": 0.6652360515021459, "grad_norm": 0.6046743988990784, "learning_rate": 2.599085149674999e-05, "loss": 1.7789, "step": 11935 }, { "epoch": 0.6652917897553091, "grad_norm": 0.5767287611961365, "learning_rate": 2.5983041363232418e-05, "loss": 1.8216, "step": 11936 }, { "epoch": 0.6653475280084722, "grad_norm": 0.627777636051178, "learning_rate": 2.5975231991392725e-05, "loss": 1.7176, "step": 11937 }, { "epoch": 0.6654032662616354, "grad_norm": 0.5515438318252563, "learning_rate": 2.5967423381478616e-05, "loss": 1.656, "step": 11938 }, { "epoch": 0.6654590045147986, "grad_norm": 0.5708695650100708, "learning_rate": 2.5959615533737685e-05, "loss": 1.4709, "step": 11939 }, { "epoch": 0.6655147427679616, "grad_norm": 0.5571765899658203, "learning_rate": 2.5951808448417602e-05, "loss": 1.6504, "step": 11940 }, { "epoch": 0.6655704810211248, "grad_norm": 0.5637586712837219, "learning_rate": 2.5944002125765964e-05, "loss": 1.5995, "step": 11941 }, { "epoch": 0.6656262192742879, "grad_norm": 0.5670571327209473, "learning_rate": 2.5936196566030302e-05, "loss": 1.8318, "step": 11942 }, { "epoch": 0.6656819575274511, "grad_norm": 0.5566664338111877, "learning_rate": 2.5928391769458183e-05, "loss": 1.5158, "step": 11943 }, { "epoch": 0.6657376957806143, "grad_norm": 0.5441628694534302, "learning_rate": 2.592058773629713e-05, "loss": 1.5724, "step": 11944 }, { "epoch": 0.6657934340337773, "grad_norm": 0.5653737187385559, "learning_rate": 2.591278446679466e-05, "loss": 1.6876, "step": 11945 }, { "epoch": 0.6658491722869405, "grad_norm": 0.554476797580719, "learning_rate": 2.5904981961198187e-05, "loss": 1.6439, "step": 11946 }, { "epoch": 0.6659049105401037, "grad_norm": 0.5171441435813904, "learning_rate": 2.5897180219755223e-05, "loss": 1.4966, "step": 11947 }, { "epoch": 0.6659606487932668, "grad_norm": 0.5593156814575195, "learning_rate": 2.5889379242713197e-05, "loss": 1.6051, "step": 11948 }, { "epoch": 0.66601638704643, "grad_norm": 0.5688751339912415, "learning_rate": 2.588157903031947e-05, "loss": 1.768, "step": 11949 }, { "epoch": 0.6660721252995931, "grad_norm": 0.5453287959098816, "learning_rate": 2.5873779582821428e-05, "loss": 1.6705, "step": 11950 }, { "epoch": 0.6661278635527562, "grad_norm": 0.5424460172653198, "learning_rate": 2.5865980900466436e-05, "loss": 1.7035, "step": 11951 }, { "epoch": 0.6661836018059194, "grad_norm": 0.5378473997116089, "learning_rate": 2.5858182983501817e-05, "loss": 1.6159, "step": 11952 }, { "epoch": 0.6662393400590826, "grad_norm": 0.5400096774101257, "learning_rate": 2.5850385832174896e-05, "loss": 1.5236, "step": 11953 }, { "epoch": 0.6662950783122457, "grad_norm": 0.5049753189086914, "learning_rate": 2.58425894467329e-05, "loss": 1.5456, "step": 11954 }, { "epoch": 0.6663508165654088, "grad_norm": 0.6285840272903442, "learning_rate": 2.5834793827423155e-05, "loss": 1.8465, "step": 11955 }, { "epoch": 0.666406554818572, "grad_norm": 0.5433966517448425, "learning_rate": 2.582699897449284e-05, "loss": 1.7157, "step": 11956 }, { "epoch": 0.6664622930717351, "grad_norm": 0.5678963661193848, "learning_rate": 2.5819204888189173e-05, "loss": 1.5398, "step": 11957 }, { "epoch": 0.6665180313248983, "grad_norm": 0.5653471350669861, "learning_rate": 2.5811411568759346e-05, "loss": 1.4759, "step": 11958 }, { "epoch": 0.6665737695780615, "grad_norm": 0.5493046045303345, "learning_rate": 2.5803619016450518e-05, "loss": 1.6772, "step": 11959 }, { "epoch": 0.6666295078312245, "grad_norm": 0.5423870086669922, "learning_rate": 2.579582723150984e-05, "loss": 1.6816, "step": 11960 }, { "epoch": 0.6666852460843877, "grad_norm": 0.5807955861091614, "learning_rate": 2.578803621418436e-05, "loss": 1.7645, "step": 11961 }, { "epoch": 0.6667409843375509, "grad_norm": 0.5688575506210327, "learning_rate": 2.5780245964721244e-05, "loss": 1.6954, "step": 11962 }, { "epoch": 0.666796722590714, "grad_norm": 0.500732958316803, "learning_rate": 2.5772456483367497e-05, "loss": 1.4321, "step": 11963 }, { "epoch": 0.6668524608438772, "grad_norm": 0.5536085367202759, "learning_rate": 2.5764667770370195e-05, "loss": 1.5843, "step": 11964 }, { "epoch": 0.6669081990970402, "grad_norm": 0.5987438559532166, "learning_rate": 2.5756879825976287e-05, "loss": 1.6635, "step": 11965 }, { "epoch": 0.6669639373502034, "grad_norm": 0.5400751233100891, "learning_rate": 2.5749092650432828e-05, "loss": 1.5231, "step": 11966 }, { "epoch": 0.6670196756033666, "grad_norm": 0.5186768770217896, "learning_rate": 2.5741306243986773e-05, "loss": 1.5618, "step": 11967 }, { "epoch": 0.6670754138565297, "grad_norm": 0.5389026403427124, "learning_rate": 2.5733520606885024e-05, "loss": 1.6768, "step": 11968 }, { "epoch": 0.6671311521096929, "grad_norm": 0.6083009839057922, "learning_rate": 2.5725735739374523e-05, "loss": 1.9559, "step": 11969 }, { "epoch": 0.667186890362856, "grad_norm": 0.5317343473434448, "learning_rate": 2.5717951641702155e-05, "loss": 1.1981, "step": 11970 }, { "epoch": 0.6672426286160191, "grad_norm": 0.5438907742500305, "learning_rate": 2.5710168314114802e-05, "loss": 1.5034, "step": 11971 }, { "epoch": 0.6672983668691823, "grad_norm": 0.5268614888191223, "learning_rate": 2.570238575685926e-05, "loss": 1.5647, "step": 11972 }, { "epoch": 0.6673541051223455, "grad_norm": 0.5814064145088196, "learning_rate": 2.5694603970182384e-05, "loss": 1.9909, "step": 11973 }, { "epoch": 0.6674098433755086, "grad_norm": 0.5964480638504028, "learning_rate": 2.568682295433099e-05, "loss": 1.7471, "step": 11974 }, { "epoch": 0.6674655816286718, "grad_norm": 0.5062904357910156, "learning_rate": 2.5679042709551793e-05, "loss": 1.376, "step": 11975 }, { "epoch": 0.6675213198818349, "grad_norm": 0.5355701446533203, "learning_rate": 2.5671263236091557e-05, "loss": 1.6064, "step": 11976 }, { "epoch": 0.667577058134998, "grad_norm": 0.5466346144676208, "learning_rate": 2.5663484534197014e-05, "loss": 1.6715, "step": 11977 }, { "epoch": 0.6676327963881612, "grad_norm": 0.5518960952758789, "learning_rate": 2.5655706604114844e-05, "loss": 1.5901, "step": 11978 }, { "epoch": 0.6676885346413244, "grad_norm": 0.547706127166748, "learning_rate": 2.5647929446091746e-05, "loss": 1.5659, "step": 11979 }, { "epoch": 0.6677442728944875, "grad_norm": 0.5523556470870972, "learning_rate": 2.5640153060374293e-05, "loss": 1.7662, "step": 11980 }, { "epoch": 0.6678000111476506, "grad_norm": 0.5731891989707947, "learning_rate": 2.563237744720921e-05, "loss": 1.5056, "step": 11981 }, { "epoch": 0.6678557494008138, "grad_norm": 0.5777943134307861, "learning_rate": 2.5624602606843017e-05, "loss": 1.5221, "step": 11982 }, { "epoch": 0.6679114876539769, "grad_norm": 0.5800961852073669, "learning_rate": 2.561682853952231e-05, "loss": 1.6958, "step": 11983 }, { "epoch": 0.6679672259071401, "grad_norm": 0.5744274854660034, "learning_rate": 2.560905524549364e-05, "loss": 1.7868, "step": 11984 }, { "epoch": 0.6680229641603033, "grad_norm": 0.5199480056762695, "learning_rate": 2.5601282725003522e-05, "loss": 1.3076, "step": 11985 }, { "epoch": 0.6680787024134663, "grad_norm": 0.6389720439910889, "learning_rate": 2.5593510978298486e-05, "loss": 1.9535, "step": 11986 }, { "epoch": 0.6681344406666295, "grad_norm": 0.5399093627929688, "learning_rate": 2.5585740005624947e-05, "loss": 1.5529, "step": 11987 }, { "epoch": 0.6681901789197926, "grad_norm": 0.5350346565246582, "learning_rate": 2.5577969807229422e-05, "loss": 1.5102, "step": 11988 }, { "epoch": 0.6682459171729558, "grad_norm": 0.5301445126533508, "learning_rate": 2.557020038335829e-05, "loss": 1.5665, "step": 11989 }, { "epoch": 0.668301655426119, "grad_norm": 0.5629336833953857, "learning_rate": 2.5562431734257987e-05, "loss": 1.5837, "step": 11990 }, { "epoch": 0.668357393679282, "grad_norm": 0.6423056125640869, "learning_rate": 2.5554663860174823e-05, "loss": 1.673, "step": 11991 }, { "epoch": 0.6684131319324452, "grad_norm": 0.5791043639183044, "learning_rate": 2.5546896761355216e-05, "loss": 1.6438, "step": 11992 }, { "epoch": 0.6684688701856084, "grad_norm": 0.5836037397384644, "learning_rate": 2.5539130438045494e-05, "loss": 1.6323, "step": 11993 }, { "epoch": 0.6685246084387715, "grad_norm": 0.56341153383255, "learning_rate": 2.5531364890491916e-05, "loss": 1.6362, "step": 11994 }, { "epoch": 0.6685803466919347, "grad_norm": 0.5776152014732361, "learning_rate": 2.5523600118940784e-05, "loss": 1.7689, "step": 11995 }, { "epoch": 0.6686360849450979, "grad_norm": 0.5938311815261841, "learning_rate": 2.551583612363835e-05, "loss": 1.7699, "step": 11996 }, { "epoch": 0.6686918231982609, "grad_norm": 0.5410308837890625, "learning_rate": 2.550807290483086e-05, "loss": 1.6291, "step": 11997 }, { "epoch": 0.6687475614514241, "grad_norm": 0.5281055569648743, "learning_rate": 2.5500310462764458e-05, "loss": 1.4974, "step": 11998 }, { "epoch": 0.6688032997045873, "grad_norm": 0.5797048211097717, "learning_rate": 2.549254879768539e-05, "loss": 1.742, "step": 11999 }, { "epoch": 0.6688590379577504, "grad_norm": 0.5304363369941711, "learning_rate": 2.54847879098398e-05, "loss": 1.6044, "step": 12000 }, { "epoch": 0.6689147762109136, "grad_norm": 0.5916433334350586, "learning_rate": 2.5477027799473786e-05, "loss": 1.6588, "step": 12001 }, { "epoch": 0.6689705144640767, "grad_norm": 0.56605064868927, "learning_rate": 2.5469268466833474e-05, "loss": 1.5768, "step": 12002 }, { "epoch": 0.6690262527172398, "grad_norm": 0.5318643450737, "learning_rate": 2.5461509912164937e-05, "loss": 1.7623, "step": 12003 }, { "epoch": 0.669081990970403, "grad_norm": 0.5695320963859558, "learning_rate": 2.5453752135714237e-05, "loss": 1.6399, "step": 12004 }, { "epoch": 0.6691377292235662, "grad_norm": 0.526165783405304, "learning_rate": 2.5445995137727428e-05, "loss": 1.6047, "step": 12005 }, { "epoch": 0.6691934674767293, "grad_norm": 0.5636263489723206, "learning_rate": 2.5438238918450453e-05, "loss": 1.6891, "step": 12006 }, { "epoch": 0.6692492057298924, "grad_norm": 0.6026769876480103, "learning_rate": 2.5430483478129374e-05, "loss": 1.6847, "step": 12007 }, { "epoch": 0.6693049439830556, "grad_norm": 0.5717709064483643, "learning_rate": 2.5422728817010088e-05, "loss": 1.7936, "step": 12008 }, { "epoch": 0.6693606822362187, "grad_norm": 0.5113479495048523, "learning_rate": 2.5414974935338553e-05, "loss": 1.5723, "step": 12009 }, { "epoch": 0.6694164204893819, "grad_norm": 0.5642980337142944, "learning_rate": 2.540722183336066e-05, "loss": 1.9559, "step": 12010 }, { "epoch": 0.669472158742545, "grad_norm": 0.5360389947891235, "learning_rate": 2.5399469511322316e-05, "loss": 1.5012, "step": 12011 }, { "epoch": 0.6695278969957081, "grad_norm": 0.5451308488845825, "learning_rate": 2.5391717969469387e-05, "loss": 1.7371, "step": 12012 }, { "epoch": 0.6695836352488713, "grad_norm": 0.5800293684005737, "learning_rate": 2.5383967208047642e-05, "loss": 1.5257, "step": 12013 }, { "epoch": 0.6696393735020344, "grad_norm": 0.5658344030380249, "learning_rate": 2.5376217227302985e-05, "loss": 1.5169, "step": 12014 }, { "epoch": 0.6696951117551976, "grad_norm": 0.5860779881477356, "learning_rate": 2.5368468027481125e-05, "loss": 1.763, "step": 12015 }, { "epoch": 0.6697508500083608, "grad_norm": 0.5342041850090027, "learning_rate": 2.5360719608827843e-05, "loss": 1.5319, "step": 12016 }, { "epoch": 0.6698065882615238, "grad_norm": 0.5477134585380554, "learning_rate": 2.535297197158889e-05, "loss": 1.5549, "step": 12017 }, { "epoch": 0.669862326514687, "grad_norm": 0.5928038358688354, "learning_rate": 2.5345225116009952e-05, "loss": 1.6838, "step": 12018 }, { "epoch": 0.6699180647678502, "grad_norm": 0.5327808260917664, "learning_rate": 2.5337479042336755e-05, "loss": 1.4308, "step": 12019 }, { "epoch": 0.6699738030210133, "grad_norm": 0.61540687084198, "learning_rate": 2.5329733750814903e-05, "loss": 1.7362, "step": 12020 }, { "epoch": 0.6700295412741765, "grad_norm": 0.5844860672950745, "learning_rate": 2.532198924169006e-05, "loss": 1.7953, "step": 12021 }, { "epoch": 0.6700852795273397, "grad_norm": 0.6544490456581116, "learning_rate": 2.531424551520784e-05, "loss": 1.9502, "step": 12022 }, { "epoch": 0.6701410177805027, "grad_norm": 0.5588658452033997, "learning_rate": 2.5306502571613843e-05, "loss": 1.5504, "step": 12023 }, { "epoch": 0.6701967560336659, "grad_norm": 0.5695081949234009, "learning_rate": 2.5298760411153567e-05, "loss": 1.5688, "step": 12024 }, { "epoch": 0.6702524942868291, "grad_norm": 0.5447390675544739, "learning_rate": 2.5291019034072616e-05, "loss": 1.6796, "step": 12025 }, { "epoch": 0.6703082325399922, "grad_norm": 0.6449052095413208, "learning_rate": 2.52832784406165e-05, "loss": 1.8635, "step": 12026 }, { "epoch": 0.6703639707931554, "grad_norm": 0.5398309230804443, "learning_rate": 2.5275538631030658e-05, "loss": 1.5262, "step": 12027 }, { "epoch": 0.6704197090463185, "grad_norm": 0.5473873615264893, "learning_rate": 2.5267799605560584e-05, "loss": 1.7629, "step": 12028 }, { "epoch": 0.6704754472994816, "grad_norm": 0.5728017687797546, "learning_rate": 2.5260061364451703e-05, "loss": 1.7794, "step": 12029 }, { "epoch": 0.6705311855526448, "grad_norm": 0.5419506430625916, "learning_rate": 2.5252323907949448e-05, "loss": 1.5772, "step": 12030 }, { "epoch": 0.670586923805808, "grad_norm": 0.5533862113952637, "learning_rate": 2.5244587236299177e-05, "loss": 1.7015, "step": 12031 }, { "epoch": 0.670642662058971, "grad_norm": 0.5854989290237427, "learning_rate": 2.5236851349746245e-05, "loss": 1.5979, "step": 12032 }, { "epoch": 0.6706984003121342, "grad_norm": 0.6313944458961487, "learning_rate": 2.5229116248536044e-05, "loss": 1.6631, "step": 12033 }, { "epoch": 0.6707541385652973, "grad_norm": 0.5602930784225464, "learning_rate": 2.5221381932913835e-05, "loss": 1.5784, "step": 12034 }, { "epoch": 0.6708098768184605, "grad_norm": 0.5706256628036499, "learning_rate": 2.5213648403124918e-05, "loss": 1.5644, "step": 12035 }, { "epoch": 0.6708656150716237, "grad_norm": 0.6049339175224304, "learning_rate": 2.520591565941456e-05, "loss": 1.7091, "step": 12036 }, { "epoch": 0.6709213533247868, "grad_norm": 0.49709540605545044, "learning_rate": 2.5198183702028e-05, "loss": 1.4581, "step": 12037 }, { "epoch": 0.6709770915779499, "grad_norm": 0.5363261103630066, "learning_rate": 2.5190452531210463e-05, "loss": 1.4818, "step": 12038 }, { "epoch": 0.6710328298311131, "grad_norm": 0.5210326910018921, "learning_rate": 2.5182722147207088e-05, "loss": 1.5246, "step": 12039 }, { "epoch": 0.6710885680842762, "grad_norm": 0.5645127892494202, "learning_rate": 2.517499255026311e-05, "loss": 1.5696, "step": 12040 }, { "epoch": 0.6711443063374394, "grad_norm": 0.5957170128822327, "learning_rate": 2.5167263740623607e-05, "loss": 1.4597, "step": 12041 }, { "epoch": 0.6712000445906026, "grad_norm": 0.5939059853553772, "learning_rate": 2.5159535718533717e-05, "loss": 1.6307, "step": 12042 }, { "epoch": 0.6712557828437656, "grad_norm": 0.6196640133857727, "learning_rate": 2.5151808484238525e-05, "loss": 1.6608, "step": 12043 }, { "epoch": 0.6713115210969288, "grad_norm": 0.5694495439529419, "learning_rate": 2.5144082037983085e-05, "loss": 1.6391, "step": 12044 }, { "epoch": 0.671367259350092, "grad_norm": 0.5964381098747253, "learning_rate": 2.513635638001247e-05, "loss": 1.6466, "step": 12045 }, { "epoch": 0.6714229976032551, "grad_norm": 0.5447733402252197, "learning_rate": 2.5128631510571643e-05, "loss": 1.6352, "step": 12046 }, { "epoch": 0.6714787358564183, "grad_norm": 0.5714775919914246, "learning_rate": 2.5120907429905617e-05, "loss": 1.6743, "step": 12047 }, { "epoch": 0.6715344741095814, "grad_norm": 0.5910129547119141, "learning_rate": 2.511318413825935e-05, "loss": 1.7514, "step": 12048 }, { "epoch": 0.6715902123627445, "grad_norm": 0.5670276284217834, "learning_rate": 2.5105461635877797e-05, "loss": 1.2842, "step": 12049 }, { "epoch": 0.6716459506159077, "grad_norm": 0.5352841019630432, "learning_rate": 2.509773992300582e-05, "loss": 1.5662, "step": 12050 }, { "epoch": 0.6717016888690709, "grad_norm": 0.5886231064796448, "learning_rate": 2.5090018999888365e-05, "loss": 1.7842, "step": 12051 }, { "epoch": 0.671757427122234, "grad_norm": 0.5294016599655151, "learning_rate": 2.508229886677029e-05, "loss": 1.5996, "step": 12052 }, { "epoch": 0.6718131653753971, "grad_norm": 0.5378506779670715, "learning_rate": 2.507457952389639e-05, "loss": 1.7818, "step": 12053 }, { "epoch": 0.6718689036285603, "grad_norm": 0.5751243233680725, "learning_rate": 2.506686097151151e-05, "loss": 1.8029, "step": 12054 }, { "epoch": 0.6719246418817234, "grad_norm": 0.6077497005462646, "learning_rate": 2.5059143209860425e-05, "loss": 1.8815, "step": 12055 }, { "epoch": 0.6719803801348866, "grad_norm": 0.5650768876075745, "learning_rate": 2.5051426239187918e-05, "loss": 1.6726, "step": 12056 }, { "epoch": 0.6720361183880497, "grad_norm": 0.5476177334785461, "learning_rate": 2.5043710059738702e-05, "loss": 1.5366, "step": 12057 }, { "epoch": 0.6720918566412128, "grad_norm": 0.58171147108078, "learning_rate": 2.503599467175747e-05, "loss": 1.8958, "step": 12058 }, { "epoch": 0.672147594894376, "grad_norm": 0.562774121761322, "learning_rate": 2.5028280075488973e-05, "loss": 1.7533, "step": 12059 }, { "epoch": 0.6722033331475391, "grad_norm": 0.542335569858551, "learning_rate": 2.5020566271177824e-05, "loss": 1.608, "step": 12060 }, { "epoch": 0.6722590714007023, "grad_norm": 0.5780958533287048, "learning_rate": 2.501285325906867e-05, "loss": 1.5929, "step": 12061 }, { "epoch": 0.6723148096538655, "grad_norm": 0.553331196308136, "learning_rate": 2.500514103940613e-05, "loss": 1.7366, "step": 12062 }, { "epoch": 0.6723705479070285, "grad_norm": 0.5768744349479675, "learning_rate": 2.499742961243478e-05, "loss": 1.9978, "step": 12063 }, { "epoch": 0.6724262861601917, "grad_norm": 0.580155074596405, "learning_rate": 2.4989718978399207e-05, "loss": 1.6921, "step": 12064 }, { "epoch": 0.6724820244133549, "grad_norm": 0.5232993364334106, "learning_rate": 2.4982009137543894e-05, "loss": 1.5997, "step": 12065 }, { "epoch": 0.672537762666518, "grad_norm": 0.5684017539024353, "learning_rate": 2.4974300090113422e-05, "loss": 1.7996, "step": 12066 }, { "epoch": 0.6725935009196812, "grad_norm": 0.5195304751396179, "learning_rate": 2.4966591836352222e-05, "loss": 1.5594, "step": 12067 }, { "epoch": 0.6726492391728444, "grad_norm": 0.6149779558181763, "learning_rate": 2.495888437650477e-05, "loss": 1.7468, "step": 12068 }, { "epoch": 0.6727049774260074, "grad_norm": 0.5937604904174805, "learning_rate": 2.495117771081551e-05, "loss": 1.8914, "step": 12069 }, { "epoch": 0.6727607156791706, "grad_norm": 0.5604000091552734, "learning_rate": 2.494347183952885e-05, "loss": 1.6262, "step": 12070 }, { "epoch": 0.6728164539323338, "grad_norm": 0.5344957113265991, "learning_rate": 2.493576676288919e-05, "loss": 1.511, "step": 12071 }, { "epoch": 0.6728721921854969, "grad_norm": 0.5278180837631226, "learning_rate": 2.4928062481140856e-05, "loss": 1.4848, "step": 12072 }, { "epoch": 0.6729279304386601, "grad_norm": 0.5628829598426819, "learning_rate": 2.4920358994528198e-05, "loss": 1.7329, "step": 12073 }, { "epoch": 0.6729836686918232, "grad_norm": 0.6097002625465393, "learning_rate": 2.4912656303295535e-05, "loss": 1.7582, "step": 12074 }, { "epoch": 0.6730394069449863, "grad_norm": 0.5815702080726624, "learning_rate": 2.4904954407687153e-05, "loss": 1.8324, "step": 12075 }, { "epoch": 0.6730951451981495, "grad_norm": 0.5267353653907776, "learning_rate": 2.4897253307947272e-05, "loss": 1.5541, "step": 12076 }, { "epoch": 0.6731508834513127, "grad_norm": 0.5497151613235474, "learning_rate": 2.4889553004320177e-05, "loss": 1.6382, "step": 12077 }, { "epoch": 0.6732066217044758, "grad_norm": 0.5231025815010071, "learning_rate": 2.488185349705007e-05, "loss": 1.6186, "step": 12078 }, { "epoch": 0.673262359957639, "grad_norm": 0.5598129630088806, "learning_rate": 2.487415478638111e-05, "loss": 1.6038, "step": 12079 }, { "epoch": 0.673318098210802, "grad_norm": 0.5685511827468872, "learning_rate": 2.4866456872557458e-05, "loss": 1.6577, "step": 12080 }, { "epoch": 0.6733738364639652, "grad_norm": 0.5880294442176819, "learning_rate": 2.4858759755823258e-05, "loss": 1.563, "step": 12081 }, { "epoch": 0.6734295747171284, "grad_norm": 0.5600868463516235, "learning_rate": 2.485106343642264e-05, "loss": 1.6662, "step": 12082 }, { "epoch": 0.6734853129702915, "grad_norm": 0.5627442002296448, "learning_rate": 2.4843367914599637e-05, "loss": 1.7301, "step": 12083 }, { "epoch": 0.6735410512234546, "grad_norm": 0.5312789082527161, "learning_rate": 2.4835673190598306e-05, "loss": 1.4877, "step": 12084 }, { "epoch": 0.6735967894766178, "grad_norm": 0.5254043936729431, "learning_rate": 2.482797926466275e-05, "loss": 1.6383, "step": 12085 }, { "epoch": 0.6736525277297809, "grad_norm": 0.5575996041297913, "learning_rate": 2.482028613703691e-05, "loss": 1.7706, "step": 12086 }, { "epoch": 0.6737082659829441, "grad_norm": 0.614926278591156, "learning_rate": 2.481259380796478e-05, "loss": 1.5105, "step": 12087 }, { "epoch": 0.6737640042361073, "grad_norm": 0.5587199926376343, "learning_rate": 2.480490227769032e-05, "loss": 1.7255, "step": 12088 }, { "epoch": 0.6738197424892703, "grad_norm": 0.5892671346664429, "learning_rate": 2.4797211546457465e-05, "loss": 1.7097, "step": 12089 }, { "epoch": 0.6738754807424335, "grad_norm": 0.6368154883384705, "learning_rate": 2.4789521614510143e-05, "loss": 1.8793, "step": 12090 }, { "epoch": 0.6739312189955967, "grad_norm": 0.5571451187133789, "learning_rate": 2.478183248209216e-05, "loss": 1.7683, "step": 12091 }, { "epoch": 0.6739869572487598, "grad_norm": 0.5345653891563416, "learning_rate": 2.4774144149447465e-05, "loss": 1.3473, "step": 12092 }, { "epoch": 0.674042695501923, "grad_norm": 0.649505078792572, "learning_rate": 2.4766456616819818e-05, "loss": 1.9293, "step": 12093 }, { "epoch": 0.6740984337550862, "grad_norm": 0.5354018807411194, "learning_rate": 2.4758769884453043e-05, "loss": 1.5863, "step": 12094 }, { "epoch": 0.6741541720082492, "grad_norm": 0.6080323457717896, "learning_rate": 2.4751083952590926e-05, "loss": 1.7642, "step": 12095 }, { "epoch": 0.6742099102614124, "grad_norm": 0.5298397541046143, "learning_rate": 2.474339882147721e-05, "loss": 1.4794, "step": 12096 }, { "epoch": 0.6742656485145756, "grad_norm": 0.5831593871116638, "learning_rate": 2.4735714491355643e-05, "loss": 1.584, "step": 12097 }, { "epoch": 0.6743213867677387, "grad_norm": 0.6232854723930359, "learning_rate": 2.47280309624699e-05, "loss": 1.828, "step": 12098 }, { "epoch": 0.6743771250209019, "grad_norm": 0.5947305560112, "learning_rate": 2.4720348235063666e-05, "loss": 1.7087, "step": 12099 }, { "epoch": 0.674432863274065, "grad_norm": 0.5919405221939087, "learning_rate": 2.4712666309380595e-05, "loss": 1.7922, "step": 12100 }, { "epoch": 0.6744886015272281, "grad_norm": 0.5434198975563049, "learning_rate": 2.470498518566433e-05, "loss": 1.3383, "step": 12101 }, { "epoch": 0.6745443397803913, "grad_norm": 0.5246424674987793, "learning_rate": 2.469730486415842e-05, "loss": 1.4675, "step": 12102 }, { "epoch": 0.6746000780335544, "grad_norm": 0.5592208504676819, "learning_rate": 2.468962534510649e-05, "loss": 1.7807, "step": 12103 }, { "epoch": 0.6746558162867176, "grad_norm": 0.5231202244758606, "learning_rate": 2.46819466287521e-05, "loss": 1.5491, "step": 12104 }, { "epoch": 0.6747115545398807, "grad_norm": 0.5387272834777832, "learning_rate": 2.467426871533873e-05, "loss": 1.7326, "step": 12105 }, { "epoch": 0.6747672927930438, "grad_norm": 0.6031918525695801, "learning_rate": 2.466659160510989e-05, "loss": 1.7699, "step": 12106 }, { "epoch": 0.674823031046207, "grad_norm": 0.548579752445221, "learning_rate": 2.4658915298309066e-05, "loss": 1.7571, "step": 12107 }, { "epoch": 0.6748787692993702, "grad_norm": 0.5778599381446838, "learning_rate": 2.4651239795179713e-05, "loss": 1.636, "step": 12108 }, { "epoch": 0.6749345075525333, "grad_norm": 0.5563526153564453, "learning_rate": 2.4643565095965204e-05, "loss": 1.4918, "step": 12109 }, { "epoch": 0.6749902458056964, "grad_norm": 0.5569801330566406, "learning_rate": 2.4635891200908996e-05, "loss": 1.6423, "step": 12110 }, { "epoch": 0.6750459840588596, "grad_norm": 0.546291172504425, "learning_rate": 2.4628218110254452e-05, "loss": 1.644, "step": 12111 }, { "epoch": 0.6751017223120227, "grad_norm": 0.5411151051521301, "learning_rate": 2.462054582424488e-05, "loss": 1.5569, "step": 12112 }, { "epoch": 0.6751574605651859, "grad_norm": 0.5745245218276978, "learning_rate": 2.4612874343123626e-05, "loss": 1.5434, "step": 12113 }, { "epoch": 0.6752131988183491, "grad_norm": 0.5502985715866089, "learning_rate": 2.460520366713398e-05, "loss": 1.6833, "step": 12114 }, { "epoch": 0.6752689370715121, "grad_norm": 0.6116489768028259, "learning_rate": 2.4597533796519206e-05, "loss": 1.7659, "step": 12115 }, { "epoch": 0.6753246753246753, "grad_norm": 0.5902003049850464, "learning_rate": 2.4589864731522578e-05, "loss": 1.6773, "step": 12116 }, { "epoch": 0.6753804135778385, "grad_norm": 0.7062128186225891, "learning_rate": 2.4582196472387255e-05, "loss": 1.7786, "step": 12117 }, { "epoch": 0.6754361518310016, "grad_norm": 0.5624451637268066, "learning_rate": 2.4574529019356494e-05, "loss": 1.7779, "step": 12118 }, { "epoch": 0.6754918900841648, "grad_norm": 0.5526938438415527, "learning_rate": 2.4566862372673415e-05, "loss": 1.7638, "step": 12119 }, { "epoch": 0.675547628337328, "grad_norm": 0.589867353439331, "learning_rate": 2.4559196532581174e-05, "loss": 1.6782, "step": 12120 }, { "epoch": 0.675603366590491, "grad_norm": 0.5674148201942444, "learning_rate": 2.4551531499322895e-05, "loss": 1.5979, "step": 12121 }, { "epoch": 0.6756591048436542, "grad_norm": 0.5661038756370544, "learning_rate": 2.4543867273141658e-05, "loss": 1.5279, "step": 12122 }, { "epoch": 0.6757148430968174, "grad_norm": 0.5659511089324951, "learning_rate": 2.4536203854280553e-05, "loss": 1.5487, "step": 12123 }, { "epoch": 0.6757705813499805, "grad_norm": 0.5999061465263367, "learning_rate": 2.452854124298257e-05, "loss": 1.6329, "step": 12124 }, { "epoch": 0.6758263196031437, "grad_norm": 0.5986047983169556, "learning_rate": 2.4520879439490763e-05, "loss": 1.8838, "step": 12125 }, { "epoch": 0.6758820578563067, "grad_norm": 0.6163796186447144, "learning_rate": 2.45132184440481e-05, "loss": 1.8276, "step": 12126 }, { "epoch": 0.6759377961094699, "grad_norm": 0.6057443618774414, "learning_rate": 2.4505558256897564e-05, "loss": 1.748, "step": 12127 }, { "epoch": 0.6759935343626331, "grad_norm": 0.5554170608520508, "learning_rate": 2.449789887828205e-05, "loss": 1.5513, "step": 12128 }, { "epoch": 0.6760492726157962, "grad_norm": 0.5506255030632019, "learning_rate": 2.4490240308444507e-05, "loss": 1.5345, "step": 12129 }, { "epoch": 0.6761050108689594, "grad_norm": 0.5533109307289124, "learning_rate": 2.448258254762783e-05, "loss": 1.754, "step": 12130 }, { "epoch": 0.6761607491221225, "grad_norm": 0.5812035202980042, "learning_rate": 2.447492559607484e-05, "loss": 1.5279, "step": 12131 }, { "epoch": 0.6762164873752856, "grad_norm": 0.5917302370071411, "learning_rate": 2.4467269454028386e-05, "loss": 1.8167, "step": 12132 }, { "epoch": 0.6762722256284488, "grad_norm": 0.5987018346786499, "learning_rate": 2.4459614121731283e-05, "loss": 1.6938, "step": 12133 }, { "epoch": 0.676327963881612, "grad_norm": 0.5785610675811768, "learning_rate": 2.445195959942632e-05, "loss": 1.6087, "step": 12134 }, { "epoch": 0.6763837021347751, "grad_norm": 0.5947317481040955, "learning_rate": 2.4444305887356218e-05, "loss": 1.8278, "step": 12135 }, { "epoch": 0.6764394403879382, "grad_norm": 0.562552273273468, "learning_rate": 2.4436652985763742e-05, "loss": 1.6288, "step": 12136 }, { "epoch": 0.6764951786411014, "grad_norm": 0.5087525248527527, "learning_rate": 2.4429000894891606e-05, "loss": 1.4703, "step": 12137 }, { "epoch": 0.6765509168942645, "grad_norm": 0.523859977722168, "learning_rate": 2.4421349614982464e-05, "loss": 1.435, "step": 12138 }, { "epoch": 0.6766066551474277, "grad_norm": 0.5445376038551331, "learning_rate": 2.441369914627897e-05, "loss": 1.5009, "step": 12139 }, { "epoch": 0.6766623934005909, "grad_norm": 0.555959939956665, "learning_rate": 2.4406049489023763e-05, "loss": 1.6002, "step": 12140 }, { "epoch": 0.6767181316537539, "grad_norm": 0.6597177982330322, "learning_rate": 2.439840064345944e-05, "loss": 2.0263, "step": 12141 }, { "epoch": 0.6767738699069171, "grad_norm": 0.5682998299598694, "learning_rate": 2.4390752609828603e-05, "loss": 1.5754, "step": 12142 }, { "epoch": 0.6768296081600803, "grad_norm": 0.5617828369140625, "learning_rate": 2.4383105388373745e-05, "loss": 1.6967, "step": 12143 }, { "epoch": 0.6768853464132434, "grad_norm": 0.544691801071167, "learning_rate": 2.4375458979337463e-05, "loss": 1.4722, "step": 12144 }, { "epoch": 0.6769410846664066, "grad_norm": 0.5128159523010254, "learning_rate": 2.4367813382962203e-05, "loss": 1.567, "step": 12145 }, { "epoch": 0.6769968229195698, "grad_norm": 0.5488656163215637, "learning_rate": 2.436016859949046e-05, "loss": 1.7543, "step": 12146 }, { "epoch": 0.6770525611727328, "grad_norm": 0.5271493196487427, "learning_rate": 2.435252462916467e-05, "loss": 1.5287, "step": 12147 }, { "epoch": 0.677108299425896, "grad_norm": 0.5668809413909912, "learning_rate": 2.4344881472227264e-05, "loss": 1.6324, "step": 12148 }, { "epoch": 0.6771640376790591, "grad_norm": 0.6095489263534546, "learning_rate": 2.4337239128920662e-05, "loss": 1.8157, "step": 12149 }, { "epoch": 0.6772197759322223, "grad_norm": 0.5673229098320007, "learning_rate": 2.4329597599487192e-05, "loss": 1.7171, "step": 12150 }, { "epoch": 0.6772755141853855, "grad_norm": 0.5574488043785095, "learning_rate": 2.432195688416921e-05, "loss": 1.816, "step": 12151 }, { "epoch": 0.6773312524385485, "grad_norm": 0.5575489401817322, "learning_rate": 2.431431698320905e-05, "loss": 1.594, "step": 12152 }, { "epoch": 0.6773869906917117, "grad_norm": 0.5522165894508362, "learning_rate": 2.4306677896849018e-05, "loss": 1.6131, "step": 12153 }, { "epoch": 0.6774427289448749, "grad_norm": 0.5478757619857788, "learning_rate": 2.4299039625331315e-05, "loss": 1.5579, "step": 12154 }, { "epoch": 0.677498467198038, "grad_norm": 0.5960223078727722, "learning_rate": 2.4291402168898252e-05, "loss": 1.7149, "step": 12155 }, { "epoch": 0.6775542054512012, "grad_norm": 0.5895914435386658, "learning_rate": 2.4283765527792034e-05, "loss": 1.7342, "step": 12156 }, { "epoch": 0.6776099437043643, "grad_norm": 0.5667082071304321, "learning_rate": 2.4276129702254826e-05, "loss": 1.5972, "step": 12157 }, { "epoch": 0.6776656819575274, "grad_norm": 0.6155690550804138, "learning_rate": 2.4268494692528798e-05, "loss": 1.8824, "step": 12158 }, { "epoch": 0.6777214202106906, "grad_norm": 0.5950416922569275, "learning_rate": 2.4260860498856098e-05, "loss": 1.6597, "step": 12159 }, { "epoch": 0.6777771584638538, "grad_norm": 0.5567697882652283, "learning_rate": 2.425322712147885e-05, "loss": 1.5585, "step": 12160 }, { "epoch": 0.6778328967170169, "grad_norm": 0.5798008441925049, "learning_rate": 2.4245594560639084e-05, "loss": 1.5505, "step": 12161 }, { "epoch": 0.67788863497018, "grad_norm": 0.5718047022819519, "learning_rate": 2.4237962816578918e-05, "loss": 1.5679, "step": 12162 }, { "epoch": 0.6779443732233432, "grad_norm": 0.597363293170929, "learning_rate": 2.4230331889540393e-05, "loss": 1.7587, "step": 12163 }, { "epoch": 0.6780001114765063, "grad_norm": 0.6661909818649292, "learning_rate": 2.4222701779765467e-05, "loss": 1.3722, "step": 12164 }, { "epoch": 0.6780558497296695, "grad_norm": 0.574291467666626, "learning_rate": 2.4215072487496153e-05, "loss": 1.6003, "step": 12165 }, { "epoch": 0.6781115879828327, "grad_norm": 0.5555253624916077, "learning_rate": 2.4207444012974402e-05, "loss": 1.8517, "step": 12166 }, { "epoch": 0.6781673262359957, "grad_norm": 0.5446553826332092, "learning_rate": 2.4199816356442166e-05, "loss": 1.5125, "step": 12167 }, { "epoch": 0.6782230644891589, "grad_norm": 0.5693860054016113, "learning_rate": 2.419218951814131e-05, "loss": 1.7082, "step": 12168 }, { "epoch": 0.6782788027423221, "grad_norm": 0.5330381989479065, "learning_rate": 2.4184563498313712e-05, "loss": 1.6407, "step": 12169 }, { "epoch": 0.6783345409954852, "grad_norm": 0.5500601530075073, "learning_rate": 2.4176938297201286e-05, "loss": 1.6078, "step": 12170 }, { "epoch": 0.6783902792486484, "grad_norm": 0.5614216923713684, "learning_rate": 2.4169313915045795e-05, "loss": 1.6395, "step": 12171 }, { "epoch": 0.6784460175018114, "grad_norm": 0.548337459564209, "learning_rate": 2.4161690352089067e-05, "loss": 1.6889, "step": 12172 }, { "epoch": 0.6785017557549746, "grad_norm": 0.535300076007843, "learning_rate": 2.4154067608572874e-05, "loss": 1.663, "step": 12173 }, { "epoch": 0.6785574940081378, "grad_norm": 0.5819778442382812, "learning_rate": 2.414644568473896e-05, "loss": 1.8053, "step": 12174 }, { "epoch": 0.6786132322613009, "grad_norm": 0.5482204556465149, "learning_rate": 2.413882458082907e-05, "loss": 1.6296, "step": 12175 }, { "epoch": 0.6786689705144641, "grad_norm": 0.5731914043426514, "learning_rate": 2.4131204297084875e-05, "loss": 1.6391, "step": 12176 }, { "epoch": 0.6787247087676272, "grad_norm": 0.6898718476295471, "learning_rate": 2.4123584833748042e-05, "loss": 1.702, "step": 12177 }, { "epoch": 0.6787804470207903, "grad_norm": 0.5492184162139893, "learning_rate": 2.4115966191060236e-05, "loss": 1.7373, "step": 12178 }, { "epoch": 0.6788361852739535, "grad_norm": 0.5639967322349548, "learning_rate": 2.4108348369263084e-05, "loss": 1.6233, "step": 12179 }, { "epoch": 0.6788919235271167, "grad_norm": 0.5644584894180298, "learning_rate": 2.4100731368598123e-05, "loss": 1.6422, "step": 12180 }, { "epoch": 0.6789476617802798, "grad_norm": 0.5759285092353821, "learning_rate": 2.409311518930698e-05, "loss": 1.6933, "step": 12181 }, { "epoch": 0.679003400033443, "grad_norm": 0.5666438937187195, "learning_rate": 2.4085499831631197e-05, "loss": 1.6532, "step": 12182 }, { "epoch": 0.6790591382866061, "grad_norm": 0.5786770582199097, "learning_rate": 2.4077885295812248e-05, "loss": 1.7707, "step": 12183 }, { "epoch": 0.6791148765397692, "grad_norm": 0.5363991260528564, "learning_rate": 2.4070271582091642e-05, "loss": 1.6073, "step": 12184 }, { "epoch": 0.6791706147929324, "grad_norm": 0.5650521516799927, "learning_rate": 2.406265869071084e-05, "loss": 1.6806, "step": 12185 }, { "epoch": 0.6792263530460956, "grad_norm": 0.5003963708877563, "learning_rate": 2.4055046621911294e-05, "loss": 1.4209, "step": 12186 }, { "epoch": 0.6792820912992587, "grad_norm": 0.6042050123214722, "learning_rate": 2.4047435375934363e-05, "loss": 1.7709, "step": 12187 }, { "epoch": 0.6793378295524218, "grad_norm": 0.5666334629058838, "learning_rate": 2.4039824953021488e-05, "loss": 1.6503, "step": 12188 }, { "epoch": 0.679393567805585, "grad_norm": 0.5441558957099915, "learning_rate": 2.403221535341403e-05, "loss": 1.6457, "step": 12189 }, { "epoch": 0.6794493060587481, "grad_norm": 0.5805729031562805, "learning_rate": 2.402460657735327e-05, "loss": 1.6228, "step": 12190 }, { "epoch": 0.6795050443119113, "grad_norm": 0.5899102687835693, "learning_rate": 2.401699862508055e-05, "loss": 1.8148, "step": 12191 }, { "epoch": 0.6795607825650745, "grad_norm": 0.5872830152511597, "learning_rate": 2.4009391496837143e-05, "loss": 1.8663, "step": 12192 }, { "epoch": 0.6796165208182375, "grad_norm": 0.6101430058479309, "learning_rate": 2.4001785192864313e-05, "loss": 1.9342, "step": 12193 }, { "epoch": 0.6796722590714007, "grad_norm": 0.5709355473518372, "learning_rate": 2.3994179713403265e-05, "loss": 1.5368, "step": 12194 }, { "epoch": 0.6797279973245638, "grad_norm": 0.5578945875167847, "learning_rate": 2.398657505869519e-05, "loss": 1.4992, "step": 12195 }, { "epoch": 0.679783735577727, "grad_norm": 0.5690076351165771, "learning_rate": 2.3978971228981323e-05, "loss": 1.5838, "step": 12196 }, { "epoch": 0.6798394738308902, "grad_norm": 0.5869070291519165, "learning_rate": 2.397136822450276e-05, "loss": 1.8293, "step": 12197 }, { "epoch": 0.6798952120840532, "grad_norm": 0.617962121963501, "learning_rate": 2.3963766045500634e-05, "loss": 1.6752, "step": 12198 }, { "epoch": 0.6799509503372164, "grad_norm": 0.5052658915519714, "learning_rate": 2.3956164692216054e-05, "loss": 1.3596, "step": 12199 }, { "epoch": 0.6800066885903796, "grad_norm": 0.6124083995819092, "learning_rate": 2.394856416489008e-05, "loss": 1.7068, "step": 12200 }, { "epoch": 0.6800624268435427, "grad_norm": 0.5866329669952393, "learning_rate": 2.3940964463763778e-05, "loss": 1.7651, "step": 12201 }, { "epoch": 0.6801181650967059, "grad_norm": 0.5338658094406128, "learning_rate": 2.393336558907811e-05, "loss": 1.4577, "step": 12202 }, { "epoch": 0.680173903349869, "grad_norm": 0.5513985753059387, "learning_rate": 2.3925767541074147e-05, "loss": 1.6466, "step": 12203 }, { "epoch": 0.6802296416030321, "grad_norm": 0.5717636346817017, "learning_rate": 2.3918170319992793e-05, "loss": 1.6213, "step": 12204 }, { "epoch": 0.6802853798561953, "grad_norm": 0.5736023187637329, "learning_rate": 2.391057392607503e-05, "loss": 1.8441, "step": 12205 }, { "epoch": 0.6803411181093585, "grad_norm": 0.6372126936912537, "learning_rate": 2.3902978359561713e-05, "loss": 1.5251, "step": 12206 }, { "epoch": 0.6803968563625216, "grad_norm": 0.5528156757354736, "learning_rate": 2.3895383620693785e-05, "loss": 1.7265, "step": 12207 }, { "epoch": 0.6804525946156847, "grad_norm": 0.5714967250823975, "learning_rate": 2.3887789709712107e-05, "loss": 1.7238, "step": 12208 }, { "epoch": 0.6805083328688479, "grad_norm": 0.6046301126480103, "learning_rate": 2.388019662685747e-05, "loss": 1.7441, "step": 12209 }, { "epoch": 0.680564071122011, "grad_norm": 0.5244828462600708, "learning_rate": 2.3872604372370717e-05, "loss": 1.5733, "step": 12210 }, { "epoch": 0.6806198093751742, "grad_norm": 0.5506595373153687, "learning_rate": 2.386501294649261e-05, "loss": 1.8439, "step": 12211 }, { "epoch": 0.6806755476283374, "grad_norm": 0.5664464235305786, "learning_rate": 2.3857422349463944e-05, "loss": 1.6925, "step": 12212 }, { "epoch": 0.6807312858815004, "grad_norm": 0.5245766043663025, "learning_rate": 2.384983258152537e-05, "loss": 1.7101, "step": 12213 }, { "epoch": 0.6807870241346636, "grad_norm": 0.5500200986862183, "learning_rate": 2.3842243642917666e-05, "loss": 1.6757, "step": 12214 }, { "epoch": 0.6808427623878268, "grad_norm": 0.540712296962738, "learning_rate": 2.38346555338815e-05, "loss": 1.5788, "step": 12215 }, { "epoch": 0.6808985006409899, "grad_norm": 0.5923953652381897, "learning_rate": 2.382706825465749e-05, "loss": 1.5688, "step": 12216 }, { "epoch": 0.6809542388941531, "grad_norm": 0.559162437915802, "learning_rate": 2.3819481805486275e-05, "loss": 1.4546, "step": 12217 }, { "epoch": 0.6810099771473161, "grad_norm": 0.5854106545448303, "learning_rate": 2.3811896186608457e-05, "loss": 1.6903, "step": 12218 }, { "epoch": 0.6810657154004793, "grad_norm": 0.5242003798484802, "learning_rate": 2.3804311398264617e-05, "loss": 1.4833, "step": 12219 }, { "epoch": 0.6811214536536425, "grad_norm": 0.5815067291259766, "learning_rate": 2.379672744069527e-05, "loss": 1.6484, "step": 12220 }, { "epoch": 0.6811771919068056, "grad_norm": 0.5998220443725586, "learning_rate": 2.3789144314140938e-05, "loss": 1.7253, "step": 12221 }, { "epoch": 0.6812329301599688, "grad_norm": 0.5479490756988525, "learning_rate": 2.378156201884217e-05, "loss": 1.7107, "step": 12222 }, { "epoch": 0.681288668413132, "grad_norm": 0.5347844362258911, "learning_rate": 2.377398055503936e-05, "loss": 1.4336, "step": 12223 }, { "epoch": 0.681344406666295, "grad_norm": 0.5410118699073792, "learning_rate": 2.376639992297299e-05, "loss": 1.4867, "step": 12224 }, { "epoch": 0.6814001449194582, "grad_norm": 0.5688346028327942, "learning_rate": 2.3758820122883456e-05, "loss": 1.7883, "step": 12225 }, { "epoch": 0.6814558831726214, "grad_norm": 0.5206215381622314, "learning_rate": 2.375124115501115e-05, "loss": 1.7039, "step": 12226 }, { "epoch": 0.6815116214257845, "grad_norm": 0.5235037803649902, "learning_rate": 2.3743663019596456e-05, "loss": 1.592, "step": 12227 }, { "epoch": 0.6815673596789477, "grad_norm": 0.6111394762992859, "learning_rate": 2.3736085716879647e-05, "loss": 1.7615, "step": 12228 }, { "epoch": 0.6816230979321108, "grad_norm": 0.5806996822357178, "learning_rate": 2.3728509247101106e-05, "loss": 1.5715, "step": 12229 }, { "epoch": 0.6816788361852739, "grad_norm": 0.5856095552444458, "learning_rate": 2.3720933610501062e-05, "loss": 1.6945, "step": 12230 }, { "epoch": 0.6817345744384371, "grad_norm": 0.563182532787323, "learning_rate": 2.37133588073198e-05, "loss": 1.5869, "step": 12231 }, { "epoch": 0.6817903126916003, "grad_norm": 0.5626211166381836, "learning_rate": 2.3705784837797502e-05, "loss": 1.5898, "step": 12232 }, { "epoch": 0.6818460509447634, "grad_norm": 0.6541900634765625, "learning_rate": 2.3698211702174423e-05, "loss": 1.6013, "step": 12233 }, { "epoch": 0.6819017891979265, "grad_norm": 0.6194508075714111, "learning_rate": 2.3690639400690735e-05, "loss": 1.6214, "step": 12234 }, { "epoch": 0.6819575274510897, "grad_norm": 0.5775251984596252, "learning_rate": 2.368306793358655e-05, "loss": 1.6553, "step": 12235 }, { "epoch": 0.6820132657042528, "grad_norm": 0.570357620716095, "learning_rate": 2.3675497301102017e-05, "loss": 1.8637, "step": 12236 }, { "epoch": 0.682069003957416, "grad_norm": 0.5307665467262268, "learning_rate": 2.3667927503477222e-05, "loss": 1.3013, "step": 12237 }, { "epoch": 0.6821247422105792, "grad_norm": 0.6126335263252258, "learning_rate": 2.3660358540952265e-05, "loss": 1.7682, "step": 12238 }, { "epoch": 0.6821804804637422, "grad_norm": 0.5725120902061462, "learning_rate": 2.3652790413767122e-05, "loss": 1.7248, "step": 12239 }, { "epoch": 0.6822362187169054, "grad_norm": 0.5724482536315918, "learning_rate": 2.3645223122161868e-05, "loss": 1.6372, "step": 12240 }, { "epoch": 0.6822919569700685, "grad_norm": 0.5620321035385132, "learning_rate": 2.3637656666376505e-05, "loss": 1.5107, "step": 12241 }, { "epoch": 0.6823476952232317, "grad_norm": 0.6563616394996643, "learning_rate": 2.3630091046650944e-05, "loss": 1.9183, "step": 12242 }, { "epoch": 0.6824034334763949, "grad_norm": 0.5810117125511169, "learning_rate": 2.3622526263225152e-05, "loss": 1.7131, "step": 12243 }, { "epoch": 0.682459171729558, "grad_norm": 0.5808402895927429, "learning_rate": 2.3614962316339033e-05, "loss": 1.8323, "step": 12244 }, { "epoch": 0.6825149099827211, "grad_norm": 0.5127190351486206, "learning_rate": 2.3607399206232493e-05, "loss": 1.46, "step": 12245 }, { "epoch": 0.6825706482358843, "grad_norm": 0.5926672220230103, "learning_rate": 2.359983693314535e-05, "loss": 1.6821, "step": 12246 }, { "epoch": 0.6826263864890474, "grad_norm": 0.5927006602287292, "learning_rate": 2.359227549731744e-05, "loss": 1.5697, "step": 12247 }, { "epoch": 0.6826821247422106, "grad_norm": 0.5811200141906738, "learning_rate": 2.358471489898862e-05, "loss": 1.5941, "step": 12248 }, { "epoch": 0.6827378629953738, "grad_norm": 0.5455745458602905, "learning_rate": 2.3577155138398616e-05, "loss": 1.6352, "step": 12249 }, { "epoch": 0.6827936012485368, "grad_norm": 0.5447341203689575, "learning_rate": 2.3569596215787187e-05, "loss": 1.4396, "step": 12250 }, { "epoch": 0.6828493395017, "grad_norm": 0.619299590587616, "learning_rate": 2.356203813139407e-05, "loss": 1.645, "step": 12251 }, { "epoch": 0.6829050777548632, "grad_norm": 0.5267062783241272, "learning_rate": 2.3554480885458964e-05, "loss": 1.4556, "step": 12252 }, { "epoch": 0.6829608160080263, "grad_norm": 0.5284720063209534, "learning_rate": 2.354692447822155e-05, "loss": 1.4566, "step": 12253 }, { "epoch": 0.6830165542611895, "grad_norm": 0.5493966937065125, "learning_rate": 2.3539368909921423e-05, "loss": 1.6032, "step": 12254 }, { "epoch": 0.6830722925143526, "grad_norm": 0.5600801110267639, "learning_rate": 2.3531814180798277e-05, "loss": 1.5002, "step": 12255 }, { "epoch": 0.6831280307675157, "grad_norm": 0.5507102608680725, "learning_rate": 2.3524260291091642e-05, "loss": 1.8472, "step": 12256 }, { "epoch": 0.6831837690206789, "grad_norm": 0.5536506772041321, "learning_rate": 2.3516707241041132e-05, "loss": 1.7751, "step": 12257 }, { "epoch": 0.6832395072738421, "grad_norm": 0.5619939565658569, "learning_rate": 2.350915503088622e-05, "loss": 1.6577, "step": 12258 }, { "epoch": 0.6832952455270052, "grad_norm": 0.5918766260147095, "learning_rate": 2.3501603660866473e-05, "loss": 1.8244, "step": 12259 }, { "epoch": 0.6833509837801683, "grad_norm": 0.5610700845718384, "learning_rate": 2.3494053131221383e-05, "loss": 1.7442, "step": 12260 }, { "epoch": 0.6834067220333315, "grad_norm": 0.5872762799263, "learning_rate": 2.3486503442190373e-05, "loss": 1.5471, "step": 12261 }, { "epoch": 0.6834624602864946, "grad_norm": 0.5529700517654419, "learning_rate": 2.347895459401288e-05, "loss": 1.5732, "step": 12262 }, { "epoch": 0.6835181985396578, "grad_norm": 0.5814720988273621, "learning_rate": 2.3471406586928323e-05, "loss": 1.642, "step": 12263 }, { "epoch": 0.6835739367928209, "grad_norm": 0.5444031953811646, "learning_rate": 2.34638594211761e-05, "loss": 1.603, "step": 12264 }, { "epoch": 0.683629675045984, "grad_norm": 0.5756646990776062, "learning_rate": 2.3456313096995498e-05, "loss": 1.7664, "step": 12265 }, { "epoch": 0.6836854132991472, "grad_norm": 0.5543645620346069, "learning_rate": 2.34487676146259e-05, "loss": 1.4581, "step": 12266 }, { "epoch": 0.6837411515523103, "grad_norm": 0.590130090713501, "learning_rate": 2.344122297430661e-05, "loss": 1.6216, "step": 12267 }, { "epoch": 0.6837968898054735, "grad_norm": 0.5462613105773926, "learning_rate": 2.343367917627686e-05, "loss": 1.641, "step": 12268 }, { "epoch": 0.6838526280586367, "grad_norm": 0.5439698100090027, "learning_rate": 2.3426136220775917e-05, "loss": 1.5376, "step": 12269 }, { "epoch": 0.6839083663117997, "grad_norm": 0.557994544506073, "learning_rate": 2.3418594108042996e-05, "loss": 1.4804, "step": 12270 }, { "epoch": 0.6839641045649629, "grad_norm": 0.5578276515007019, "learning_rate": 2.3411052838317306e-05, "loss": 1.6446, "step": 12271 }, { "epoch": 0.6840198428181261, "grad_norm": 0.5396918654441833, "learning_rate": 2.340351241183798e-05, "loss": 1.6575, "step": 12272 }, { "epoch": 0.6840755810712892, "grad_norm": 0.548381507396698, "learning_rate": 2.339597282884415e-05, "loss": 1.4676, "step": 12273 }, { "epoch": 0.6841313193244524, "grad_norm": 0.5647532343864441, "learning_rate": 2.3388434089574985e-05, "loss": 1.6655, "step": 12274 }, { "epoch": 0.6841870575776156, "grad_norm": 0.5372335910797119, "learning_rate": 2.3380896194269518e-05, "loss": 1.5272, "step": 12275 }, { "epoch": 0.6842427958307786, "grad_norm": 0.6535205245018005, "learning_rate": 2.337335914316683e-05, "loss": 1.8014, "step": 12276 }, { "epoch": 0.6842985340839418, "grad_norm": 0.579191267490387, "learning_rate": 2.3365822936505938e-05, "loss": 1.7232, "step": 12277 }, { "epoch": 0.684354272337105, "grad_norm": 0.5299929976463318, "learning_rate": 2.3358287574525878e-05, "loss": 1.4039, "step": 12278 }, { "epoch": 0.6844100105902681, "grad_norm": 0.5980880856513977, "learning_rate": 2.335075305746558e-05, "loss": 1.6005, "step": 12279 }, { "epoch": 0.6844657488434313, "grad_norm": 0.5642344951629639, "learning_rate": 2.3343219385564003e-05, "loss": 1.5314, "step": 12280 }, { "epoch": 0.6845214870965944, "grad_norm": 0.5406617522239685, "learning_rate": 2.333568655906013e-05, "loss": 1.5298, "step": 12281 }, { "epoch": 0.6845772253497575, "grad_norm": 0.5585936307907104, "learning_rate": 2.332815457819279e-05, "loss": 1.6174, "step": 12282 }, { "epoch": 0.6846329636029207, "grad_norm": 0.6313422322273254, "learning_rate": 2.332062344320088e-05, "loss": 1.4918, "step": 12283 }, { "epoch": 0.6846887018560839, "grad_norm": 0.6248939037322998, "learning_rate": 2.3313093154323246e-05, "loss": 1.8133, "step": 12284 }, { "epoch": 0.684744440109247, "grad_norm": 0.5743393301963806, "learning_rate": 2.3305563711798694e-05, "loss": 1.7663, "step": 12285 }, { "epoch": 0.6848001783624101, "grad_norm": 0.532964825630188, "learning_rate": 2.3298035115866052e-05, "loss": 1.6054, "step": 12286 }, { "epoch": 0.6848559166155732, "grad_norm": 0.587245523929596, "learning_rate": 2.3290507366764025e-05, "loss": 1.7638, "step": 12287 }, { "epoch": 0.6849116548687364, "grad_norm": 0.5927528142929077, "learning_rate": 2.3282980464731378e-05, "loss": 1.8447, "step": 12288 }, { "epoch": 0.6849673931218996, "grad_norm": 0.5583227276802063, "learning_rate": 2.3275454410006825e-05, "loss": 1.5922, "step": 12289 }, { "epoch": 0.6850231313750627, "grad_norm": 0.5567259192466736, "learning_rate": 2.326792920282906e-05, "loss": 1.5335, "step": 12290 }, { "epoch": 0.6850788696282258, "grad_norm": 0.5991070866584778, "learning_rate": 2.3260404843436685e-05, "loss": 1.7828, "step": 12291 }, { "epoch": 0.685134607881389, "grad_norm": 0.5784618854522705, "learning_rate": 2.325288133206838e-05, "loss": 1.7622, "step": 12292 }, { "epoch": 0.6851903461345521, "grad_norm": 0.6060516834259033, "learning_rate": 2.3245358668962754e-05, "loss": 1.6446, "step": 12293 }, { "epoch": 0.6852460843877153, "grad_norm": 0.5378335118293762, "learning_rate": 2.323783685435834e-05, "loss": 1.1563, "step": 12294 }, { "epoch": 0.6853018226408785, "grad_norm": 0.5524575710296631, "learning_rate": 2.323031588849371e-05, "loss": 1.4075, "step": 12295 }, { "epoch": 0.6853575608940415, "grad_norm": 0.5505098700523376, "learning_rate": 2.322279577160738e-05, "loss": 1.5879, "step": 12296 }, { "epoch": 0.6854132991472047, "grad_norm": 0.5956327319145203, "learning_rate": 2.3215276503937867e-05, "loss": 1.8357, "step": 12297 }, { "epoch": 0.6854690374003679, "grad_norm": 0.5103068351745605, "learning_rate": 2.3207758085723597e-05, "loss": 1.4444, "step": 12298 }, { "epoch": 0.685524775653531, "grad_norm": 0.5405187010765076, "learning_rate": 2.3200240517203015e-05, "loss": 1.6139, "step": 12299 }, { "epoch": 0.6855805139066942, "grad_norm": 0.5659931898117065, "learning_rate": 2.3192723798614584e-05, "loss": 1.7099, "step": 12300 }, { "epoch": 0.6856362521598574, "grad_norm": 0.553611159324646, "learning_rate": 2.318520793019664e-05, "loss": 1.602, "step": 12301 }, { "epoch": 0.6856919904130204, "grad_norm": 0.5447365045547485, "learning_rate": 2.317769291218756e-05, "loss": 1.6749, "step": 12302 }, { "epoch": 0.6857477286661836, "grad_norm": 0.5491530299186707, "learning_rate": 2.3170178744825676e-05, "loss": 1.7086, "step": 12303 }, { "epoch": 0.6858034669193468, "grad_norm": 0.5359060764312744, "learning_rate": 2.316266542834931e-05, "loss": 1.4932, "step": 12304 }, { "epoch": 0.6858592051725099, "grad_norm": 0.5571125745773315, "learning_rate": 2.3155152962996708e-05, "loss": 1.6363, "step": 12305 }, { "epoch": 0.685914943425673, "grad_norm": 0.581794023513794, "learning_rate": 2.3147641349006116e-05, "loss": 1.7378, "step": 12306 }, { "epoch": 0.6859706816788362, "grad_norm": 0.6025446057319641, "learning_rate": 2.3140130586615823e-05, "loss": 1.3766, "step": 12307 }, { "epoch": 0.6860264199319993, "grad_norm": 0.5985897183418274, "learning_rate": 2.313262067606396e-05, "loss": 1.9187, "step": 12308 }, { "epoch": 0.6860821581851625, "grad_norm": 0.6531051993370056, "learning_rate": 2.3125111617588717e-05, "loss": 1.7144, "step": 12309 }, { "epoch": 0.6861378964383256, "grad_norm": 0.5455422401428223, "learning_rate": 2.311760341142825e-05, "loss": 1.7483, "step": 12310 }, { "epoch": 0.6861936346914888, "grad_norm": 0.5412722826004028, "learning_rate": 2.3110096057820668e-05, "loss": 1.6752, "step": 12311 }, { "epoch": 0.6862493729446519, "grad_norm": 0.5647415518760681, "learning_rate": 2.310258955700408e-05, "loss": 1.5869, "step": 12312 }, { "epoch": 0.686305111197815, "grad_norm": 0.5701124668121338, "learning_rate": 2.3095083909216504e-05, "loss": 1.6082, "step": 12313 }, { "epoch": 0.6863608494509782, "grad_norm": 0.534731388092041, "learning_rate": 2.3087579114696e-05, "loss": 1.6087, "step": 12314 }, { "epoch": 0.6864165877041414, "grad_norm": 0.5065962076187134, "learning_rate": 2.3080075173680577e-05, "loss": 1.178, "step": 12315 }, { "epoch": 0.6864723259573045, "grad_norm": 0.6015399098396301, "learning_rate": 2.3072572086408233e-05, "loss": 1.6732, "step": 12316 }, { "epoch": 0.6865280642104676, "grad_norm": 0.5849504470825195, "learning_rate": 2.3065069853116872e-05, "loss": 1.6694, "step": 12317 }, { "epoch": 0.6865838024636308, "grad_norm": 0.5738614201545715, "learning_rate": 2.3057568474044473e-05, "loss": 1.6295, "step": 12318 }, { "epoch": 0.6866395407167939, "grad_norm": 0.5688014030456543, "learning_rate": 2.305006794942893e-05, "loss": 1.7854, "step": 12319 }, { "epoch": 0.6866952789699571, "grad_norm": 0.5481501817703247, "learning_rate": 2.3042568279508087e-05, "loss": 1.5614, "step": 12320 }, { "epoch": 0.6867510172231203, "grad_norm": 0.636509895324707, "learning_rate": 2.3035069464519805e-05, "loss": 1.8107, "step": 12321 }, { "epoch": 0.6868067554762833, "grad_norm": 0.5459586977958679, "learning_rate": 2.3027571504701902e-05, "loss": 1.4073, "step": 12322 }, { "epoch": 0.6868624937294465, "grad_norm": 0.5219615697860718, "learning_rate": 2.302007440029219e-05, "loss": 1.4108, "step": 12323 }, { "epoch": 0.6869182319826097, "grad_norm": 0.5494387745857239, "learning_rate": 2.3012578151528398e-05, "loss": 1.7426, "step": 12324 }, { "epoch": 0.6869739702357728, "grad_norm": 0.5496208667755127, "learning_rate": 2.3005082758648256e-05, "loss": 1.6956, "step": 12325 }, { "epoch": 0.687029708488936, "grad_norm": 0.5529760122299194, "learning_rate": 2.299758822188954e-05, "loss": 1.4693, "step": 12326 }, { "epoch": 0.6870854467420991, "grad_norm": 0.5550394654273987, "learning_rate": 2.2990094541489866e-05, "loss": 1.9276, "step": 12327 }, { "epoch": 0.6871411849952622, "grad_norm": 0.5335902571678162, "learning_rate": 2.298260171768692e-05, "loss": 1.4048, "step": 12328 }, { "epoch": 0.6871969232484254, "grad_norm": 0.6344630122184753, "learning_rate": 2.2975109750718323e-05, "loss": 1.8137, "step": 12329 }, { "epoch": 0.6872526615015886, "grad_norm": 0.5269440412521362, "learning_rate": 2.2967618640821698e-05, "loss": 1.5533, "step": 12330 }, { "epoch": 0.6873083997547517, "grad_norm": 0.649878978729248, "learning_rate": 2.296012838823458e-05, "loss": 1.8408, "step": 12331 }, { "epoch": 0.6873641380079148, "grad_norm": 0.5460097789764404, "learning_rate": 2.2952638993194515e-05, "loss": 1.6803, "step": 12332 }, { "epoch": 0.6874198762610779, "grad_norm": 0.5708609223365784, "learning_rate": 2.2945150455939084e-05, "loss": 1.5447, "step": 12333 }, { "epoch": 0.6874756145142411, "grad_norm": 0.5807245373725891, "learning_rate": 2.2937662776705728e-05, "loss": 1.7161, "step": 12334 }, { "epoch": 0.6875313527674043, "grad_norm": 0.5651794075965881, "learning_rate": 2.2930175955731914e-05, "loss": 1.5177, "step": 12335 }, { "epoch": 0.6875870910205674, "grad_norm": 0.6334015727043152, "learning_rate": 2.2922689993255093e-05, "loss": 1.9024, "step": 12336 }, { "epoch": 0.6876428292737305, "grad_norm": 0.593908965587616, "learning_rate": 2.2915204889512678e-05, "loss": 1.7149, "step": 12337 }, { "epoch": 0.6876985675268937, "grad_norm": 0.5945553183555603, "learning_rate": 2.2907720644742064e-05, "loss": 1.8041, "step": 12338 }, { "epoch": 0.6877543057800568, "grad_norm": 0.5217798948287964, "learning_rate": 2.2900237259180575e-05, "loss": 1.5429, "step": 12339 }, { "epoch": 0.68781004403322, "grad_norm": 0.5226582288742065, "learning_rate": 2.2892754733065558e-05, "loss": 1.6162, "step": 12340 }, { "epoch": 0.6878657822863832, "grad_norm": 0.5856578946113586, "learning_rate": 2.2885273066634312e-05, "loss": 1.5418, "step": 12341 }, { "epoch": 0.6879215205395462, "grad_norm": 0.5848087668418884, "learning_rate": 2.287779226012413e-05, "loss": 1.5795, "step": 12342 }, { "epoch": 0.6879772587927094, "grad_norm": 0.5924365520477295, "learning_rate": 2.287031231377221e-05, "loss": 1.5287, "step": 12343 }, { "epoch": 0.6880329970458726, "grad_norm": 0.5729358792304993, "learning_rate": 2.2862833227815834e-05, "loss": 1.8508, "step": 12344 }, { "epoch": 0.6880887352990357, "grad_norm": 0.5354797840118408, "learning_rate": 2.2855355002492173e-05, "loss": 1.8116, "step": 12345 }, { "epoch": 0.6881444735521989, "grad_norm": 0.5971417427062988, "learning_rate": 2.2847877638038378e-05, "loss": 1.911, "step": 12346 }, { "epoch": 0.6882002118053621, "grad_norm": 0.6175577044487, "learning_rate": 2.2840401134691593e-05, "loss": 1.9441, "step": 12347 }, { "epoch": 0.6882559500585251, "grad_norm": 0.5977439880371094, "learning_rate": 2.283292549268893e-05, "loss": 1.6979, "step": 12348 }, { "epoch": 0.6883116883116883, "grad_norm": 0.5494217872619629, "learning_rate": 2.2825450712267495e-05, "loss": 1.4911, "step": 12349 }, { "epoch": 0.6883674265648515, "grad_norm": 0.5619058609008789, "learning_rate": 2.2817976793664286e-05, "loss": 1.7359, "step": 12350 }, { "epoch": 0.6884231648180146, "grad_norm": 0.6751987338066101, "learning_rate": 2.2810503737116395e-05, "loss": 1.5864, "step": 12351 }, { "epoch": 0.6884789030711778, "grad_norm": 0.5647567510604858, "learning_rate": 2.2803031542860814e-05, "loss": 1.6317, "step": 12352 }, { "epoch": 0.688534641324341, "grad_norm": 0.5837883353233337, "learning_rate": 2.2795560211134488e-05, "loss": 1.7449, "step": 12353 }, { "epoch": 0.688590379577504, "grad_norm": 0.5777410864830017, "learning_rate": 2.2788089742174374e-05, "loss": 1.8073, "step": 12354 }, { "epoch": 0.6886461178306672, "grad_norm": 0.5158605575561523, "learning_rate": 2.27806201362174e-05, "loss": 1.4452, "step": 12355 }, { "epoch": 0.6887018560838304, "grad_norm": 0.5918664336204529, "learning_rate": 2.2773151393500475e-05, "loss": 1.5739, "step": 12356 }, { "epoch": 0.6887575943369935, "grad_norm": 0.5540437698364258, "learning_rate": 2.2765683514260426e-05, "loss": 1.6514, "step": 12357 }, { "epoch": 0.6888133325901566, "grad_norm": 0.556175708770752, "learning_rate": 2.2758216498734086e-05, "loss": 1.7062, "step": 12358 }, { "epoch": 0.6888690708433197, "grad_norm": 0.5426061153411865, "learning_rate": 2.275075034715833e-05, "loss": 1.731, "step": 12359 }, { "epoch": 0.6889248090964829, "grad_norm": 0.5565474033355713, "learning_rate": 2.2743285059769876e-05, "loss": 1.445, "step": 12360 }, { "epoch": 0.6889805473496461, "grad_norm": 0.5506940484046936, "learning_rate": 2.2735820636805504e-05, "loss": 1.6502, "step": 12361 }, { "epoch": 0.6890362856028092, "grad_norm": 0.563405454158783, "learning_rate": 2.2728357078501943e-05, "loss": 1.728, "step": 12362 }, { "epoch": 0.6890920238559723, "grad_norm": 0.5887188911437988, "learning_rate": 2.2720894385095887e-05, "loss": 1.7767, "step": 12363 }, { "epoch": 0.6891477621091355, "grad_norm": 0.5309818983078003, "learning_rate": 2.2713432556824033e-05, "loss": 1.4704, "step": 12364 }, { "epoch": 0.6892035003622986, "grad_norm": 0.5519055128097534, "learning_rate": 2.2705971593922985e-05, "loss": 1.5619, "step": 12365 }, { "epoch": 0.6892592386154618, "grad_norm": 0.6391844749450684, "learning_rate": 2.269851149662939e-05, "loss": 1.6688, "step": 12366 }, { "epoch": 0.689314976868625, "grad_norm": 0.5843481421470642, "learning_rate": 2.269105226517983e-05, "loss": 1.73, "step": 12367 }, { "epoch": 0.689370715121788, "grad_norm": 0.5808287262916565, "learning_rate": 2.2683593899810884e-05, "loss": 1.713, "step": 12368 }, { "epoch": 0.6894264533749512, "grad_norm": 0.5973604321479797, "learning_rate": 2.2676136400759036e-05, "loss": 1.746, "step": 12369 }, { "epoch": 0.6894821916281144, "grad_norm": 0.5638074278831482, "learning_rate": 2.2668679768260853e-05, "loss": 1.4742, "step": 12370 }, { "epoch": 0.6895379298812775, "grad_norm": 0.5505542755126953, "learning_rate": 2.2661224002552816e-05, "loss": 1.5458, "step": 12371 }, { "epoch": 0.6895936681344407, "grad_norm": 0.5930557250976562, "learning_rate": 2.2653769103871337e-05, "loss": 1.723, "step": 12372 }, { "epoch": 0.6896494063876039, "grad_norm": 0.5452224016189575, "learning_rate": 2.2646315072452862e-05, "loss": 1.5082, "step": 12373 }, { "epoch": 0.6897051446407669, "grad_norm": 0.574612557888031, "learning_rate": 2.2638861908533788e-05, "loss": 1.8428, "step": 12374 }, { "epoch": 0.6897608828939301, "grad_norm": 0.5554018616676331, "learning_rate": 2.2631409612350512e-05, "loss": 1.6155, "step": 12375 }, { "epoch": 0.6898166211470933, "grad_norm": 0.5662262439727783, "learning_rate": 2.262395818413931e-05, "loss": 1.5198, "step": 12376 }, { "epoch": 0.6898723594002564, "grad_norm": 0.6049961447715759, "learning_rate": 2.261650762413656e-05, "loss": 1.7388, "step": 12377 }, { "epoch": 0.6899280976534196, "grad_norm": 0.5534675717353821, "learning_rate": 2.2609057932578554e-05, "loss": 1.4587, "step": 12378 }, { "epoch": 0.6899838359065827, "grad_norm": 0.5847275257110596, "learning_rate": 2.260160910970151e-05, "loss": 1.4442, "step": 12379 }, { "epoch": 0.6900395741597458, "grad_norm": 0.551920473575592, "learning_rate": 2.2594161155741683e-05, "loss": 1.613, "step": 12380 }, { "epoch": 0.690095312412909, "grad_norm": 0.5585432052612305, "learning_rate": 2.2586714070935272e-05, "loss": 1.6245, "step": 12381 }, { "epoch": 0.6901510506660721, "grad_norm": 0.5355674624443054, "learning_rate": 2.257926785551848e-05, "loss": 1.5427, "step": 12382 }, { "epoch": 0.6902067889192353, "grad_norm": 0.5962349772453308, "learning_rate": 2.2571822509727426e-05, "loss": 1.6541, "step": 12383 }, { "epoch": 0.6902625271723984, "grad_norm": 0.5941932201385498, "learning_rate": 2.2564378033798217e-05, "loss": 1.6218, "step": 12384 }, { "epoch": 0.6903182654255615, "grad_norm": 0.5653877258300781, "learning_rate": 2.255693442796702e-05, "loss": 1.7158, "step": 12385 }, { "epoch": 0.6903740036787247, "grad_norm": 0.5377355813980103, "learning_rate": 2.254949169246983e-05, "loss": 1.5469, "step": 12386 }, { "epoch": 0.6904297419318879, "grad_norm": 0.5659373998641968, "learning_rate": 2.254204982754273e-05, "loss": 1.6359, "step": 12387 }, { "epoch": 0.690485480185051, "grad_norm": 0.5379758477210999, "learning_rate": 2.2534608833421712e-05, "loss": 1.5418, "step": 12388 }, { "epoch": 0.6905412184382141, "grad_norm": 0.6035560965538025, "learning_rate": 2.252716871034276e-05, "loss": 1.7552, "step": 12389 }, { "epoch": 0.6905969566913773, "grad_norm": 0.5235888957977295, "learning_rate": 2.2519729458541865e-05, "loss": 1.4821, "step": 12390 }, { "epoch": 0.6906526949445404, "grad_norm": 0.5545063018798828, "learning_rate": 2.2512291078254914e-05, "loss": 1.497, "step": 12391 }, { "epoch": 0.6907084331977036, "grad_norm": 0.5761866569519043, "learning_rate": 2.250485356971782e-05, "loss": 1.7174, "step": 12392 }, { "epoch": 0.6907641714508668, "grad_norm": 0.6089950799942017, "learning_rate": 2.249741693316647e-05, "loss": 1.8879, "step": 12393 }, { "epoch": 0.6908199097040298, "grad_norm": 0.5526731014251709, "learning_rate": 2.2489981168836717e-05, "loss": 1.7202, "step": 12394 }, { "epoch": 0.690875647957193, "grad_norm": 0.5309497117996216, "learning_rate": 2.2482546276964327e-05, "loss": 1.6372, "step": 12395 }, { "epoch": 0.6909313862103562, "grad_norm": 0.5908359885215759, "learning_rate": 2.2475112257785157e-05, "loss": 1.6641, "step": 12396 }, { "epoch": 0.6909871244635193, "grad_norm": 0.5748770833015442, "learning_rate": 2.246767911153496e-05, "loss": 1.6881, "step": 12397 }, { "epoch": 0.6910428627166825, "grad_norm": 0.543129026889801, "learning_rate": 2.246024683844944e-05, "loss": 1.5398, "step": 12398 }, { "epoch": 0.6910986009698457, "grad_norm": 0.5681257843971252, "learning_rate": 2.2452815438764318e-05, "loss": 1.814, "step": 12399 }, { "epoch": 0.6911543392230087, "grad_norm": 0.5826138854026794, "learning_rate": 2.2445384912715284e-05, "loss": 1.8071, "step": 12400 }, { "epoch": 0.6912100774761719, "grad_norm": 0.5727596879005432, "learning_rate": 2.2437955260538e-05, "loss": 1.6608, "step": 12401 }, { "epoch": 0.6912658157293351, "grad_norm": 0.6145783066749573, "learning_rate": 2.2430526482468045e-05, "loss": 1.862, "step": 12402 }, { "epoch": 0.6913215539824982, "grad_norm": 0.5567439794540405, "learning_rate": 2.2423098578741065e-05, "loss": 1.4627, "step": 12403 }, { "epoch": 0.6913772922356614, "grad_norm": 0.5916569828987122, "learning_rate": 2.2415671549592632e-05, "loss": 1.7114, "step": 12404 }, { "epoch": 0.6914330304888244, "grad_norm": 0.6020697951316833, "learning_rate": 2.2408245395258248e-05, "loss": 1.6241, "step": 12405 }, { "epoch": 0.6914887687419876, "grad_norm": 0.5648030042648315, "learning_rate": 2.2400820115973453e-05, "loss": 1.6638, "step": 12406 }, { "epoch": 0.6915445069951508, "grad_norm": 0.5384600758552551, "learning_rate": 2.239339571197373e-05, "loss": 1.4791, "step": 12407 }, { "epoch": 0.6916002452483139, "grad_norm": 0.5968599915504456, "learning_rate": 2.2385972183494552e-05, "loss": 1.7044, "step": 12408 }, { "epoch": 0.6916559835014771, "grad_norm": 0.5620965361595154, "learning_rate": 2.237854953077132e-05, "loss": 1.5771, "step": 12409 }, { "epoch": 0.6917117217546402, "grad_norm": 0.5662969946861267, "learning_rate": 2.2371127754039433e-05, "loss": 1.523, "step": 12410 }, { "epoch": 0.6917674600078033, "grad_norm": 0.5784090161323547, "learning_rate": 2.236370685353433e-05, "loss": 1.7749, "step": 12411 }, { "epoch": 0.6918231982609665, "grad_norm": 0.5956070423126221, "learning_rate": 2.2356286829491284e-05, "loss": 1.7331, "step": 12412 }, { "epoch": 0.6918789365141297, "grad_norm": 0.5325424075126648, "learning_rate": 2.2348867682145653e-05, "loss": 1.299, "step": 12413 }, { "epoch": 0.6919346747672928, "grad_norm": 0.6309555768966675, "learning_rate": 2.2341449411732724e-05, "loss": 1.7712, "step": 12414 }, { "epoch": 0.6919904130204559, "grad_norm": 0.5682843327522278, "learning_rate": 2.2334032018487772e-05, "loss": 1.8, "step": 12415 }, { "epoch": 0.6920461512736191, "grad_norm": 0.5643319487571716, "learning_rate": 2.2326615502646002e-05, "loss": 1.7482, "step": 12416 }, { "epoch": 0.6921018895267822, "grad_norm": 0.6528568267822266, "learning_rate": 2.2319199864442648e-05, "loss": 1.867, "step": 12417 }, { "epoch": 0.6921576277799454, "grad_norm": 0.5698180198669434, "learning_rate": 2.2311785104112876e-05, "loss": 1.627, "step": 12418 }, { "epoch": 0.6922133660331086, "grad_norm": 0.5689524412155151, "learning_rate": 2.230437122189185e-05, "loss": 1.5884, "step": 12419 }, { "epoch": 0.6922691042862716, "grad_norm": 0.5394712686538696, "learning_rate": 2.2296958218014712e-05, "loss": 1.5532, "step": 12420 }, { "epoch": 0.6923248425394348, "grad_norm": 0.5593292713165283, "learning_rate": 2.22895460927165e-05, "loss": 1.5991, "step": 12421 }, { "epoch": 0.692380580792598, "grad_norm": 0.5734406113624573, "learning_rate": 2.2282134846232343e-05, "loss": 1.6068, "step": 12422 }, { "epoch": 0.6924363190457611, "grad_norm": 0.5365568399429321, "learning_rate": 2.227472447879728e-05, "loss": 1.4699, "step": 12423 }, { "epoch": 0.6924920572989243, "grad_norm": 0.5808716416358948, "learning_rate": 2.2267314990646294e-05, "loss": 1.7881, "step": 12424 }, { "epoch": 0.6925477955520875, "grad_norm": 0.5327333807945251, "learning_rate": 2.225990638201438e-05, "loss": 1.4128, "step": 12425 }, { "epoch": 0.6926035338052505, "grad_norm": 0.6283466219902039, "learning_rate": 2.2252498653136493e-05, "loss": 1.6069, "step": 12426 }, { "epoch": 0.6926592720584137, "grad_norm": 0.5366610884666443, "learning_rate": 2.22450918042476e-05, "loss": 1.6399, "step": 12427 }, { "epoch": 0.6927150103115768, "grad_norm": 0.548111081123352, "learning_rate": 2.2237685835582527e-05, "loss": 1.5405, "step": 12428 }, { "epoch": 0.69277074856474, "grad_norm": 0.5569949150085449, "learning_rate": 2.2230280747376216e-05, "loss": 1.7644, "step": 12429 }, { "epoch": 0.6928264868179032, "grad_norm": 0.5301342010498047, "learning_rate": 2.2222876539863508e-05, "loss": 1.5258, "step": 12430 }, { "epoch": 0.6928822250710662, "grad_norm": 0.5644490122795105, "learning_rate": 2.221547321327918e-05, "loss": 1.8579, "step": 12431 }, { "epoch": 0.6929379633242294, "grad_norm": 0.529535710811615, "learning_rate": 2.2208070767858057e-05, "loss": 1.5001, "step": 12432 }, { "epoch": 0.6929937015773926, "grad_norm": 0.5771368145942688, "learning_rate": 2.2200669203834885e-05, "loss": 1.6712, "step": 12433 }, { "epoch": 0.6930494398305557, "grad_norm": 0.5676137208938599, "learning_rate": 2.2193268521444428e-05, "loss": 1.631, "step": 12434 }, { "epoch": 0.6931051780837189, "grad_norm": 0.6103230714797974, "learning_rate": 2.2185868720921342e-05, "loss": 1.7087, "step": 12435 }, { "epoch": 0.693160916336882, "grad_norm": 0.6129918694496155, "learning_rate": 2.217846980250032e-05, "loss": 1.7335, "step": 12436 }, { "epoch": 0.6932166545900451, "grad_norm": 0.6118063926696777, "learning_rate": 2.2171071766416064e-05, "loss": 1.8468, "step": 12437 }, { "epoch": 0.6932723928432083, "grad_norm": 0.5562924146652222, "learning_rate": 2.2163674612903135e-05, "loss": 1.5575, "step": 12438 }, { "epoch": 0.6933281310963715, "grad_norm": 0.6027613282203674, "learning_rate": 2.215627834219615e-05, "loss": 1.7773, "step": 12439 }, { "epoch": 0.6933838693495346, "grad_norm": 0.5739839673042297, "learning_rate": 2.2148882954529676e-05, "loss": 1.7082, "step": 12440 }, { "epoch": 0.6934396076026977, "grad_norm": 0.5857069492340088, "learning_rate": 2.2141488450138277e-05, "loss": 1.7633, "step": 12441 }, { "epoch": 0.6934953458558609, "grad_norm": 0.5265825986862183, "learning_rate": 2.2134094829256408e-05, "loss": 1.4418, "step": 12442 }, { "epoch": 0.693551084109024, "grad_norm": 0.5409064888954163, "learning_rate": 2.2126702092118572e-05, "loss": 1.6666, "step": 12443 }, { "epoch": 0.6936068223621872, "grad_norm": 0.5915992259979248, "learning_rate": 2.2119310238959268e-05, "loss": 1.8063, "step": 12444 }, { "epoch": 0.6936625606153504, "grad_norm": 0.5871009826660156, "learning_rate": 2.2111919270012866e-05, "loss": 1.8061, "step": 12445 }, { "epoch": 0.6937182988685134, "grad_norm": 0.5730018019676208, "learning_rate": 2.2104529185513806e-05, "loss": 1.7359, "step": 12446 }, { "epoch": 0.6937740371216766, "grad_norm": 0.5645169019699097, "learning_rate": 2.2097139985696404e-05, "loss": 1.7572, "step": 12447 }, { "epoch": 0.6938297753748398, "grad_norm": 0.5949046015739441, "learning_rate": 2.208975167079505e-05, "loss": 1.8654, "step": 12448 }, { "epoch": 0.6938855136280029, "grad_norm": 0.5888786911964417, "learning_rate": 2.2082364241044068e-05, "loss": 1.625, "step": 12449 }, { "epoch": 0.6939412518811661, "grad_norm": 0.5714291930198669, "learning_rate": 2.2074977696677703e-05, "loss": 1.685, "step": 12450 }, { "epoch": 0.6939969901343291, "grad_norm": 0.5251734256744385, "learning_rate": 2.2067592037930224e-05, "loss": 1.6458, "step": 12451 }, { "epoch": 0.6940527283874923, "grad_norm": 0.5464848279953003, "learning_rate": 2.2060207265035876e-05, "loss": 1.6008, "step": 12452 }, { "epoch": 0.6941084666406555, "grad_norm": 0.5456926226615906, "learning_rate": 2.205282337822887e-05, "loss": 1.4996, "step": 12453 }, { "epoch": 0.6941642048938186, "grad_norm": 0.5967133641242981, "learning_rate": 2.2045440377743325e-05, "loss": 1.8717, "step": 12454 }, { "epoch": 0.6942199431469818, "grad_norm": 0.5450711846351624, "learning_rate": 2.2038058263813443e-05, "loss": 1.7107, "step": 12455 }, { "epoch": 0.694275681400145, "grad_norm": 0.5266870856285095, "learning_rate": 2.203067703667334e-05, "loss": 1.4656, "step": 12456 }, { "epoch": 0.694331419653308, "grad_norm": 0.6569809317588806, "learning_rate": 2.202329669655708e-05, "loss": 1.8354, "step": 12457 }, { "epoch": 0.6943871579064712, "grad_norm": 0.5468927621841431, "learning_rate": 2.2015917243698725e-05, "loss": 1.6724, "step": 12458 }, { "epoch": 0.6944428961596344, "grad_norm": 0.6082443594932556, "learning_rate": 2.2008538678332314e-05, "loss": 1.7463, "step": 12459 }, { "epoch": 0.6944986344127975, "grad_norm": 0.5773779153823853, "learning_rate": 2.200116100069188e-05, "loss": 1.743, "step": 12460 }, { "epoch": 0.6945543726659607, "grad_norm": 0.5488123297691345, "learning_rate": 2.1993784211011353e-05, "loss": 1.6518, "step": 12461 }, { "epoch": 0.6946101109191238, "grad_norm": 0.5357816219329834, "learning_rate": 2.1986408309524682e-05, "loss": 1.4703, "step": 12462 }, { "epoch": 0.6946658491722869, "grad_norm": 0.5495067238807678, "learning_rate": 2.197903329646585e-05, "loss": 1.6126, "step": 12463 }, { "epoch": 0.6947215874254501, "grad_norm": 0.5771341323852539, "learning_rate": 2.1971659172068688e-05, "loss": 1.8363, "step": 12464 }, { "epoch": 0.6947773256786133, "grad_norm": 0.5454638004302979, "learning_rate": 2.196428593656708e-05, "loss": 1.3474, "step": 12465 }, { "epoch": 0.6948330639317764, "grad_norm": 0.6014922857284546, "learning_rate": 2.1956913590194867e-05, "loss": 1.7261, "step": 12466 }, { "epoch": 0.6948888021849395, "grad_norm": 0.5554134249687195, "learning_rate": 2.1949542133185864e-05, "loss": 1.6184, "step": 12467 }, { "epoch": 0.6949445404381027, "grad_norm": 0.6078512072563171, "learning_rate": 2.194217156577383e-05, "loss": 1.6595, "step": 12468 }, { "epoch": 0.6950002786912658, "grad_norm": 0.5782285928726196, "learning_rate": 2.1934801888192496e-05, "loss": 1.4192, "step": 12469 }, { "epoch": 0.695056016944429, "grad_norm": 0.6169813275337219, "learning_rate": 2.1927433100675652e-05, "loss": 1.9271, "step": 12470 }, { "epoch": 0.6951117551975922, "grad_norm": 0.5804049968719482, "learning_rate": 2.1920065203456946e-05, "loss": 1.8332, "step": 12471 }, { "epoch": 0.6951674934507552, "grad_norm": 0.6012964248657227, "learning_rate": 2.191269819677007e-05, "loss": 1.8357, "step": 12472 }, { "epoch": 0.6952232317039184, "grad_norm": 0.6622440814971924, "learning_rate": 2.1905332080848606e-05, "loss": 1.9264, "step": 12473 }, { "epoch": 0.6952789699570815, "grad_norm": 0.544611394405365, "learning_rate": 2.1897966855926227e-05, "loss": 1.7122, "step": 12474 }, { "epoch": 0.6953347082102447, "grad_norm": 0.5682854652404785, "learning_rate": 2.189060252223651e-05, "loss": 1.5732, "step": 12475 }, { "epoch": 0.6953904464634079, "grad_norm": 0.5614532232284546, "learning_rate": 2.1883239080012973e-05, "loss": 1.6042, "step": 12476 }, { "epoch": 0.6954461847165709, "grad_norm": 0.55224609375, "learning_rate": 2.1875876529489165e-05, "loss": 1.5583, "step": 12477 }, { "epoch": 0.6955019229697341, "grad_norm": 0.5405192971229553, "learning_rate": 2.1868514870898572e-05, "loss": 1.6155, "step": 12478 }, { "epoch": 0.6955576612228973, "grad_norm": 0.5644908547401428, "learning_rate": 2.186115410447469e-05, "loss": 1.506, "step": 12479 }, { "epoch": 0.6956133994760604, "grad_norm": 0.5841819047927856, "learning_rate": 2.1853794230450903e-05, "loss": 1.5715, "step": 12480 }, { "epoch": 0.6956691377292236, "grad_norm": 0.5464922785758972, "learning_rate": 2.1846435249060677e-05, "loss": 1.6175, "step": 12481 }, { "epoch": 0.6957248759823867, "grad_norm": 0.5380191802978516, "learning_rate": 2.18390771605374e-05, "loss": 1.4722, "step": 12482 }, { "epoch": 0.6957806142355498, "grad_norm": 0.6160181760787964, "learning_rate": 2.1831719965114383e-05, "loss": 1.6391, "step": 12483 }, { "epoch": 0.695836352488713, "grad_norm": 0.551240861415863, "learning_rate": 2.1824363663024976e-05, "loss": 1.6116, "step": 12484 }, { "epoch": 0.6958920907418762, "grad_norm": 0.555523157119751, "learning_rate": 2.181700825450248e-05, "loss": 1.7712, "step": 12485 }, { "epoch": 0.6959478289950393, "grad_norm": 0.5367977619171143, "learning_rate": 2.1809653739780182e-05, "loss": 1.5029, "step": 12486 }, { "epoch": 0.6960035672482024, "grad_norm": 0.5227271914482117, "learning_rate": 2.180230011909129e-05, "loss": 1.5279, "step": 12487 }, { "epoch": 0.6960593055013656, "grad_norm": 0.5195460915565491, "learning_rate": 2.1794947392669013e-05, "loss": 1.4994, "step": 12488 }, { "epoch": 0.6961150437545287, "grad_norm": 0.6149149537086487, "learning_rate": 2.1787595560746593e-05, "loss": 1.7903, "step": 12489 }, { "epoch": 0.6961707820076919, "grad_norm": 0.587485671043396, "learning_rate": 2.178024462355714e-05, "loss": 1.5443, "step": 12490 }, { "epoch": 0.6962265202608551, "grad_norm": 0.550566554069519, "learning_rate": 2.1772894581333792e-05, "loss": 1.5959, "step": 12491 }, { "epoch": 0.6962822585140181, "grad_norm": 0.5332329869270325, "learning_rate": 2.176554543430965e-05, "loss": 1.4327, "step": 12492 }, { "epoch": 0.6963379967671813, "grad_norm": 0.5670337677001953, "learning_rate": 2.175819718271781e-05, "loss": 1.7718, "step": 12493 }, { "epoch": 0.6963937350203445, "grad_norm": 0.5738561153411865, "learning_rate": 2.1750849826791275e-05, "loss": 1.5627, "step": 12494 }, { "epoch": 0.6964494732735076, "grad_norm": 0.5175594687461853, "learning_rate": 2.1743503366763058e-05, "loss": 1.6201, "step": 12495 }, { "epoch": 0.6965052115266708, "grad_norm": 0.5937666296958923, "learning_rate": 2.173615780286621e-05, "loss": 1.6484, "step": 12496 }, { "epoch": 0.6965609497798338, "grad_norm": 0.5607738494873047, "learning_rate": 2.172881313533362e-05, "loss": 1.5867, "step": 12497 }, { "epoch": 0.696616688032997, "grad_norm": 0.5806588530540466, "learning_rate": 2.1721469364398274e-05, "loss": 1.6964, "step": 12498 }, { "epoch": 0.6966724262861602, "grad_norm": 0.5884150862693787, "learning_rate": 2.1714126490292998e-05, "loss": 1.4694, "step": 12499 }, { "epoch": 0.6967281645393233, "grad_norm": 0.5738844275474548, "learning_rate": 2.1706784513250734e-05, "loss": 1.679, "step": 12500 }, { "epoch": 0.6967839027924865, "grad_norm": 0.5930630564689636, "learning_rate": 2.1699443433504326e-05, "loss": 1.8925, "step": 12501 }, { "epoch": 0.6968396410456497, "grad_norm": 0.5870788097381592, "learning_rate": 2.1692103251286544e-05, "loss": 1.5665, "step": 12502 }, { "epoch": 0.6968953792988127, "grad_norm": 0.5544155836105347, "learning_rate": 2.1684763966830208e-05, "loss": 1.5741, "step": 12503 }, { "epoch": 0.6969511175519759, "grad_norm": 0.5461851358413696, "learning_rate": 2.167742558036806e-05, "loss": 1.6109, "step": 12504 }, { "epoch": 0.6970068558051391, "grad_norm": 0.5209200382232666, "learning_rate": 2.1670088092132866e-05, "loss": 1.5966, "step": 12505 }, { "epoch": 0.6970625940583022, "grad_norm": 0.5700559020042419, "learning_rate": 2.1662751502357265e-05, "loss": 1.7803, "step": 12506 }, { "epoch": 0.6971183323114654, "grad_norm": 0.5122175216674805, "learning_rate": 2.1655415811273988e-05, "loss": 1.366, "step": 12507 }, { "epoch": 0.6971740705646285, "grad_norm": 0.5897361636161804, "learning_rate": 2.1648081019115675e-05, "loss": 1.7152, "step": 12508 }, { "epoch": 0.6972298088177916, "grad_norm": 0.5518897771835327, "learning_rate": 2.1640747126114915e-05, "loss": 1.6061, "step": 12509 }, { "epoch": 0.6972855470709548, "grad_norm": 0.5426011085510254, "learning_rate": 2.163341413250431e-05, "loss": 1.455, "step": 12510 }, { "epoch": 0.697341285324118, "grad_norm": 0.5575090646743774, "learning_rate": 2.1626082038516415e-05, "loss": 1.554, "step": 12511 }, { "epoch": 0.6973970235772811, "grad_norm": 0.5110504627227783, "learning_rate": 2.161875084438379e-05, "loss": 1.4238, "step": 12512 }, { "epoch": 0.6974527618304442, "grad_norm": 0.5228980779647827, "learning_rate": 2.1611420550338894e-05, "loss": 1.6579, "step": 12513 }, { "epoch": 0.6975085000836074, "grad_norm": 0.5784720778465271, "learning_rate": 2.1604091156614204e-05, "loss": 1.7723, "step": 12514 }, { "epoch": 0.6975642383367705, "grad_norm": 0.546317994594574, "learning_rate": 2.1596762663442218e-05, "loss": 1.5309, "step": 12515 }, { "epoch": 0.6976199765899337, "grad_norm": 0.5592935085296631, "learning_rate": 2.1589435071055296e-05, "loss": 1.6055, "step": 12516 }, { "epoch": 0.6976757148430969, "grad_norm": 0.5744695067405701, "learning_rate": 2.1582108379685856e-05, "loss": 1.8028, "step": 12517 }, { "epoch": 0.69773145309626, "grad_norm": 0.5620167255401611, "learning_rate": 2.1574782589566244e-05, "loss": 1.6126, "step": 12518 }, { "epoch": 0.6977871913494231, "grad_norm": 0.5813114047050476, "learning_rate": 2.1567457700928822e-05, "loss": 1.6897, "step": 12519 }, { "epoch": 0.6978429296025862, "grad_norm": 0.5728158950805664, "learning_rate": 2.1560133714005848e-05, "loss": 1.5911, "step": 12520 }, { "epoch": 0.6978986678557494, "grad_norm": 0.5162991881370544, "learning_rate": 2.1552810629029596e-05, "loss": 1.7061, "step": 12521 }, { "epoch": 0.6979544061089126, "grad_norm": 0.5759060978889465, "learning_rate": 2.154548844623237e-05, "loss": 1.5237, "step": 12522 }, { "epoch": 0.6980101443620756, "grad_norm": 0.5483187437057495, "learning_rate": 2.1538167165846333e-05, "loss": 1.7261, "step": 12523 }, { "epoch": 0.6980658826152388, "grad_norm": 0.56321120262146, "learning_rate": 2.1530846788103686e-05, "loss": 1.7511, "step": 12524 }, { "epoch": 0.698121620868402, "grad_norm": 0.5477744936943054, "learning_rate": 2.1523527313236598e-05, "loss": 1.6178, "step": 12525 }, { "epoch": 0.6981773591215651, "grad_norm": 0.5206699371337891, "learning_rate": 2.1516208741477207e-05, "loss": 1.436, "step": 12526 }, { "epoch": 0.6982330973747283, "grad_norm": 0.5443151593208313, "learning_rate": 2.1508891073057587e-05, "loss": 1.4729, "step": 12527 }, { "epoch": 0.6982888356278915, "grad_norm": 0.6137494444847107, "learning_rate": 2.1501574308209828e-05, "loss": 1.9092, "step": 12528 }, { "epoch": 0.6983445738810545, "grad_norm": 0.6065635085105896, "learning_rate": 2.1494258447165973e-05, "loss": 1.6061, "step": 12529 }, { "epoch": 0.6984003121342177, "grad_norm": 0.6359501481056213, "learning_rate": 2.1486943490158034e-05, "loss": 1.828, "step": 12530 }, { "epoch": 0.6984560503873809, "grad_norm": 0.5409738421440125, "learning_rate": 2.1479629437418032e-05, "loss": 1.8147, "step": 12531 }, { "epoch": 0.698511788640544, "grad_norm": 0.5747645497322083, "learning_rate": 2.1472316289177856e-05, "loss": 1.6662, "step": 12532 }, { "epoch": 0.6985675268937072, "grad_norm": 0.5804151892662048, "learning_rate": 2.1465004045669505e-05, "loss": 1.5294, "step": 12533 }, { "epoch": 0.6986232651468703, "grad_norm": 0.574507474899292, "learning_rate": 2.145769270712487e-05, "loss": 1.7331, "step": 12534 }, { "epoch": 0.6986790034000334, "grad_norm": 0.5345951318740845, "learning_rate": 2.1450382273775788e-05, "loss": 1.5622, "step": 12535 }, { "epoch": 0.6987347416531966, "grad_norm": 0.6589462161064148, "learning_rate": 2.144307274585413e-05, "loss": 1.418, "step": 12536 }, { "epoch": 0.6987904799063598, "grad_norm": 0.5831825137138367, "learning_rate": 2.14357641235917e-05, "loss": 1.5661, "step": 12537 }, { "epoch": 0.6988462181595229, "grad_norm": 0.5969269275665283, "learning_rate": 2.1428456407220315e-05, "loss": 1.8971, "step": 12538 }, { "epoch": 0.698901956412686, "grad_norm": 0.5822701454162598, "learning_rate": 2.142114959697169e-05, "loss": 1.5624, "step": 12539 }, { "epoch": 0.6989576946658492, "grad_norm": 0.5579544901847839, "learning_rate": 2.1413843693077552e-05, "loss": 1.7243, "step": 12540 }, { "epoch": 0.6990134329190123, "grad_norm": 0.5481868982315063, "learning_rate": 2.140653869576966e-05, "loss": 1.5812, "step": 12541 }, { "epoch": 0.6990691711721755, "grad_norm": 0.5613032579421997, "learning_rate": 2.1399234605279634e-05, "loss": 1.6288, "step": 12542 }, { "epoch": 0.6991249094253386, "grad_norm": 0.5468133091926575, "learning_rate": 2.1391931421839127e-05, "loss": 1.6193, "step": 12543 }, { "epoch": 0.6991806476785017, "grad_norm": 0.5905917882919312, "learning_rate": 2.1384629145679765e-05, "loss": 1.8533, "step": 12544 }, { "epoch": 0.6992363859316649, "grad_norm": 0.5613247752189636, "learning_rate": 2.137732777703314e-05, "loss": 1.8032, "step": 12545 }, { "epoch": 0.699292124184828, "grad_norm": 0.5663119554519653, "learning_rate": 2.137002731613078e-05, "loss": 1.8345, "step": 12546 }, { "epoch": 0.6993478624379912, "grad_norm": 0.5337582230567932, "learning_rate": 2.1362727763204216e-05, "loss": 1.6405, "step": 12547 }, { "epoch": 0.6994036006911544, "grad_norm": 0.5438380241394043, "learning_rate": 2.1355429118484986e-05, "loss": 1.4639, "step": 12548 }, { "epoch": 0.6994593389443174, "grad_norm": 0.5389162302017212, "learning_rate": 2.1348131382204527e-05, "loss": 1.6657, "step": 12549 }, { "epoch": 0.6995150771974806, "grad_norm": 0.574306845664978, "learning_rate": 2.1340834554594287e-05, "loss": 1.7891, "step": 12550 }, { "epoch": 0.6995708154506438, "grad_norm": 0.653531014919281, "learning_rate": 2.1333538635885674e-05, "loss": 1.9755, "step": 12551 }, { "epoch": 0.6996265537038069, "grad_norm": 0.54327791929245, "learning_rate": 2.13262436263101e-05, "loss": 1.4889, "step": 12552 }, { "epoch": 0.6996822919569701, "grad_norm": 0.5144495964050293, "learning_rate": 2.131894952609888e-05, "loss": 1.3855, "step": 12553 }, { "epoch": 0.6997380302101333, "grad_norm": 0.6167160272598267, "learning_rate": 2.131165633548336e-05, "loss": 1.8536, "step": 12554 }, { "epoch": 0.6997937684632963, "grad_norm": 0.5398876070976257, "learning_rate": 2.1304364054694835e-05, "loss": 1.6022, "step": 12555 }, { "epoch": 0.6998495067164595, "grad_norm": 0.5648753046989441, "learning_rate": 2.129707268396458e-05, "loss": 1.812, "step": 12556 }, { "epoch": 0.6999052449696227, "grad_norm": 0.5736165642738342, "learning_rate": 2.1289782223523848e-05, "loss": 1.7548, "step": 12557 }, { "epoch": 0.6999609832227858, "grad_norm": 0.5434161424636841, "learning_rate": 2.1282492673603788e-05, "loss": 1.5542, "step": 12558 }, { "epoch": 0.700016721475949, "grad_norm": 0.5680014491081238, "learning_rate": 2.1275204034435647e-05, "loss": 1.7433, "step": 12559 }, { "epoch": 0.7000724597291121, "grad_norm": 0.6389971375465393, "learning_rate": 2.1267916306250573e-05, "loss": 1.7956, "step": 12560 }, { "epoch": 0.7001281979822752, "grad_norm": 0.5255822539329529, "learning_rate": 2.126062948927966e-05, "loss": 1.1928, "step": 12561 }, { "epoch": 0.7001839362354384, "grad_norm": 0.5520752668380737, "learning_rate": 2.1253343583754016e-05, "loss": 1.6835, "step": 12562 }, { "epoch": 0.7002396744886016, "grad_norm": 0.6200222373008728, "learning_rate": 2.124605858990471e-05, "loss": 1.7763, "step": 12563 }, { "epoch": 0.7002954127417647, "grad_norm": 0.5540696382522583, "learning_rate": 2.1238774507962795e-05, "loss": 1.5703, "step": 12564 }, { "epoch": 0.7003511509949278, "grad_norm": 0.5841526389122009, "learning_rate": 2.123149133815925e-05, "loss": 1.7078, "step": 12565 }, { "epoch": 0.7004068892480909, "grad_norm": 0.568084716796875, "learning_rate": 2.1224209080725042e-05, "loss": 1.6979, "step": 12566 }, { "epoch": 0.7004626275012541, "grad_norm": 0.5143046379089355, "learning_rate": 2.1216927735891183e-05, "loss": 1.3949, "step": 12567 }, { "epoch": 0.7005183657544173, "grad_norm": 0.5790497064590454, "learning_rate": 2.1209647303888546e-05, "loss": 1.6486, "step": 12568 }, { "epoch": 0.7005741040075804, "grad_norm": 0.5869383811950684, "learning_rate": 2.1202367784948036e-05, "loss": 1.7111, "step": 12569 }, { "epoch": 0.7006298422607435, "grad_norm": 0.5924579501152039, "learning_rate": 2.119508917930052e-05, "loss": 1.5828, "step": 12570 }, { "epoch": 0.7006855805139067, "grad_norm": 0.5691964030265808, "learning_rate": 2.1187811487176845e-05, "loss": 1.499, "step": 12571 }, { "epoch": 0.7007413187670698, "grad_norm": 0.6078161001205444, "learning_rate": 2.1180534708807787e-05, "loss": 1.6988, "step": 12572 }, { "epoch": 0.700797057020233, "grad_norm": 0.539812445640564, "learning_rate": 2.117325884442412e-05, "loss": 1.3717, "step": 12573 }, { "epoch": 0.7008527952733962, "grad_norm": 0.5669495463371277, "learning_rate": 2.1165983894256647e-05, "loss": 1.5043, "step": 12574 }, { "epoch": 0.7009085335265592, "grad_norm": 0.5549720525741577, "learning_rate": 2.1158709858536037e-05, "loss": 1.602, "step": 12575 }, { "epoch": 0.7009642717797224, "grad_norm": 0.5545222759246826, "learning_rate": 2.115143673749299e-05, "loss": 1.6733, "step": 12576 }, { "epoch": 0.7010200100328856, "grad_norm": 0.6436394453048706, "learning_rate": 2.114416453135817e-05, "loss": 1.8525, "step": 12577 }, { "epoch": 0.7010757482860487, "grad_norm": 0.5488054156303406, "learning_rate": 2.1136893240362226e-05, "loss": 1.7004, "step": 12578 }, { "epoch": 0.7011314865392119, "grad_norm": 0.5736593008041382, "learning_rate": 2.112962286473573e-05, "loss": 1.5932, "step": 12579 }, { "epoch": 0.701187224792375, "grad_norm": 0.5578902363777161, "learning_rate": 2.1122353404709274e-05, "loss": 1.5578, "step": 12580 }, { "epoch": 0.7012429630455381, "grad_norm": 0.5767555236816406, "learning_rate": 2.1115084860513395e-05, "loss": 1.8148, "step": 12581 }, { "epoch": 0.7012987012987013, "grad_norm": 0.6967010498046875, "learning_rate": 2.1107817232378618e-05, "loss": 1.7272, "step": 12582 }, { "epoch": 0.7013544395518645, "grad_norm": 0.5739030838012695, "learning_rate": 2.110055052053544e-05, "loss": 1.6468, "step": 12583 }, { "epoch": 0.7014101778050276, "grad_norm": 0.5442588925361633, "learning_rate": 2.1093284725214268e-05, "loss": 1.5593, "step": 12584 }, { "epoch": 0.7014659160581908, "grad_norm": 0.5849565863609314, "learning_rate": 2.1086019846645582e-05, "loss": 1.6528, "step": 12585 }, { "epoch": 0.7015216543113539, "grad_norm": 0.6619828343391418, "learning_rate": 2.1078755885059786e-05, "loss": 1.9402, "step": 12586 }, { "epoch": 0.701577392564517, "grad_norm": 0.5421179533004761, "learning_rate": 2.1071492840687218e-05, "loss": 1.6465, "step": 12587 }, { "epoch": 0.7016331308176802, "grad_norm": 0.5201606154441833, "learning_rate": 2.1064230713758225e-05, "loss": 1.5343, "step": 12588 }, { "epoch": 0.7016888690708433, "grad_norm": 0.5774264931678772, "learning_rate": 2.1056969504503134e-05, "loss": 1.7163, "step": 12589 }, { "epoch": 0.7017446073240065, "grad_norm": 0.5549290776252747, "learning_rate": 2.104970921315223e-05, "loss": 1.6034, "step": 12590 }, { "epoch": 0.7018003455771696, "grad_norm": 0.5619807243347168, "learning_rate": 2.1042449839935747e-05, "loss": 1.7492, "step": 12591 }, { "epoch": 0.7018560838303327, "grad_norm": 0.6009867787361145, "learning_rate": 2.1035191385083895e-05, "loss": 1.8769, "step": 12592 }, { "epoch": 0.7019118220834959, "grad_norm": 0.5830333232879639, "learning_rate": 2.1027933848826942e-05, "loss": 1.7465, "step": 12593 }, { "epoch": 0.7019675603366591, "grad_norm": 0.6390556693077087, "learning_rate": 2.1020677231394982e-05, "loss": 1.7793, "step": 12594 }, { "epoch": 0.7020232985898222, "grad_norm": 0.583836555480957, "learning_rate": 2.1013421533018184e-05, "loss": 1.5743, "step": 12595 }, { "epoch": 0.7020790368429853, "grad_norm": 0.5856710076332092, "learning_rate": 2.1006166753926648e-05, "loss": 1.6089, "step": 12596 }, { "epoch": 0.7021347750961485, "grad_norm": 0.5670978426933289, "learning_rate": 2.0998912894350477e-05, "loss": 1.6831, "step": 12597 }, { "epoch": 0.7021905133493116, "grad_norm": 0.610052764415741, "learning_rate": 2.0991659954519682e-05, "loss": 1.787, "step": 12598 }, { "epoch": 0.7022462516024748, "grad_norm": 0.5299352407455444, "learning_rate": 2.0984407934664287e-05, "loss": 1.5232, "step": 12599 }, { "epoch": 0.702301989855638, "grad_norm": 0.5819052457809448, "learning_rate": 2.097715683501433e-05, "loss": 1.7159, "step": 12600 }, { "epoch": 0.702357728108801, "grad_norm": 0.5537623167037964, "learning_rate": 2.0969906655799732e-05, "loss": 1.6153, "step": 12601 }, { "epoch": 0.7024134663619642, "grad_norm": 0.5379827618598938, "learning_rate": 2.0962657397250433e-05, "loss": 1.5843, "step": 12602 }, { "epoch": 0.7024692046151274, "grad_norm": 0.628884494304657, "learning_rate": 2.0955409059596348e-05, "loss": 1.6255, "step": 12603 }, { "epoch": 0.7025249428682905, "grad_norm": 0.5172703266143799, "learning_rate": 2.094816164306736e-05, "loss": 1.5451, "step": 12604 }, { "epoch": 0.7025806811214537, "grad_norm": 0.5739989876747131, "learning_rate": 2.094091514789328e-05, "loss": 1.4511, "step": 12605 }, { "epoch": 0.7026364193746168, "grad_norm": 0.5497764945030212, "learning_rate": 2.093366957430395e-05, "loss": 1.5672, "step": 12606 }, { "epoch": 0.7026921576277799, "grad_norm": 0.5838956832885742, "learning_rate": 2.092642492252915e-05, "loss": 1.636, "step": 12607 }, { "epoch": 0.7027478958809431, "grad_norm": 0.624302327632904, "learning_rate": 2.0919181192798644e-05, "loss": 1.7725, "step": 12608 }, { "epoch": 0.7028036341341063, "grad_norm": 0.5599181056022644, "learning_rate": 2.091193838534217e-05, "loss": 1.467, "step": 12609 }, { "epoch": 0.7028593723872694, "grad_norm": 0.5655273199081421, "learning_rate": 2.090469650038938e-05, "loss": 1.5876, "step": 12610 }, { "epoch": 0.7029151106404325, "grad_norm": 0.5795032978057861, "learning_rate": 2.089745553817e-05, "loss": 1.698, "step": 12611 }, { "epoch": 0.7029708488935956, "grad_norm": 0.5137896537780762, "learning_rate": 2.0890215498913668e-05, "loss": 1.5782, "step": 12612 }, { "epoch": 0.7030265871467588, "grad_norm": 0.569449245929718, "learning_rate": 2.0882976382849962e-05, "loss": 1.7965, "step": 12613 }, { "epoch": 0.703082325399922, "grad_norm": 0.6196072101593018, "learning_rate": 2.0875738190208483e-05, "loss": 1.6878, "step": 12614 }, { "epoch": 0.7031380636530851, "grad_norm": 0.578255295753479, "learning_rate": 2.0868500921218775e-05, "loss": 1.5877, "step": 12615 }, { "epoch": 0.7031938019062483, "grad_norm": 0.5548200607299805, "learning_rate": 2.0861264576110395e-05, "loss": 1.54, "step": 12616 }, { "epoch": 0.7032495401594114, "grad_norm": 0.591273844242096, "learning_rate": 2.085402915511277e-05, "loss": 1.9004, "step": 12617 }, { "epoch": 0.7033052784125745, "grad_norm": 0.5834256410598755, "learning_rate": 2.0846794658455433e-05, "loss": 1.7008, "step": 12618 }, { "epoch": 0.7033610166657377, "grad_norm": 0.5561612248420715, "learning_rate": 2.0839561086367802e-05, "loss": 1.6724, "step": 12619 }, { "epoch": 0.7034167549189009, "grad_norm": 0.6206260323524475, "learning_rate": 2.0832328439079268e-05, "loss": 1.4036, "step": 12620 }, { "epoch": 0.703472493172064, "grad_norm": 0.6796298027038574, "learning_rate": 2.082509671681921e-05, "loss": 1.6769, "step": 12621 }, { "epoch": 0.7035282314252271, "grad_norm": 0.578867495059967, "learning_rate": 2.0817865919816988e-05, "loss": 1.7971, "step": 12622 }, { "epoch": 0.7035839696783903, "grad_norm": 0.5739205479621887, "learning_rate": 2.081063604830193e-05, "loss": 1.7429, "step": 12623 }, { "epoch": 0.7036397079315534, "grad_norm": 0.5878620147705078, "learning_rate": 2.0803407102503293e-05, "loss": 1.6767, "step": 12624 }, { "epoch": 0.7036954461847166, "grad_norm": 0.5952854156494141, "learning_rate": 2.0796179082650336e-05, "loss": 1.8749, "step": 12625 }, { "epoch": 0.7037511844378798, "grad_norm": 0.5622190833091736, "learning_rate": 2.0788951988972345e-05, "loss": 1.6783, "step": 12626 }, { "epoch": 0.7038069226910428, "grad_norm": 0.514674186706543, "learning_rate": 2.0781725821698466e-05, "loss": 1.3523, "step": 12627 }, { "epoch": 0.703862660944206, "grad_norm": 0.5285819172859192, "learning_rate": 2.0774500581057892e-05, "loss": 1.5957, "step": 12628 }, { "epoch": 0.7039183991973692, "grad_norm": 0.6194326877593994, "learning_rate": 2.076727626727976e-05, "loss": 1.7849, "step": 12629 }, { "epoch": 0.7039741374505323, "grad_norm": 0.588029682636261, "learning_rate": 2.0760052880593213e-05, "loss": 1.7172, "step": 12630 }, { "epoch": 0.7040298757036955, "grad_norm": 0.5699478983879089, "learning_rate": 2.0752830421227277e-05, "loss": 1.72, "step": 12631 }, { "epoch": 0.7040856139568586, "grad_norm": 0.5345055460929871, "learning_rate": 2.0745608889411044e-05, "loss": 1.5975, "step": 12632 }, { "epoch": 0.7041413522100217, "grad_norm": 0.5528733134269714, "learning_rate": 2.0738388285373533e-05, "loss": 1.6038, "step": 12633 }, { "epoch": 0.7041970904631849, "grad_norm": 0.5432607531547546, "learning_rate": 2.0731168609343737e-05, "loss": 1.5777, "step": 12634 }, { "epoch": 0.704252828716348, "grad_norm": 0.5677303671836853, "learning_rate": 2.072394986155064e-05, "loss": 1.7384, "step": 12635 }, { "epoch": 0.7043085669695112, "grad_norm": 0.5520053505897522, "learning_rate": 2.071673204222313e-05, "loss": 1.5779, "step": 12636 }, { "epoch": 0.7043643052226743, "grad_norm": 0.5608752965927124, "learning_rate": 2.070951515159016e-05, "loss": 1.6983, "step": 12637 }, { "epoch": 0.7044200434758374, "grad_norm": 0.5688676238059998, "learning_rate": 2.0702299189880613e-05, "loss": 1.8005, "step": 12638 }, { "epoch": 0.7044757817290006, "grad_norm": 0.5453701019287109, "learning_rate": 2.0695084157323303e-05, "loss": 1.5594, "step": 12639 }, { "epoch": 0.7045315199821638, "grad_norm": 0.5923493504524231, "learning_rate": 2.0687870054147062e-05, "loss": 1.7295, "step": 12640 }, { "epoch": 0.7045872582353269, "grad_norm": 0.5711904764175415, "learning_rate": 2.068065688058068e-05, "loss": 1.7237, "step": 12641 }, { "epoch": 0.70464299648849, "grad_norm": 0.5311852097511292, "learning_rate": 2.067344463685294e-05, "loss": 1.3802, "step": 12642 }, { "epoch": 0.7046987347416532, "grad_norm": 0.6140351295471191, "learning_rate": 2.0666233323192515e-05, "loss": 1.6697, "step": 12643 }, { "epoch": 0.7047544729948163, "grad_norm": 0.5683553218841553, "learning_rate": 2.0659022939828154e-05, "loss": 1.604, "step": 12644 }, { "epoch": 0.7048102112479795, "grad_norm": 0.5510280132293701, "learning_rate": 2.0651813486988535e-05, "loss": 1.7428, "step": 12645 }, { "epoch": 0.7048659495011427, "grad_norm": 0.5744211077690125, "learning_rate": 2.0644604964902264e-05, "loss": 1.7455, "step": 12646 }, { "epoch": 0.7049216877543057, "grad_norm": 0.5572615265846252, "learning_rate": 2.063739737379797e-05, "loss": 1.5011, "step": 12647 }, { "epoch": 0.7049774260074689, "grad_norm": 0.5855537056922913, "learning_rate": 2.063019071390423e-05, "loss": 1.7493, "step": 12648 }, { "epoch": 0.7050331642606321, "grad_norm": 0.5825347304344177, "learning_rate": 2.062298498544963e-05, "loss": 1.741, "step": 12649 }, { "epoch": 0.7050889025137952, "grad_norm": 0.5738754868507385, "learning_rate": 2.0615780188662642e-05, "loss": 1.6665, "step": 12650 }, { "epoch": 0.7051446407669584, "grad_norm": 0.5652052760124207, "learning_rate": 2.0608576323771767e-05, "loss": 1.4688, "step": 12651 }, { "epoch": 0.7052003790201216, "grad_norm": 0.5375339388847351, "learning_rate": 2.0601373391005525e-05, "loss": 1.584, "step": 12652 }, { "epoch": 0.7052561172732846, "grad_norm": 0.5722372531890869, "learning_rate": 2.0594171390592294e-05, "loss": 1.729, "step": 12653 }, { "epoch": 0.7053118555264478, "grad_norm": 0.5661969184875488, "learning_rate": 2.0586970322760498e-05, "loss": 1.7496, "step": 12654 }, { "epoch": 0.705367593779611, "grad_norm": 0.5383875966072083, "learning_rate": 2.057977018773851e-05, "loss": 1.5804, "step": 12655 }, { "epoch": 0.7054233320327741, "grad_norm": 0.5743109583854675, "learning_rate": 2.057257098575471e-05, "loss": 1.5512, "step": 12656 }, { "epoch": 0.7054790702859373, "grad_norm": 0.622540295124054, "learning_rate": 2.0565372717037356e-05, "loss": 1.7573, "step": 12657 }, { "epoch": 0.7055348085391003, "grad_norm": 0.629591166973114, "learning_rate": 2.0558175381814766e-05, "loss": 1.9962, "step": 12658 }, { "epoch": 0.7055905467922635, "grad_norm": 0.5481202006340027, "learning_rate": 2.0550978980315194e-05, "loss": 1.6104, "step": 12659 }, { "epoch": 0.7056462850454267, "grad_norm": 0.5914950370788574, "learning_rate": 2.0543783512766873e-05, "loss": 1.6372, "step": 12660 }, { "epoch": 0.7057020232985898, "grad_norm": 0.553175687789917, "learning_rate": 2.0536588979398013e-05, "loss": 1.5143, "step": 12661 }, { "epoch": 0.705757761551753, "grad_norm": 0.5907781720161438, "learning_rate": 2.0529395380436727e-05, "loss": 1.7892, "step": 12662 }, { "epoch": 0.7058134998049161, "grad_norm": 0.5549091696739197, "learning_rate": 2.052220271611124e-05, "loss": 1.6492, "step": 12663 }, { "epoch": 0.7058692380580792, "grad_norm": 0.5427672266960144, "learning_rate": 2.051501098664959e-05, "loss": 1.6053, "step": 12664 }, { "epoch": 0.7059249763112424, "grad_norm": 0.5447436571121216, "learning_rate": 2.050782019227988e-05, "loss": 1.475, "step": 12665 }, { "epoch": 0.7059807145644056, "grad_norm": 0.5734901428222656, "learning_rate": 2.0500630333230168e-05, "loss": 1.7296, "step": 12666 }, { "epoch": 0.7060364528175687, "grad_norm": 0.580880343914032, "learning_rate": 2.0493441409728466e-05, "loss": 1.7089, "step": 12667 }, { "epoch": 0.7060921910707318, "grad_norm": 0.5470337271690369, "learning_rate": 2.0486253422002784e-05, "loss": 1.5953, "step": 12668 }, { "epoch": 0.706147929323895, "grad_norm": 0.5712233781814575, "learning_rate": 2.047906637028103e-05, "loss": 1.7162, "step": 12669 }, { "epoch": 0.7062036675770581, "grad_norm": 0.6004396080970764, "learning_rate": 2.047188025479119e-05, "loss": 1.5502, "step": 12670 }, { "epoch": 0.7062594058302213, "grad_norm": 0.5722224116325378, "learning_rate": 2.046469507576117e-05, "loss": 1.9175, "step": 12671 }, { "epoch": 0.7063151440833845, "grad_norm": 0.5458431839942932, "learning_rate": 2.0457510833418796e-05, "loss": 1.6203, "step": 12672 }, { "epoch": 0.7063708823365475, "grad_norm": 0.5887515544891357, "learning_rate": 2.045032752799194e-05, "loss": 1.7017, "step": 12673 }, { "epoch": 0.7064266205897107, "grad_norm": 0.580519437789917, "learning_rate": 2.0443145159708405e-05, "loss": 1.7859, "step": 12674 }, { "epoch": 0.7064823588428739, "grad_norm": 0.5798778533935547, "learning_rate": 2.0435963728795992e-05, "loss": 1.7632, "step": 12675 }, { "epoch": 0.706538097096037, "grad_norm": 0.5585095286369324, "learning_rate": 2.0428783235482424e-05, "loss": 1.5312, "step": 12676 }, { "epoch": 0.7065938353492002, "grad_norm": 0.5716522336006165, "learning_rate": 2.042160367999542e-05, "loss": 1.5461, "step": 12677 }, { "epoch": 0.7066495736023634, "grad_norm": 0.5700616240501404, "learning_rate": 2.041442506256273e-05, "loss": 1.7262, "step": 12678 }, { "epoch": 0.7067053118555264, "grad_norm": 0.5960610508918762, "learning_rate": 2.0407247383411966e-05, "loss": 1.7994, "step": 12679 }, { "epoch": 0.7067610501086896, "grad_norm": 0.5809378027915955, "learning_rate": 2.0400070642770775e-05, "loss": 1.7502, "step": 12680 }, { "epoch": 0.7068167883618527, "grad_norm": 0.6252628564834595, "learning_rate": 2.0392894840866767e-05, "loss": 1.8929, "step": 12681 }, { "epoch": 0.7068725266150159, "grad_norm": 0.6130125522613525, "learning_rate": 2.0385719977927526e-05, "loss": 1.8354, "step": 12682 }, { "epoch": 0.7069282648681791, "grad_norm": 0.5739142894744873, "learning_rate": 2.0378546054180568e-05, "loss": 1.7345, "step": 12683 }, { "epoch": 0.7069840031213421, "grad_norm": 0.5880386829376221, "learning_rate": 2.0371373069853424e-05, "loss": 1.9439, "step": 12684 }, { "epoch": 0.7070397413745053, "grad_norm": 0.5849983096122742, "learning_rate": 2.036420102517358e-05, "loss": 1.5638, "step": 12685 }, { "epoch": 0.7070954796276685, "grad_norm": 0.545464277267456, "learning_rate": 2.035702992036849e-05, "loss": 1.5163, "step": 12686 }, { "epoch": 0.7071512178808316, "grad_norm": 0.5759094953536987, "learning_rate": 2.0349859755665595e-05, "loss": 1.6277, "step": 12687 }, { "epoch": 0.7072069561339948, "grad_norm": 0.5686140060424805, "learning_rate": 2.0342690531292248e-05, "loss": 1.4882, "step": 12688 }, { "epoch": 0.7072626943871579, "grad_norm": 0.549177348613739, "learning_rate": 2.0335522247475874e-05, "loss": 1.611, "step": 12689 }, { "epoch": 0.707318432640321, "grad_norm": 0.6295575499534607, "learning_rate": 2.0328354904443764e-05, "loss": 1.8073, "step": 12690 }, { "epoch": 0.7073741708934842, "grad_norm": 0.5919781923294067, "learning_rate": 2.0321188502423232e-05, "loss": 1.6561, "step": 12691 }, { "epoch": 0.7074299091466474, "grad_norm": 0.5448305606842041, "learning_rate": 2.0314023041641568e-05, "loss": 1.7064, "step": 12692 }, { "epoch": 0.7074856473998105, "grad_norm": 0.5615071058273315, "learning_rate": 2.030685852232601e-05, "loss": 1.511, "step": 12693 }, { "epoch": 0.7075413856529736, "grad_norm": 0.5193641781806946, "learning_rate": 2.0299694944703796e-05, "loss": 1.4849, "step": 12694 }, { "epoch": 0.7075971239061368, "grad_norm": 0.5853648781776428, "learning_rate": 2.0292532309002054e-05, "loss": 1.5751, "step": 12695 }, { "epoch": 0.7076528621592999, "grad_norm": 0.559391975402832, "learning_rate": 2.0285370615448002e-05, "loss": 1.5558, "step": 12696 }, { "epoch": 0.7077086004124631, "grad_norm": 0.6228542923927307, "learning_rate": 2.027820986426876e-05, "loss": 1.3527, "step": 12697 }, { "epoch": 0.7077643386656263, "grad_norm": 0.5739468932151794, "learning_rate": 2.0271050055691393e-05, "loss": 1.4686, "step": 12698 }, { "epoch": 0.7078200769187893, "grad_norm": 0.5727941393852234, "learning_rate": 2.026389118994299e-05, "loss": 1.6368, "step": 12699 }, { "epoch": 0.7078758151719525, "grad_norm": 0.5730375051498413, "learning_rate": 2.0256733267250583e-05, "loss": 1.6877, "step": 12700 }, { "epoch": 0.7079315534251157, "grad_norm": 0.5445471405982971, "learning_rate": 2.02495762878412e-05, "loss": 1.5749, "step": 12701 }, { "epoch": 0.7079872916782788, "grad_norm": 0.5488109588623047, "learning_rate": 2.024242025194178e-05, "loss": 1.5247, "step": 12702 }, { "epoch": 0.708043029931442, "grad_norm": 0.5618939399719238, "learning_rate": 2.0235265159779277e-05, "loss": 1.7531, "step": 12703 }, { "epoch": 0.708098768184605, "grad_norm": 0.5433917045593262, "learning_rate": 2.022811101158066e-05, "loss": 1.7212, "step": 12704 }, { "epoch": 0.7081545064377682, "grad_norm": 0.5437905788421631, "learning_rate": 2.0220957807572756e-05, "loss": 1.6991, "step": 12705 }, { "epoch": 0.7082102446909314, "grad_norm": 0.5957657098770142, "learning_rate": 2.0213805547982446e-05, "loss": 1.6026, "step": 12706 }, { "epoch": 0.7082659829440945, "grad_norm": 0.5926949381828308, "learning_rate": 2.020665423303656e-05, "loss": 1.8268, "step": 12707 }, { "epoch": 0.7083217211972577, "grad_norm": 0.5730268359184265, "learning_rate": 2.0199503862961917e-05, "loss": 1.6651, "step": 12708 }, { "epoch": 0.7083774594504209, "grad_norm": 0.5360450744628906, "learning_rate": 2.019235443798524e-05, "loss": 1.6086, "step": 12709 }, { "epoch": 0.7084331977035839, "grad_norm": 0.5588325262069702, "learning_rate": 2.0185205958333275e-05, "loss": 1.6118, "step": 12710 }, { "epoch": 0.7084889359567471, "grad_norm": 0.5698480606079102, "learning_rate": 2.0178058424232776e-05, "loss": 1.6655, "step": 12711 }, { "epoch": 0.7085446742099103, "grad_norm": 0.5079599618911743, "learning_rate": 2.017091183591037e-05, "loss": 1.4887, "step": 12712 }, { "epoch": 0.7086004124630734, "grad_norm": 0.5740954875946045, "learning_rate": 2.0163766193592753e-05, "loss": 1.7438, "step": 12713 }, { "epoch": 0.7086561507162366, "grad_norm": 0.5531185865402222, "learning_rate": 2.0156621497506472e-05, "loss": 1.4655, "step": 12714 }, { "epoch": 0.7087118889693997, "grad_norm": 0.5318362712860107, "learning_rate": 2.0149477747878194e-05, "loss": 1.6594, "step": 12715 }, { "epoch": 0.7087676272225628, "grad_norm": 0.5267673134803772, "learning_rate": 2.0142334944934426e-05, "loss": 1.583, "step": 12716 }, { "epoch": 0.708823365475726, "grad_norm": 0.5608228445053101, "learning_rate": 2.013519308890171e-05, "loss": 1.5247, "step": 12717 }, { "epoch": 0.7088791037288892, "grad_norm": 0.5941587090492249, "learning_rate": 2.0128052180006546e-05, "loss": 1.6722, "step": 12718 }, { "epoch": 0.7089348419820523, "grad_norm": 0.5631396174430847, "learning_rate": 2.0120912218475396e-05, "loss": 1.6948, "step": 12719 }, { "epoch": 0.7089905802352154, "grad_norm": 0.578289270401001, "learning_rate": 2.011377320453473e-05, "loss": 1.6412, "step": 12720 }, { "epoch": 0.7090463184883786, "grad_norm": 0.5368520021438599, "learning_rate": 2.0106635138410883e-05, "loss": 1.6345, "step": 12721 }, { "epoch": 0.7091020567415417, "grad_norm": 0.5559724569320679, "learning_rate": 2.0099498020330303e-05, "loss": 1.6459, "step": 12722 }, { "epoch": 0.7091577949947049, "grad_norm": 0.5772035717964172, "learning_rate": 2.0092361850519336e-05, "loss": 1.5565, "step": 12723 }, { "epoch": 0.7092135332478681, "grad_norm": 0.653834879398346, "learning_rate": 2.0085226629204256e-05, "loss": 1.6026, "step": 12724 }, { "epoch": 0.7092692715010311, "grad_norm": 0.6090754270553589, "learning_rate": 2.0078092356611372e-05, "loss": 1.6818, "step": 12725 }, { "epoch": 0.7093250097541943, "grad_norm": 0.5616610646247864, "learning_rate": 2.0070959032966942e-05, "loss": 1.6622, "step": 12726 }, { "epoch": 0.7093807480073574, "grad_norm": 0.6165484189987183, "learning_rate": 2.0063826658497203e-05, "loss": 1.8945, "step": 12727 }, { "epoch": 0.7094364862605206, "grad_norm": 0.5442148447036743, "learning_rate": 2.0056695233428335e-05, "loss": 1.4223, "step": 12728 }, { "epoch": 0.7094922245136838, "grad_norm": 0.5319198966026306, "learning_rate": 2.0049564757986488e-05, "loss": 1.5615, "step": 12729 }, { "epoch": 0.7095479627668468, "grad_norm": 0.6317080855369568, "learning_rate": 2.0042435232397867e-05, "loss": 1.9871, "step": 12730 }, { "epoch": 0.70960370102001, "grad_norm": 0.5435596704483032, "learning_rate": 2.0035306656888515e-05, "loss": 1.4064, "step": 12731 }, { "epoch": 0.7096594392731732, "grad_norm": 0.5366087555885315, "learning_rate": 2.0028179031684523e-05, "loss": 1.6376, "step": 12732 }, { "epoch": 0.7097151775263363, "grad_norm": 0.593706488609314, "learning_rate": 2.002105235701195e-05, "loss": 1.7685, "step": 12733 }, { "epoch": 0.7097709157794995, "grad_norm": 0.61496901512146, "learning_rate": 2.0013926633096825e-05, "loss": 1.6685, "step": 12734 }, { "epoch": 0.7098266540326627, "grad_norm": 0.49844062328338623, "learning_rate": 2.0006801860165098e-05, "loss": 1.5448, "step": 12735 }, { "epoch": 0.7098823922858257, "grad_norm": 0.610573947429657, "learning_rate": 1.9999678038442727e-05, "loss": 1.8328, "step": 12736 }, { "epoch": 0.7099381305389889, "grad_norm": 0.5731762051582336, "learning_rate": 1.9992555168155687e-05, "loss": 1.3826, "step": 12737 }, { "epoch": 0.7099938687921521, "grad_norm": 0.5378715991973877, "learning_rate": 1.998543324952982e-05, "loss": 1.4891, "step": 12738 }, { "epoch": 0.7100496070453152, "grad_norm": 0.5719186067581177, "learning_rate": 1.997831228279104e-05, "loss": 1.6531, "step": 12739 }, { "epoch": 0.7101053452984784, "grad_norm": 0.5602190494537354, "learning_rate": 1.9971192268165116e-05, "loss": 1.7154, "step": 12740 }, { "epoch": 0.7101610835516415, "grad_norm": 0.5402939915657043, "learning_rate": 1.9964073205877924e-05, "loss": 1.5835, "step": 12741 }, { "epoch": 0.7102168218048046, "grad_norm": 0.6443590521812439, "learning_rate": 1.99569550961552e-05, "loss": 1.8135, "step": 12742 }, { "epoch": 0.7102725600579678, "grad_norm": 0.563509464263916, "learning_rate": 1.9949837939222693e-05, "loss": 1.6409, "step": 12743 }, { "epoch": 0.710328298311131, "grad_norm": 0.4864160716533661, "learning_rate": 1.994272173530612e-05, "loss": 1.0639, "step": 12744 }, { "epoch": 0.710384036564294, "grad_norm": 0.5689197182655334, "learning_rate": 1.993560648463117e-05, "loss": 1.7668, "step": 12745 }, { "epoch": 0.7104397748174572, "grad_norm": 0.6037869453430176, "learning_rate": 1.9928492187423514e-05, "loss": 1.4886, "step": 12746 }, { "epoch": 0.7104955130706204, "grad_norm": 0.6127498149871826, "learning_rate": 1.9921378843908716e-05, "loss": 1.9898, "step": 12747 }, { "epoch": 0.7105512513237835, "grad_norm": 0.5575817227363586, "learning_rate": 1.991426645431243e-05, "loss": 1.6636, "step": 12748 }, { "epoch": 0.7106069895769467, "grad_norm": 0.5344414114952087, "learning_rate": 1.9907155018860217e-05, "loss": 1.6165, "step": 12749 }, { "epoch": 0.7106627278301098, "grad_norm": 0.5407127737998962, "learning_rate": 1.9900044537777586e-05, "loss": 1.6894, "step": 12750 }, { "epoch": 0.7107184660832729, "grad_norm": 0.531772792339325, "learning_rate": 1.9892935011290037e-05, "loss": 1.4837, "step": 12751 }, { "epoch": 0.7107742043364361, "grad_norm": 0.5555554628372192, "learning_rate": 1.9885826439623052e-05, "loss": 1.6393, "step": 12752 }, { "epoch": 0.7108299425895992, "grad_norm": 0.5575926899909973, "learning_rate": 1.9878718823002097e-05, "loss": 1.6043, "step": 12753 }, { "epoch": 0.7108856808427624, "grad_norm": 0.5810105204582214, "learning_rate": 1.9871612161652542e-05, "loss": 1.6273, "step": 12754 }, { "epoch": 0.7109414190959256, "grad_norm": 0.5523950457572937, "learning_rate": 1.9864506455799768e-05, "loss": 1.6094, "step": 12755 }, { "epoch": 0.7109971573490886, "grad_norm": 0.572114109992981, "learning_rate": 1.9857401705669186e-05, "loss": 1.7298, "step": 12756 }, { "epoch": 0.7110528956022518, "grad_norm": 0.5734782814979553, "learning_rate": 1.9850297911486067e-05, "loss": 1.649, "step": 12757 }, { "epoch": 0.711108633855415, "grad_norm": 0.5278222560882568, "learning_rate": 1.98431950734757e-05, "loss": 1.4049, "step": 12758 }, { "epoch": 0.7111643721085781, "grad_norm": 0.5516873002052307, "learning_rate": 1.983609319186337e-05, "loss": 1.5951, "step": 12759 }, { "epoch": 0.7112201103617413, "grad_norm": 0.5528345108032227, "learning_rate": 1.982899226687431e-05, "loss": 1.7238, "step": 12760 }, { "epoch": 0.7112758486149044, "grad_norm": 0.589545726776123, "learning_rate": 1.9821892298733686e-05, "loss": 1.7273, "step": 12761 }, { "epoch": 0.7113315868680675, "grad_norm": 0.5374835729598999, "learning_rate": 1.9814793287666673e-05, "loss": 1.7202, "step": 12762 }, { "epoch": 0.7113873251212307, "grad_norm": 0.5903745889663696, "learning_rate": 1.9807695233898455e-05, "loss": 1.6107, "step": 12763 }, { "epoch": 0.7114430633743939, "grad_norm": 0.578032910823822, "learning_rate": 1.98005981376541e-05, "loss": 1.5467, "step": 12764 }, { "epoch": 0.711498801627557, "grad_norm": 0.5979136824607849, "learning_rate": 1.9793501999158708e-05, "loss": 1.7664, "step": 12765 }, { "epoch": 0.7115545398807201, "grad_norm": 0.5675146579742432, "learning_rate": 1.9786406818637286e-05, "loss": 1.6013, "step": 12766 }, { "epoch": 0.7116102781338833, "grad_norm": 0.5453364253044128, "learning_rate": 1.977931259631492e-05, "loss": 1.4807, "step": 12767 }, { "epoch": 0.7116660163870464, "grad_norm": 0.622312605381012, "learning_rate": 1.977221933241654e-05, "loss": 1.9146, "step": 12768 }, { "epoch": 0.7117217546402096, "grad_norm": 0.5752536058425903, "learning_rate": 1.9765127027167117e-05, "loss": 1.4818, "step": 12769 }, { "epoch": 0.7117774928933728, "grad_norm": 0.5456924438476562, "learning_rate": 1.9758035680791593e-05, "loss": 1.6821, "step": 12770 }, { "epoch": 0.7118332311465358, "grad_norm": 0.594129741191864, "learning_rate": 1.975094529351485e-05, "loss": 1.5839, "step": 12771 }, { "epoch": 0.711888969399699, "grad_norm": 0.5474662780761719, "learning_rate": 1.9743855865561772e-05, "loss": 1.7335, "step": 12772 }, { "epoch": 0.7119447076528621, "grad_norm": 0.5566896200180054, "learning_rate": 1.9736767397157147e-05, "loss": 1.4159, "step": 12773 }, { "epoch": 0.7120004459060253, "grad_norm": 0.5809720754623413, "learning_rate": 1.9729679888525847e-05, "loss": 1.6998, "step": 12774 }, { "epoch": 0.7120561841591885, "grad_norm": 0.5639328956604004, "learning_rate": 1.9722593339892605e-05, "loss": 1.6072, "step": 12775 }, { "epoch": 0.7121119224123516, "grad_norm": 0.5665844082832336, "learning_rate": 1.971550775148216e-05, "loss": 1.4811, "step": 12776 }, { "epoch": 0.7121676606655147, "grad_norm": 0.5987708568572998, "learning_rate": 1.9708423123519242e-05, "loss": 1.7223, "step": 12777 }, { "epoch": 0.7122233989186779, "grad_norm": 0.5482421517372131, "learning_rate": 1.9701339456228534e-05, "loss": 1.7612, "step": 12778 }, { "epoch": 0.712279137171841, "grad_norm": 0.5657587051391602, "learning_rate": 1.96942567498347e-05, "loss": 1.7783, "step": 12779 }, { "epoch": 0.7123348754250042, "grad_norm": 0.5460767149925232, "learning_rate": 1.968717500456233e-05, "loss": 1.5199, "step": 12780 }, { "epoch": 0.7123906136781674, "grad_norm": 0.5805169343948364, "learning_rate": 1.9680094220636018e-05, "loss": 1.6642, "step": 12781 }, { "epoch": 0.7124463519313304, "grad_norm": 0.5397613048553467, "learning_rate": 1.967301439828037e-05, "loss": 1.5273, "step": 12782 }, { "epoch": 0.7125020901844936, "grad_norm": 0.5457704067230225, "learning_rate": 1.966593553771987e-05, "loss": 1.484, "step": 12783 }, { "epoch": 0.7125578284376568, "grad_norm": 0.5834569334983826, "learning_rate": 1.965885763917904e-05, "loss": 1.798, "step": 12784 }, { "epoch": 0.7126135666908199, "grad_norm": 0.5535709857940674, "learning_rate": 1.9651780702882338e-05, "loss": 1.4035, "step": 12785 }, { "epoch": 0.7126693049439831, "grad_norm": 0.535655677318573, "learning_rate": 1.964470472905423e-05, "loss": 1.5595, "step": 12786 }, { "epoch": 0.7127250431971462, "grad_norm": 0.5838567614555359, "learning_rate": 1.9637629717919094e-05, "loss": 1.6134, "step": 12787 }, { "epoch": 0.7127807814503093, "grad_norm": 0.6011456847190857, "learning_rate": 1.963055566970129e-05, "loss": 1.9148, "step": 12788 }, { "epoch": 0.7128365197034725, "grad_norm": 0.5572181344032288, "learning_rate": 1.9623482584625237e-05, "loss": 1.3516, "step": 12789 }, { "epoch": 0.7128922579566357, "grad_norm": 0.5829541087150574, "learning_rate": 1.9616410462915186e-05, "loss": 1.6736, "step": 12790 }, { "epoch": 0.7129479962097988, "grad_norm": 0.5342071652412415, "learning_rate": 1.960933930479545e-05, "loss": 1.6827, "step": 12791 }, { "epoch": 0.713003734462962, "grad_norm": 0.5617251396179199, "learning_rate": 1.9602269110490273e-05, "loss": 1.7693, "step": 12792 }, { "epoch": 0.7130594727161251, "grad_norm": 0.6102042198181152, "learning_rate": 1.9595199880223912e-05, "loss": 1.7255, "step": 12793 }, { "epoch": 0.7131152109692882, "grad_norm": 0.5527377724647522, "learning_rate": 1.9588131614220522e-05, "loss": 1.5599, "step": 12794 }, { "epoch": 0.7131709492224514, "grad_norm": 0.5990374684333801, "learning_rate": 1.958106431270429e-05, "loss": 1.7379, "step": 12795 }, { "epoch": 0.7132266874756145, "grad_norm": 0.5425254702568054, "learning_rate": 1.957399797589933e-05, "loss": 1.6852, "step": 12796 }, { "epoch": 0.7132824257287776, "grad_norm": 0.5994154810905457, "learning_rate": 1.956693260402977e-05, "loss": 1.8283, "step": 12797 }, { "epoch": 0.7133381639819408, "grad_norm": 0.5741962790489197, "learning_rate": 1.955986819731968e-05, "loss": 1.8184, "step": 12798 }, { "epoch": 0.7133939022351039, "grad_norm": 0.5813300609588623, "learning_rate": 1.9552804755993065e-05, "loss": 1.8002, "step": 12799 }, { "epoch": 0.7134496404882671, "grad_norm": 0.534512460231781, "learning_rate": 1.9545742280273993e-05, "loss": 1.4038, "step": 12800 }, { "epoch": 0.7135053787414303, "grad_norm": 0.5778892636299133, "learning_rate": 1.9538680770386398e-05, "loss": 1.6931, "step": 12801 }, { "epoch": 0.7135611169945933, "grad_norm": 0.5787971615791321, "learning_rate": 1.9531620226554248e-05, "loss": 1.7565, "step": 12802 }, { "epoch": 0.7136168552477565, "grad_norm": 0.5496509671211243, "learning_rate": 1.9524560649001462e-05, "loss": 1.7543, "step": 12803 }, { "epoch": 0.7136725935009197, "grad_norm": 0.6067994832992554, "learning_rate": 1.951750203795193e-05, "loss": 1.7114, "step": 12804 }, { "epoch": 0.7137283317540828, "grad_norm": 0.5479700565338135, "learning_rate": 1.9510444393629525e-05, "loss": 1.4792, "step": 12805 }, { "epoch": 0.713784070007246, "grad_norm": 0.5829119086265564, "learning_rate": 1.9503387716258038e-05, "loss": 1.6419, "step": 12806 }, { "epoch": 0.7138398082604092, "grad_norm": 0.529517650604248, "learning_rate": 1.9496332006061262e-05, "loss": 1.5288, "step": 12807 }, { "epoch": 0.7138955465135722, "grad_norm": 0.5489634871482849, "learning_rate": 1.9489277263263028e-05, "loss": 1.7274, "step": 12808 }, { "epoch": 0.7139512847667354, "grad_norm": 0.6540934443473816, "learning_rate": 1.9482223488087016e-05, "loss": 2.0038, "step": 12809 }, { "epoch": 0.7140070230198986, "grad_norm": 0.5693274736404419, "learning_rate": 1.9475170680756938e-05, "loss": 1.6409, "step": 12810 }, { "epoch": 0.7140627612730617, "grad_norm": 0.6074066758155823, "learning_rate": 1.9468118841496476e-05, "loss": 1.8189, "step": 12811 }, { "epoch": 0.7141184995262249, "grad_norm": 0.5593070983886719, "learning_rate": 1.9461067970529286e-05, "loss": 1.5253, "step": 12812 }, { "epoch": 0.714174237779388, "grad_norm": 0.5670613646507263, "learning_rate": 1.9454018068078948e-05, "loss": 1.5391, "step": 12813 }, { "epoch": 0.7142299760325511, "grad_norm": 0.5611968040466309, "learning_rate": 1.944696913436905e-05, "loss": 1.7251, "step": 12814 }, { "epoch": 0.7142857142857143, "grad_norm": 0.5555674433708191, "learning_rate": 1.9439921169623183e-05, "loss": 1.6815, "step": 12815 }, { "epoch": 0.7143414525388775, "grad_norm": 0.5796941518783569, "learning_rate": 1.943287417406482e-05, "loss": 1.5858, "step": 12816 }, { "epoch": 0.7143971907920406, "grad_norm": 0.5309176445007324, "learning_rate": 1.9425828147917475e-05, "loss": 1.6417, "step": 12817 }, { "epoch": 0.7144529290452037, "grad_norm": 0.6035979986190796, "learning_rate": 1.9418783091404597e-05, "loss": 1.7085, "step": 12818 }, { "epoch": 0.7145086672983668, "grad_norm": 0.5622169375419617, "learning_rate": 1.941173900474964e-05, "loss": 1.7192, "step": 12819 }, { "epoch": 0.71456440555153, "grad_norm": 0.6361518502235413, "learning_rate": 1.940469588817596e-05, "loss": 1.7091, "step": 12820 }, { "epoch": 0.7146201438046932, "grad_norm": 0.6154270172119141, "learning_rate": 1.9397653741906947e-05, "loss": 1.7204, "step": 12821 }, { "epoch": 0.7146758820578563, "grad_norm": 0.6581857800483704, "learning_rate": 1.939061256616593e-05, "loss": 1.8386, "step": 12822 }, { "epoch": 0.7147316203110194, "grad_norm": 0.5279804468154907, "learning_rate": 1.9383572361176216e-05, "loss": 1.4818, "step": 12823 }, { "epoch": 0.7147873585641826, "grad_norm": 0.5774812698364258, "learning_rate": 1.93765331271611e-05, "loss": 1.6351, "step": 12824 }, { "epoch": 0.7148430968173457, "grad_norm": 0.5667797327041626, "learning_rate": 1.9369494864343768e-05, "loss": 1.7558, "step": 12825 }, { "epoch": 0.7148988350705089, "grad_norm": 0.5321443676948547, "learning_rate": 1.9362457572947508e-05, "loss": 1.4818, "step": 12826 }, { "epoch": 0.7149545733236721, "grad_norm": 0.5456085801124573, "learning_rate": 1.935542125319545e-05, "loss": 1.5985, "step": 12827 }, { "epoch": 0.7150103115768351, "grad_norm": 0.5820759534835815, "learning_rate": 1.9348385905310757e-05, "loss": 1.6352, "step": 12828 }, { "epoch": 0.7150660498299983, "grad_norm": 0.610159695148468, "learning_rate": 1.934135152951655e-05, "loss": 1.6083, "step": 12829 }, { "epoch": 0.7151217880831615, "grad_norm": 0.6192707419395447, "learning_rate": 1.9334318126035922e-05, "loss": 1.6754, "step": 12830 }, { "epoch": 0.7151775263363246, "grad_norm": 0.5779080986976624, "learning_rate": 1.9327285695091946e-05, "loss": 1.695, "step": 12831 }, { "epoch": 0.7152332645894878, "grad_norm": 0.652459979057312, "learning_rate": 1.932025423690762e-05, "loss": 1.9642, "step": 12832 }, { "epoch": 0.715289002842651, "grad_norm": 0.5195798277854919, "learning_rate": 1.9313223751705935e-05, "loss": 1.3916, "step": 12833 }, { "epoch": 0.715344741095814, "grad_norm": 0.5294322967529297, "learning_rate": 1.9306194239709906e-05, "loss": 1.4148, "step": 12834 }, { "epoch": 0.7154004793489772, "grad_norm": 0.5848262906074524, "learning_rate": 1.9299165701142426e-05, "loss": 1.3771, "step": 12835 }, { "epoch": 0.7154562176021404, "grad_norm": 0.5794307589530945, "learning_rate": 1.9292138136226413e-05, "loss": 1.6937, "step": 12836 }, { "epoch": 0.7155119558553035, "grad_norm": 0.5708346366882324, "learning_rate": 1.928511154518473e-05, "loss": 1.6335, "step": 12837 }, { "epoch": 0.7155676941084667, "grad_norm": 0.6344782710075378, "learning_rate": 1.927808592824026e-05, "loss": 1.7754, "step": 12838 }, { "epoch": 0.7156234323616298, "grad_norm": 0.5648434162139893, "learning_rate": 1.9271061285615755e-05, "loss": 1.607, "step": 12839 }, { "epoch": 0.7156791706147929, "grad_norm": 0.5946949124336243, "learning_rate": 1.926403761753401e-05, "loss": 1.6942, "step": 12840 }, { "epoch": 0.7157349088679561, "grad_norm": 0.6035356521606445, "learning_rate": 1.925701492421782e-05, "loss": 1.7635, "step": 12841 }, { "epoch": 0.7157906471211192, "grad_norm": 0.5887398719787598, "learning_rate": 1.924999320588986e-05, "loss": 1.5762, "step": 12842 }, { "epoch": 0.7158463853742824, "grad_norm": 0.6360691785812378, "learning_rate": 1.924297246277283e-05, "loss": 1.8578, "step": 12843 }, { "epoch": 0.7159021236274455, "grad_norm": 0.5708274841308594, "learning_rate": 1.9235952695089388e-05, "loss": 1.5414, "step": 12844 }, { "epoch": 0.7159578618806086, "grad_norm": 0.6127935647964478, "learning_rate": 1.9228933903062173e-05, "loss": 1.4997, "step": 12845 }, { "epoch": 0.7160136001337718, "grad_norm": 0.5536811947822571, "learning_rate": 1.9221916086913756e-05, "loss": 1.6019, "step": 12846 }, { "epoch": 0.716069338386935, "grad_norm": 0.55485600233078, "learning_rate": 1.9214899246866707e-05, "loss": 1.4985, "step": 12847 }, { "epoch": 0.7161250766400981, "grad_norm": 0.6536714434623718, "learning_rate": 1.9207883383143566e-05, "loss": 1.9859, "step": 12848 }, { "epoch": 0.7161808148932612, "grad_norm": 0.6306371688842773, "learning_rate": 1.9200868495966827e-05, "loss": 1.8885, "step": 12849 }, { "epoch": 0.7162365531464244, "grad_norm": 0.6592320799827576, "learning_rate": 1.9193854585558996e-05, "loss": 1.8515, "step": 12850 }, { "epoch": 0.7162922913995875, "grad_norm": 0.5459424257278442, "learning_rate": 1.9186841652142446e-05, "loss": 1.6183, "step": 12851 }, { "epoch": 0.7163480296527507, "grad_norm": 0.5748673677444458, "learning_rate": 1.917982969593966e-05, "loss": 1.5724, "step": 12852 }, { "epoch": 0.7164037679059139, "grad_norm": 0.5920608043670654, "learning_rate": 1.9172818717172964e-05, "loss": 1.7062, "step": 12853 }, { "epoch": 0.7164595061590769, "grad_norm": 0.5399507880210876, "learning_rate": 1.9165808716064727e-05, "loss": 1.6388, "step": 12854 }, { "epoch": 0.7165152444122401, "grad_norm": 0.5645083785057068, "learning_rate": 1.9158799692837258e-05, "loss": 1.5902, "step": 12855 }, { "epoch": 0.7165709826654033, "grad_norm": 0.5651130676269531, "learning_rate": 1.9151791647712848e-05, "loss": 1.5341, "step": 12856 }, { "epoch": 0.7166267209185664, "grad_norm": 0.5224648118019104, "learning_rate": 1.9144784580913765e-05, "loss": 1.5641, "step": 12857 }, { "epoch": 0.7166824591717296, "grad_norm": 0.5261692404747009, "learning_rate": 1.91377784926622e-05, "loss": 1.4185, "step": 12858 }, { "epoch": 0.7167381974248928, "grad_norm": 0.6022654175758362, "learning_rate": 1.9130773383180344e-05, "loss": 1.6281, "step": 12859 }, { "epoch": 0.7167939356780558, "grad_norm": 0.5448938012123108, "learning_rate": 1.912376925269041e-05, "loss": 1.4896, "step": 12860 }, { "epoch": 0.716849673931219, "grad_norm": 0.5428690910339355, "learning_rate": 1.911676610141448e-05, "loss": 1.6033, "step": 12861 }, { "epoch": 0.7169054121843822, "grad_norm": 0.6159693598747253, "learning_rate": 1.9109763929574665e-05, "loss": 1.5138, "step": 12862 }, { "epoch": 0.7169611504375453, "grad_norm": 0.5566955208778381, "learning_rate": 1.910276273739304e-05, "loss": 1.728, "step": 12863 }, { "epoch": 0.7170168886907085, "grad_norm": 0.6169360876083374, "learning_rate": 1.909576252509165e-05, "loss": 1.8363, "step": 12864 }, { "epoch": 0.7170726269438715, "grad_norm": 0.5535723567008972, "learning_rate": 1.9088763292892468e-05, "loss": 1.6138, "step": 12865 }, { "epoch": 0.7171283651970347, "grad_norm": 0.6125819683074951, "learning_rate": 1.908176504101748e-05, "loss": 1.6805, "step": 12866 }, { "epoch": 0.7171841034501979, "grad_norm": 0.5870202779769897, "learning_rate": 1.9074767769688674e-05, "loss": 1.5874, "step": 12867 }, { "epoch": 0.717239841703361, "grad_norm": 0.5767966508865356, "learning_rate": 1.9067771479127905e-05, "loss": 1.7405, "step": 12868 }, { "epoch": 0.7172955799565242, "grad_norm": 0.5747946500778198, "learning_rate": 1.9060776169557083e-05, "loss": 1.9751, "step": 12869 }, { "epoch": 0.7173513182096873, "grad_norm": 0.5575464963912964, "learning_rate": 1.9053781841198044e-05, "loss": 1.701, "step": 12870 }, { "epoch": 0.7174070564628504, "grad_norm": 0.5241334438323975, "learning_rate": 1.9046788494272638e-05, "loss": 1.5356, "step": 12871 }, { "epoch": 0.7174627947160136, "grad_norm": 0.5647209882736206, "learning_rate": 1.903979612900262e-05, "loss": 1.6373, "step": 12872 }, { "epoch": 0.7175185329691768, "grad_norm": 0.5827178359031677, "learning_rate": 1.903280474560975e-05, "loss": 1.919, "step": 12873 }, { "epoch": 0.7175742712223399, "grad_norm": 0.5812021493911743, "learning_rate": 1.902581434431576e-05, "loss": 1.6801, "step": 12874 }, { "epoch": 0.717630009475503, "grad_norm": 0.5697082281112671, "learning_rate": 1.9018824925342353e-05, "loss": 1.7047, "step": 12875 }, { "epoch": 0.7176857477286662, "grad_norm": 0.5812304019927979, "learning_rate": 1.9011836488911207e-05, "loss": 1.5699, "step": 12876 }, { "epoch": 0.7177414859818293, "grad_norm": 0.5891488790512085, "learning_rate": 1.9004849035243894e-05, "loss": 1.8281, "step": 12877 }, { "epoch": 0.7177972242349925, "grad_norm": 0.6002638339996338, "learning_rate": 1.8997862564562092e-05, "loss": 1.6098, "step": 12878 }, { "epoch": 0.7178529624881557, "grad_norm": 0.5769315958023071, "learning_rate": 1.8990877077087315e-05, "loss": 1.6627, "step": 12879 }, { "epoch": 0.7179087007413187, "grad_norm": 0.5559478998184204, "learning_rate": 1.8983892573041124e-05, "loss": 1.5231, "step": 12880 }, { "epoch": 0.7179644389944819, "grad_norm": 0.599773645401001, "learning_rate": 1.897690905264502e-05, "loss": 1.5775, "step": 12881 }, { "epoch": 0.7180201772476451, "grad_norm": 0.5190117955207825, "learning_rate": 1.8969926516120486e-05, "loss": 1.5025, "step": 12882 }, { "epoch": 0.7180759155008082, "grad_norm": 0.5551081895828247, "learning_rate": 1.8962944963688982e-05, "loss": 1.6357, "step": 12883 }, { "epoch": 0.7181316537539714, "grad_norm": 0.5973671078681946, "learning_rate": 1.8955964395571875e-05, "loss": 1.617, "step": 12884 }, { "epoch": 0.7181873920071346, "grad_norm": 0.6069487929344177, "learning_rate": 1.894898481199059e-05, "loss": 1.7125, "step": 12885 }, { "epoch": 0.7182431302602976, "grad_norm": 0.5540785193443298, "learning_rate": 1.8942006213166486e-05, "loss": 1.5926, "step": 12886 }, { "epoch": 0.7182988685134608, "grad_norm": 0.552204966545105, "learning_rate": 1.8935028599320846e-05, "loss": 1.5145, "step": 12887 }, { "epoch": 0.7183546067666239, "grad_norm": 0.6157098412513733, "learning_rate": 1.8928051970674975e-05, "loss": 1.7493, "step": 12888 }, { "epoch": 0.7184103450197871, "grad_norm": 0.5683028697967529, "learning_rate": 1.892107632745014e-05, "loss": 1.5814, "step": 12889 }, { "epoch": 0.7184660832729503, "grad_norm": 0.5874137282371521, "learning_rate": 1.8914101669867572e-05, "loss": 1.5899, "step": 12890 }, { "epoch": 0.7185218215261133, "grad_norm": 0.5777448415756226, "learning_rate": 1.8907127998148444e-05, "loss": 1.6587, "step": 12891 }, { "epoch": 0.7185775597792765, "grad_norm": 0.5604439973831177, "learning_rate": 1.8900155312513913e-05, "loss": 1.4609, "step": 12892 }, { "epoch": 0.7186332980324397, "grad_norm": 0.5519274473190308, "learning_rate": 1.8893183613185163e-05, "loss": 1.715, "step": 12893 }, { "epoch": 0.7186890362856028, "grad_norm": 0.5561261177062988, "learning_rate": 1.8886212900383248e-05, "loss": 1.5037, "step": 12894 }, { "epoch": 0.718744774538766, "grad_norm": 0.5496982932090759, "learning_rate": 1.887924317432925e-05, "loss": 1.3882, "step": 12895 }, { "epoch": 0.7188005127919291, "grad_norm": 0.5935930013656616, "learning_rate": 1.887227443524422e-05, "loss": 1.6411, "step": 12896 }, { "epoch": 0.7188562510450922, "grad_norm": 0.6104579567909241, "learning_rate": 1.886530668334917e-05, "loss": 1.7263, "step": 12897 }, { "epoch": 0.7189119892982554, "grad_norm": 0.544337272644043, "learning_rate": 1.8858339918865046e-05, "loss": 1.5848, "step": 12898 }, { "epoch": 0.7189677275514186, "grad_norm": 0.6195954084396362, "learning_rate": 1.885137414201281e-05, "loss": 1.8026, "step": 12899 }, { "epoch": 0.7190234658045817, "grad_norm": 0.5602339506149292, "learning_rate": 1.884440935301338e-05, "loss": 1.6852, "step": 12900 }, { "epoch": 0.7190792040577448, "grad_norm": 0.5330663919448853, "learning_rate": 1.883744555208764e-05, "loss": 1.5815, "step": 12901 }, { "epoch": 0.719134942310908, "grad_norm": 0.5787651538848877, "learning_rate": 1.8830482739456452e-05, "loss": 1.6217, "step": 12902 }, { "epoch": 0.7191906805640711, "grad_norm": 0.5768993496894836, "learning_rate": 1.8823520915340583e-05, "loss": 1.7787, "step": 12903 }, { "epoch": 0.7192464188172343, "grad_norm": 0.5117707848548889, "learning_rate": 1.8816560079960892e-05, "loss": 1.2857, "step": 12904 }, { "epoch": 0.7193021570703975, "grad_norm": 0.5444961786270142, "learning_rate": 1.8809600233538087e-05, "loss": 1.6263, "step": 12905 }, { "epoch": 0.7193578953235605, "grad_norm": 0.5877333283424377, "learning_rate": 1.8802641376292914e-05, "loss": 1.3656, "step": 12906 }, { "epoch": 0.7194136335767237, "grad_norm": 0.7059000134468079, "learning_rate": 1.8795683508446055e-05, "loss": 1.7087, "step": 12907 }, { "epoch": 0.7194693718298869, "grad_norm": 0.6280462145805359, "learning_rate": 1.878872663021819e-05, "loss": 1.7132, "step": 12908 }, { "epoch": 0.71952511008305, "grad_norm": 0.5358414649963379, "learning_rate": 1.8781770741829956e-05, "loss": 1.6527, "step": 12909 }, { "epoch": 0.7195808483362132, "grad_norm": 0.5640277862548828, "learning_rate": 1.8774815843501904e-05, "loss": 1.7389, "step": 12910 }, { "epoch": 0.7196365865893762, "grad_norm": 0.5553831458091736, "learning_rate": 1.8767861935454673e-05, "loss": 1.5739, "step": 12911 }, { "epoch": 0.7196923248425394, "grad_norm": 0.5298663973808289, "learning_rate": 1.8760909017908746e-05, "loss": 1.3315, "step": 12912 }, { "epoch": 0.7197480630957026, "grad_norm": 0.5556603670120239, "learning_rate": 1.875395709108465e-05, "loss": 1.634, "step": 12913 }, { "epoch": 0.7198038013488657, "grad_norm": 0.5391923189163208, "learning_rate": 1.874700615520286e-05, "loss": 1.6764, "step": 12914 }, { "epoch": 0.7198595396020289, "grad_norm": 0.5856571197509766, "learning_rate": 1.8740056210483815e-05, "loss": 1.7273, "step": 12915 }, { "epoch": 0.719915277855192, "grad_norm": 0.5557060837745667, "learning_rate": 1.873310725714795e-05, "loss": 1.6144, "step": 12916 }, { "epoch": 0.7199710161083551, "grad_norm": 0.5560556650161743, "learning_rate": 1.8726159295415603e-05, "loss": 1.6216, "step": 12917 }, { "epoch": 0.7200267543615183, "grad_norm": 0.6109077334403992, "learning_rate": 1.8719212325507123e-05, "loss": 1.44, "step": 12918 }, { "epoch": 0.7200824926146815, "grad_norm": 0.5736623406410217, "learning_rate": 1.871226634764289e-05, "loss": 1.7272, "step": 12919 }, { "epoch": 0.7201382308678446, "grad_norm": 0.535057783126831, "learning_rate": 1.870532136204313e-05, "loss": 1.4406, "step": 12920 }, { "epoch": 0.7201939691210077, "grad_norm": 0.570833683013916, "learning_rate": 1.8698377368928115e-05, "loss": 1.6393, "step": 12921 }, { "epoch": 0.7202497073741709, "grad_norm": 0.567415177822113, "learning_rate": 1.8691434368518067e-05, "loss": 1.7118, "step": 12922 }, { "epoch": 0.720305445627334, "grad_norm": 0.5809144377708435, "learning_rate": 1.8684492361033196e-05, "loss": 1.7196, "step": 12923 }, { "epoch": 0.7203611838804972, "grad_norm": 0.6149061322212219, "learning_rate": 1.8677551346693633e-05, "loss": 1.5354, "step": 12924 }, { "epoch": 0.7204169221336604, "grad_norm": 0.5699290037155151, "learning_rate": 1.867061132571951e-05, "loss": 1.7636, "step": 12925 }, { "epoch": 0.7204726603868234, "grad_norm": 0.5781373977661133, "learning_rate": 1.8663672298330942e-05, "loss": 1.6949, "step": 12926 }, { "epoch": 0.7205283986399866, "grad_norm": 0.5494027733802795, "learning_rate": 1.865673426474798e-05, "loss": 1.684, "step": 12927 }, { "epoch": 0.7205841368931498, "grad_norm": 0.5682995915412903, "learning_rate": 1.864979722519068e-05, "loss": 1.6678, "step": 12928 }, { "epoch": 0.7206398751463129, "grad_norm": 0.5997836589813232, "learning_rate": 1.8642861179878994e-05, "loss": 1.7897, "step": 12929 }, { "epoch": 0.7206956133994761, "grad_norm": 0.6006888151168823, "learning_rate": 1.8635926129032964e-05, "loss": 1.7266, "step": 12930 }, { "epoch": 0.7207513516526393, "grad_norm": 0.5405071377754211, "learning_rate": 1.8628992072872476e-05, "loss": 1.606, "step": 12931 }, { "epoch": 0.7208070899058023, "grad_norm": 0.5881284475326538, "learning_rate": 1.862205901161745e-05, "loss": 1.7615, "step": 12932 }, { "epoch": 0.7208628281589655, "grad_norm": 0.5610661506652832, "learning_rate": 1.8615126945487766e-05, "loss": 1.8626, "step": 12933 }, { "epoch": 0.7209185664121286, "grad_norm": 0.6191084980964661, "learning_rate": 1.8608195874703266e-05, "loss": 1.871, "step": 12934 }, { "epoch": 0.7209743046652918, "grad_norm": 0.5465794205665588, "learning_rate": 1.8601265799483786e-05, "loss": 1.4462, "step": 12935 }, { "epoch": 0.721030042918455, "grad_norm": 0.5514585375785828, "learning_rate": 1.8594336720049055e-05, "loss": 1.4693, "step": 12936 }, { "epoch": 0.721085781171618, "grad_norm": 0.49483048915863037, "learning_rate": 1.8587408636618887e-05, "loss": 1.3201, "step": 12937 }, { "epoch": 0.7211415194247812, "grad_norm": 0.5280016660690308, "learning_rate": 1.8580481549412953e-05, "loss": 1.5373, "step": 12938 }, { "epoch": 0.7211972576779444, "grad_norm": 0.5409990549087524, "learning_rate": 1.857355545865096e-05, "loss": 1.5566, "step": 12939 }, { "epoch": 0.7212529959311075, "grad_norm": 0.6028059124946594, "learning_rate": 1.856663036455255e-05, "loss": 1.9095, "step": 12940 }, { "epoch": 0.7213087341842707, "grad_norm": 0.5442488789558411, "learning_rate": 1.8559706267337362e-05, "loss": 1.7033, "step": 12941 }, { "epoch": 0.7213644724374338, "grad_norm": 0.5725643038749695, "learning_rate": 1.8552783167224995e-05, "loss": 1.7649, "step": 12942 }, { "epoch": 0.7214202106905969, "grad_norm": 0.5693257451057434, "learning_rate": 1.8545861064434984e-05, "loss": 1.6757, "step": 12943 }, { "epoch": 0.7214759489437601, "grad_norm": 0.5688108801841736, "learning_rate": 1.853893995918685e-05, "loss": 1.5795, "step": 12944 }, { "epoch": 0.7215316871969233, "grad_norm": 0.500092089176178, "learning_rate": 1.8532019851700143e-05, "loss": 1.3856, "step": 12945 }, { "epoch": 0.7215874254500864, "grad_norm": 0.5776916146278381, "learning_rate": 1.852510074219428e-05, "loss": 1.83, "step": 12946 }, { "epoch": 0.7216431637032495, "grad_norm": 0.5361211895942688, "learning_rate": 1.851818263088871e-05, "loss": 1.3333, "step": 12947 }, { "epoch": 0.7216989019564127, "grad_norm": 0.5286733508110046, "learning_rate": 1.851126551800283e-05, "loss": 1.2956, "step": 12948 }, { "epoch": 0.7217546402095758, "grad_norm": 0.5599008798599243, "learning_rate": 1.8504349403756038e-05, "loss": 1.5162, "step": 12949 }, { "epoch": 0.721810378462739, "grad_norm": 0.5262306928634644, "learning_rate": 1.8497434288367633e-05, "loss": 1.5998, "step": 12950 }, { "epoch": 0.7218661167159022, "grad_norm": 0.5595152378082275, "learning_rate": 1.8490520172056942e-05, "loss": 1.6553, "step": 12951 }, { "epoch": 0.7219218549690652, "grad_norm": 0.5566936731338501, "learning_rate": 1.8483607055043233e-05, "loss": 1.5902, "step": 12952 }, { "epoch": 0.7219775932222284, "grad_norm": 0.5860758423805237, "learning_rate": 1.847669493754576e-05, "loss": 1.6285, "step": 12953 }, { "epoch": 0.7220333314753916, "grad_norm": 0.5415453314781189, "learning_rate": 1.8469783819783735e-05, "loss": 1.625, "step": 12954 }, { "epoch": 0.7220890697285547, "grad_norm": 0.5949093103408813, "learning_rate": 1.8462873701976314e-05, "loss": 1.7366, "step": 12955 }, { "epoch": 0.7221448079817179, "grad_norm": 0.5670189261436462, "learning_rate": 1.8455964584342693e-05, "loss": 1.608, "step": 12956 }, { "epoch": 0.722200546234881, "grad_norm": 0.5760493278503418, "learning_rate": 1.8449056467101945e-05, "loss": 1.7061, "step": 12957 }, { "epoch": 0.7222562844880441, "grad_norm": 0.6208779215812683, "learning_rate": 1.8442149350473172e-05, "loss": 1.5652, "step": 12958 }, { "epoch": 0.7223120227412073, "grad_norm": 0.5377376079559326, "learning_rate": 1.843524323467542e-05, "loss": 1.6385, "step": 12959 }, { "epoch": 0.7223677609943704, "grad_norm": 0.6057771444320679, "learning_rate": 1.8428338119927724e-05, "loss": 1.7745, "step": 12960 }, { "epoch": 0.7224234992475336, "grad_norm": 0.5822296142578125, "learning_rate": 1.8421434006449084e-05, "loss": 1.6446, "step": 12961 }, { "epoch": 0.7224792375006968, "grad_norm": 0.5849522948265076, "learning_rate": 1.8414530894458403e-05, "loss": 1.6425, "step": 12962 }, { "epoch": 0.7225349757538598, "grad_norm": 0.5624440312385559, "learning_rate": 1.8407628784174686e-05, "loss": 1.6815, "step": 12963 }, { "epoch": 0.722590714007023, "grad_norm": 0.5463077425956726, "learning_rate": 1.8400727675816765e-05, "loss": 1.6791, "step": 12964 }, { "epoch": 0.7226464522601862, "grad_norm": 0.6184215545654297, "learning_rate": 1.8393827569603528e-05, "loss": 1.8262, "step": 12965 }, { "epoch": 0.7227021905133493, "grad_norm": 0.6199098825454712, "learning_rate": 1.8386928465753807e-05, "loss": 1.7666, "step": 12966 }, { "epoch": 0.7227579287665125, "grad_norm": 0.6008428335189819, "learning_rate": 1.838003036448639e-05, "loss": 1.5913, "step": 12967 }, { "epoch": 0.7228136670196756, "grad_norm": 0.5535486936569214, "learning_rate": 1.8373133266020078e-05, "loss": 1.6933, "step": 12968 }, { "epoch": 0.7228694052728387, "grad_norm": 0.5395800471305847, "learning_rate": 1.836623717057356e-05, "loss": 1.6757, "step": 12969 }, { "epoch": 0.7229251435260019, "grad_norm": 0.5903302431106567, "learning_rate": 1.8359342078365544e-05, "loss": 1.605, "step": 12970 }, { "epoch": 0.7229808817791651, "grad_norm": 0.6031521558761597, "learning_rate": 1.8352447989614758e-05, "loss": 1.6577, "step": 12971 }, { "epoch": 0.7230366200323282, "grad_norm": 0.500883162021637, "learning_rate": 1.834555490453978e-05, "loss": 1.5251, "step": 12972 }, { "epoch": 0.7230923582854913, "grad_norm": 0.604284942150116, "learning_rate": 1.8338662823359248e-05, "loss": 1.6013, "step": 12973 }, { "epoch": 0.7231480965386545, "grad_norm": 0.554581344127655, "learning_rate": 1.8331771746291728e-05, "loss": 1.5824, "step": 12974 }, { "epoch": 0.7232038347918176, "grad_norm": 0.5634434819221497, "learning_rate": 1.8324881673555788e-05, "loss": 1.7972, "step": 12975 }, { "epoch": 0.7232595730449808, "grad_norm": 0.550441563129425, "learning_rate": 1.831799260536991e-05, "loss": 1.6554, "step": 12976 }, { "epoch": 0.723315311298144, "grad_norm": 0.5207143425941467, "learning_rate": 1.8311104541952567e-05, "loss": 1.3969, "step": 12977 }, { "epoch": 0.723371049551307, "grad_norm": 0.558920681476593, "learning_rate": 1.8304217483522263e-05, "loss": 1.5403, "step": 12978 }, { "epoch": 0.7234267878044702, "grad_norm": 0.5989134311676025, "learning_rate": 1.8297331430297365e-05, "loss": 1.7002, "step": 12979 }, { "epoch": 0.7234825260576333, "grad_norm": 0.5259067416191101, "learning_rate": 1.829044638249629e-05, "loss": 1.4977, "step": 12980 }, { "epoch": 0.7235382643107965, "grad_norm": 0.527930498123169, "learning_rate": 1.8283562340337342e-05, "loss": 1.5423, "step": 12981 }, { "epoch": 0.7235940025639597, "grad_norm": 0.5931378602981567, "learning_rate": 1.8276679304038912e-05, "loss": 1.7416, "step": 12982 }, { "epoch": 0.7236497408171227, "grad_norm": 0.5570964813232422, "learning_rate": 1.826979727381924e-05, "loss": 1.6216, "step": 12983 }, { "epoch": 0.7237054790702859, "grad_norm": 0.5443962812423706, "learning_rate": 1.8262916249896595e-05, "loss": 1.4643, "step": 12984 }, { "epoch": 0.7237612173234491, "grad_norm": 0.5586367249488831, "learning_rate": 1.825603623248921e-05, "loss": 1.7197, "step": 12985 }, { "epoch": 0.7238169555766122, "grad_norm": 0.5415465831756592, "learning_rate": 1.8249157221815273e-05, "loss": 1.5418, "step": 12986 }, { "epoch": 0.7238726938297754, "grad_norm": 0.6045337915420532, "learning_rate": 1.8242279218092968e-05, "loss": 1.2166, "step": 12987 }, { "epoch": 0.7239284320829386, "grad_norm": 0.5467269420623779, "learning_rate": 1.8235402221540367e-05, "loss": 1.6258, "step": 12988 }, { "epoch": 0.7239841703361016, "grad_norm": 0.5268842577934265, "learning_rate": 1.8228526232375643e-05, "loss": 1.6773, "step": 12989 }, { "epoch": 0.7240399085892648, "grad_norm": 0.5661937594413757, "learning_rate": 1.822165125081681e-05, "loss": 1.6878, "step": 12990 }, { "epoch": 0.724095646842428, "grad_norm": 0.554506242275238, "learning_rate": 1.8214777277081917e-05, "loss": 1.732, "step": 12991 }, { "epoch": 0.7241513850955911, "grad_norm": 0.6218104958534241, "learning_rate": 1.8207904311388973e-05, "loss": 1.6358, "step": 12992 }, { "epoch": 0.7242071233487543, "grad_norm": 0.5697388648986816, "learning_rate": 1.8201032353955937e-05, "loss": 1.7553, "step": 12993 }, { "epoch": 0.7242628616019174, "grad_norm": 0.5874882936477661, "learning_rate": 1.8194161405000777e-05, "loss": 1.7667, "step": 12994 }, { "epoch": 0.7243185998550805, "grad_norm": 0.5762251615524292, "learning_rate": 1.8187291464741357e-05, "loss": 1.5637, "step": 12995 }, { "epoch": 0.7243743381082437, "grad_norm": 0.5835250020027161, "learning_rate": 1.8180422533395552e-05, "loss": 1.7027, "step": 12996 }, { "epoch": 0.7244300763614069, "grad_norm": 0.5812956690788269, "learning_rate": 1.817355461118126e-05, "loss": 1.4391, "step": 12997 }, { "epoch": 0.72448581461457, "grad_norm": 0.5332396030426025, "learning_rate": 1.8166687698316236e-05, "loss": 1.5179, "step": 12998 }, { "epoch": 0.7245415528677331, "grad_norm": 0.6374024152755737, "learning_rate": 1.815982179501828e-05, "loss": 1.9367, "step": 12999 }, { "epoch": 0.7245972911208963, "grad_norm": 0.5459944605827332, "learning_rate": 1.815295690150513e-05, "loss": 1.4403, "step": 13000 }, { "epoch": 0.7246530293740594, "grad_norm": 0.5092973709106445, "learning_rate": 1.814609301799453e-05, "loss": 1.611, "step": 13001 }, { "epoch": 0.7247087676272226, "grad_norm": 0.5163784623146057, "learning_rate": 1.8139230144704116e-05, "loss": 1.5506, "step": 13002 }, { "epoch": 0.7247645058803857, "grad_norm": 0.5897939205169678, "learning_rate": 1.8132368281851547e-05, "loss": 1.7454, "step": 13003 }, { "epoch": 0.7248202441335488, "grad_norm": 0.5630401372909546, "learning_rate": 1.8125507429654488e-05, "loss": 1.7844, "step": 13004 }, { "epoch": 0.724875982386712, "grad_norm": 0.6095412969589233, "learning_rate": 1.8118647588330472e-05, "loss": 1.4738, "step": 13005 }, { "epoch": 0.7249317206398751, "grad_norm": 0.5806434154510498, "learning_rate": 1.8111788758097092e-05, "loss": 1.7141, "step": 13006 }, { "epoch": 0.7249874588930383, "grad_norm": 0.5746235251426697, "learning_rate": 1.8104930939171814e-05, "loss": 1.4829, "step": 13007 }, { "epoch": 0.7250431971462015, "grad_norm": 0.5625148415565491, "learning_rate": 1.809807413177221e-05, "loss": 1.5404, "step": 13008 }, { "epoch": 0.7250989353993645, "grad_norm": 0.6030070781707764, "learning_rate": 1.8091218336115667e-05, "loss": 1.6544, "step": 13009 }, { "epoch": 0.7251546736525277, "grad_norm": 0.5468196272850037, "learning_rate": 1.8084363552419643e-05, "loss": 1.6339, "step": 13010 }, { "epoch": 0.7252104119056909, "grad_norm": 0.5445948839187622, "learning_rate": 1.807750978090152e-05, "loss": 1.5828, "step": 13011 }, { "epoch": 0.725266150158854, "grad_norm": 0.5647769570350647, "learning_rate": 1.807065702177867e-05, "loss": 1.5901, "step": 13012 }, { "epoch": 0.7253218884120172, "grad_norm": 0.54178386926651, "learning_rate": 1.8063805275268437e-05, "loss": 1.7264, "step": 13013 }, { "epoch": 0.7253776266651804, "grad_norm": 0.5340712070465088, "learning_rate": 1.8056954541588063e-05, "loss": 1.559, "step": 13014 }, { "epoch": 0.7254333649183434, "grad_norm": 0.6358417272567749, "learning_rate": 1.8050104820954883e-05, "loss": 1.857, "step": 13015 }, { "epoch": 0.7254891031715066, "grad_norm": 0.627558171749115, "learning_rate": 1.8043256113586078e-05, "loss": 1.9097, "step": 13016 }, { "epoch": 0.7255448414246698, "grad_norm": 0.562868595123291, "learning_rate": 1.8036408419698873e-05, "loss": 1.6233, "step": 13017 }, { "epoch": 0.7256005796778329, "grad_norm": 0.5979735851287842, "learning_rate": 1.802956173951043e-05, "loss": 1.7397, "step": 13018 }, { "epoch": 0.725656317930996, "grad_norm": 0.5467450022697449, "learning_rate": 1.8022716073237887e-05, "loss": 1.5947, "step": 13019 }, { "epoch": 0.7257120561841592, "grad_norm": 0.49903130531311035, "learning_rate": 1.8015871421098373e-05, "loss": 1.4622, "step": 13020 }, { "epoch": 0.7257677944373223, "grad_norm": 0.5474138855934143, "learning_rate": 1.8009027783308914e-05, "loss": 1.7291, "step": 13021 }, { "epoch": 0.7258235326904855, "grad_norm": 0.563923716545105, "learning_rate": 1.8002185160086575e-05, "loss": 1.5213, "step": 13022 }, { "epoch": 0.7258792709436487, "grad_norm": 0.594559907913208, "learning_rate": 1.7995343551648365e-05, "loss": 1.7677, "step": 13023 }, { "epoch": 0.7259350091968118, "grad_norm": 0.5416660308837891, "learning_rate": 1.798850295821125e-05, "loss": 1.4572, "step": 13024 }, { "epoch": 0.7259907474499749, "grad_norm": 0.5499664545059204, "learning_rate": 1.7981663379992187e-05, "loss": 1.6194, "step": 13025 }, { "epoch": 0.726046485703138, "grad_norm": 0.5548843145370483, "learning_rate": 1.797482481720808e-05, "loss": 1.5044, "step": 13026 }, { "epoch": 0.7261022239563012, "grad_norm": 0.5904132127761841, "learning_rate": 1.796798727007583e-05, "loss": 1.5585, "step": 13027 }, { "epoch": 0.7261579622094644, "grad_norm": 0.585654079914093, "learning_rate": 1.7961150738812244e-05, "loss": 1.6835, "step": 13028 }, { "epoch": 0.7262137004626275, "grad_norm": 0.5477873682975769, "learning_rate": 1.7954315223634143e-05, "loss": 1.7269, "step": 13029 }, { "epoch": 0.7262694387157906, "grad_norm": 0.5600523948669434, "learning_rate": 1.794748072475836e-05, "loss": 1.6663, "step": 13030 }, { "epoch": 0.7263251769689538, "grad_norm": 0.6076684594154358, "learning_rate": 1.7940647242401586e-05, "loss": 1.7742, "step": 13031 }, { "epoch": 0.7263809152221169, "grad_norm": 0.5797076225280762, "learning_rate": 1.7933814776780583e-05, "loss": 1.7405, "step": 13032 }, { "epoch": 0.7264366534752801, "grad_norm": 0.5418017506599426, "learning_rate": 1.7926983328111978e-05, "loss": 1.6648, "step": 13033 }, { "epoch": 0.7264923917284433, "grad_norm": 0.5510844588279724, "learning_rate": 1.7920152896612503e-05, "loss": 1.6446, "step": 13034 }, { "epoch": 0.7265481299816063, "grad_norm": 0.5720747113227844, "learning_rate": 1.7913323482498718e-05, "loss": 1.7264, "step": 13035 }, { "epoch": 0.7266038682347695, "grad_norm": 0.561935544013977, "learning_rate": 1.7906495085987236e-05, "loss": 1.5724, "step": 13036 }, { "epoch": 0.7266596064879327, "grad_norm": 0.5294128060340881, "learning_rate": 1.789966770729461e-05, "loss": 1.6304, "step": 13037 }, { "epoch": 0.7267153447410958, "grad_norm": 0.5671653151512146, "learning_rate": 1.789284134663737e-05, "loss": 1.6261, "step": 13038 }, { "epoch": 0.726771082994259, "grad_norm": 0.5587400794029236, "learning_rate": 1.788601600423202e-05, "loss": 1.6302, "step": 13039 }, { "epoch": 0.7268268212474222, "grad_norm": 0.5392343997955322, "learning_rate": 1.787919168029497e-05, "loss": 1.5139, "step": 13040 }, { "epoch": 0.7268825595005852, "grad_norm": 0.5418177247047424, "learning_rate": 1.787236837504272e-05, "loss": 1.5585, "step": 13041 }, { "epoch": 0.7269382977537484, "grad_norm": 0.5475856065750122, "learning_rate": 1.786554608869161e-05, "loss": 1.5051, "step": 13042 }, { "epoch": 0.7269940360069116, "grad_norm": 0.5837298631668091, "learning_rate": 1.785872482145802e-05, "loss": 1.8636, "step": 13043 }, { "epoch": 0.7270497742600747, "grad_norm": 0.517890453338623, "learning_rate": 1.7851904573558276e-05, "loss": 1.5822, "step": 13044 }, { "epoch": 0.7271055125132379, "grad_norm": 0.5840612649917603, "learning_rate": 1.784508534520869e-05, "loss": 1.5041, "step": 13045 }, { "epoch": 0.727161250766401, "grad_norm": 0.5422665476799011, "learning_rate": 1.7838267136625535e-05, "loss": 1.7019, "step": 13046 }, { "epoch": 0.7272169890195641, "grad_norm": 0.577457845211029, "learning_rate": 1.7831449948025015e-05, "loss": 1.7343, "step": 13047 }, { "epoch": 0.7272727272727273, "grad_norm": 0.603726327419281, "learning_rate": 1.7824633779623347e-05, "loss": 1.9108, "step": 13048 }, { "epoch": 0.7273284655258904, "grad_norm": 0.5421007871627808, "learning_rate": 1.78178186316367e-05, "loss": 1.559, "step": 13049 }, { "epoch": 0.7273842037790536, "grad_norm": 0.5838912129402161, "learning_rate": 1.7811004504281208e-05, "loss": 1.8311, "step": 13050 }, { "epoch": 0.7274399420322167, "grad_norm": 0.5752909779548645, "learning_rate": 1.7804191397772984e-05, "loss": 1.585, "step": 13051 }, { "epoch": 0.7274956802853798, "grad_norm": 0.5657978653907776, "learning_rate": 1.7797379312328088e-05, "loss": 1.4848, "step": 13052 }, { "epoch": 0.727551418538543, "grad_norm": 0.5181905031204224, "learning_rate": 1.7790568248162586e-05, "loss": 1.5032, "step": 13053 }, { "epoch": 0.7276071567917062, "grad_norm": 0.5629306435585022, "learning_rate": 1.7783758205492452e-05, "loss": 1.728, "step": 13054 }, { "epoch": 0.7276628950448693, "grad_norm": 0.5550503134727478, "learning_rate": 1.777694918453365e-05, "loss": 1.4794, "step": 13055 }, { "epoch": 0.7277186332980324, "grad_norm": 0.5529603362083435, "learning_rate": 1.777014118550218e-05, "loss": 1.5339, "step": 13056 }, { "epoch": 0.7277743715511956, "grad_norm": 0.6003782153129578, "learning_rate": 1.7763334208613908e-05, "loss": 1.8527, "step": 13057 }, { "epoch": 0.7278301098043587, "grad_norm": 0.5447100400924683, "learning_rate": 1.775652825408472e-05, "loss": 1.4837, "step": 13058 }, { "epoch": 0.7278858480575219, "grad_norm": 0.5227007269859314, "learning_rate": 1.7749723322130462e-05, "loss": 1.4804, "step": 13059 }, { "epoch": 0.7279415863106851, "grad_norm": 0.6335617899894714, "learning_rate": 1.7742919412966964e-05, "loss": 1.8064, "step": 13060 }, { "epoch": 0.7279973245638481, "grad_norm": 0.5494779348373413, "learning_rate": 1.7736116526809975e-05, "loss": 1.6725, "step": 13061 }, { "epoch": 0.7280530628170113, "grad_norm": 0.5667814612388611, "learning_rate": 1.7729314663875257e-05, "loss": 1.4949, "step": 13062 }, { "epoch": 0.7281088010701745, "grad_norm": 0.5563710331916809, "learning_rate": 1.7722513824378527e-05, "loss": 1.4885, "step": 13063 }, { "epoch": 0.7281645393233376, "grad_norm": 0.559414267539978, "learning_rate": 1.7715714008535472e-05, "loss": 1.653, "step": 13064 }, { "epoch": 0.7282202775765008, "grad_norm": 0.5678215026855469, "learning_rate": 1.770891521656175e-05, "loss": 1.6643, "step": 13065 }, { "epoch": 0.728276015829664, "grad_norm": 0.589455246925354, "learning_rate": 1.7702117448672933e-05, "loss": 1.8701, "step": 13066 }, { "epoch": 0.728331754082827, "grad_norm": 0.5894622802734375, "learning_rate": 1.7695320705084677e-05, "loss": 1.8521, "step": 13067 }, { "epoch": 0.7283874923359902, "grad_norm": 0.5983284711837769, "learning_rate": 1.7688524986012484e-05, "loss": 1.67, "step": 13068 }, { "epoch": 0.7284432305891534, "grad_norm": 0.5873123407363892, "learning_rate": 1.7681730291671888e-05, "loss": 1.6678, "step": 13069 }, { "epoch": 0.7284989688423165, "grad_norm": 0.6033545732498169, "learning_rate": 1.7674936622278377e-05, "loss": 1.6773, "step": 13070 }, { "epoch": 0.7285547070954796, "grad_norm": 0.5835305452346802, "learning_rate": 1.7668143978047408e-05, "loss": 1.6948, "step": 13071 }, { "epoch": 0.7286104453486427, "grad_norm": 0.6241502165794373, "learning_rate": 1.7661352359194423e-05, "loss": 1.9477, "step": 13072 }, { "epoch": 0.7286661836018059, "grad_norm": 0.5314739346504211, "learning_rate": 1.7654561765934772e-05, "loss": 1.1618, "step": 13073 }, { "epoch": 0.7287219218549691, "grad_norm": 0.5321511626243591, "learning_rate": 1.7647772198483842e-05, "loss": 1.5494, "step": 13074 }, { "epoch": 0.7287776601081322, "grad_norm": 0.5920379161834717, "learning_rate": 1.7640983657056948e-05, "loss": 1.6923, "step": 13075 }, { "epoch": 0.7288333983612953, "grad_norm": 0.5376768708229065, "learning_rate": 1.7634196141869386e-05, "loss": 1.6827, "step": 13076 }, { "epoch": 0.7288891366144585, "grad_norm": 0.6414303183555603, "learning_rate": 1.7627409653136417e-05, "loss": 1.7496, "step": 13077 }, { "epoch": 0.7289448748676216, "grad_norm": 0.5750012397766113, "learning_rate": 1.7620624191073266e-05, "loss": 1.5867, "step": 13078 }, { "epoch": 0.7290006131207848, "grad_norm": 0.5717658996582031, "learning_rate": 1.761383975589515e-05, "loss": 1.8191, "step": 13079 }, { "epoch": 0.729056351373948, "grad_norm": 0.6010684967041016, "learning_rate": 1.7607056347817196e-05, "loss": 1.8822, "step": 13080 }, { "epoch": 0.729112089627111, "grad_norm": 0.6027198433876038, "learning_rate": 1.7600273967054535e-05, "loss": 1.7475, "step": 13081 }, { "epoch": 0.7291678278802742, "grad_norm": 0.5768362879753113, "learning_rate": 1.759349261382231e-05, "loss": 1.6764, "step": 13082 }, { "epoch": 0.7292235661334374, "grad_norm": 0.5655965209007263, "learning_rate": 1.7586712288335543e-05, "loss": 1.6163, "step": 13083 }, { "epoch": 0.7292793043866005, "grad_norm": 0.525158166885376, "learning_rate": 1.7579932990809277e-05, "loss": 1.5605, "step": 13084 }, { "epoch": 0.7293350426397637, "grad_norm": 0.5401511192321777, "learning_rate": 1.7573154721458518e-05, "loss": 1.458, "step": 13085 }, { "epoch": 0.7293907808929269, "grad_norm": 0.499323308467865, "learning_rate": 1.7566377480498246e-05, "loss": 1.5316, "step": 13086 }, { "epoch": 0.7294465191460899, "grad_norm": 0.5240613222122192, "learning_rate": 1.755960126814336e-05, "loss": 1.3185, "step": 13087 }, { "epoch": 0.7295022573992531, "grad_norm": 0.5412852168083191, "learning_rate": 1.755282608460878e-05, "loss": 1.59, "step": 13088 }, { "epoch": 0.7295579956524163, "grad_norm": 0.5833172798156738, "learning_rate": 1.754605193010938e-05, "loss": 1.7575, "step": 13089 }, { "epoch": 0.7296137339055794, "grad_norm": 0.5948725342750549, "learning_rate": 1.7539278804859993e-05, "loss": 1.988, "step": 13090 }, { "epoch": 0.7296694721587426, "grad_norm": 0.592042863368988, "learning_rate": 1.7532506709075436e-05, "loss": 1.6315, "step": 13091 }, { "epoch": 0.7297252104119057, "grad_norm": 0.5844925045967102, "learning_rate": 1.7525735642970438e-05, "loss": 1.8533, "step": 13092 }, { "epoch": 0.7297809486650688, "grad_norm": 0.59089195728302, "learning_rate": 1.7518965606759797e-05, "loss": 1.7435, "step": 13093 }, { "epoch": 0.729836686918232, "grad_norm": 0.60077303647995, "learning_rate": 1.7512196600658175e-05, "loss": 1.6991, "step": 13094 }, { "epoch": 0.7298924251713951, "grad_norm": 0.5530768632888794, "learning_rate": 1.750542862488026e-05, "loss": 1.6129, "step": 13095 }, { "epoch": 0.7299481634245583, "grad_norm": 0.6510162949562073, "learning_rate": 1.7498661679640693e-05, "loss": 1.555, "step": 13096 }, { "epoch": 0.7300039016777214, "grad_norm": 0.5623936653137207, "learning_rate": 1.749189576515408e-05, "loss": 1.5851, "step": 13097 }, { "epoch": 0.7300596399308845, "grad_norm": 0.5465413331985474, "learning_rate": 1.7485130881635014e-05, "loss": 1.6828, "step": 13098 }, { "epoch": 0.7301153781840477, "grad_norm": 0.5557692646980286, "learning_rate": 1.747836702929801e-05, "loss": 1.5023, "step": 13099 }, { "epoch": 0.7301711164372109, "grad_norm": 0.6196216940879822, "learning_rate": 1.7471604208357584e-05, "loss": 1.7822, "step": 13100 }, { "epoch": 0.730226854690374, "grad_norm": 0.5655481219291687, "learning_rate": 1.746484241902822e-05, "loss": 1.5506, "step": 13101 }, { "epoch": 0.7302825929435371, "grad_norm": 0.5573101043701172, "learning_rate": 1.7458081661524363e-05, "loss": 1.7494, "step": 13102 }, { "epoch": 0.7303383311967003, "grad_norm": 0.5817492008209229, "learning_rate": 1.745132193606042e-05, "loss": 1.6126, "step": 13103 }, { "epoch": 0.7303940694498634, "grad_norm": 0.5593802332878113, "learning_rate": 1.7444563242850774e-05, "loss": 1.6587, "step": 13104 }, { "epoch": 0.7304498077030266, "grad_norm": 0.5610882639884949, "learning_rate": 1.743780558210979e-05, "loss": 1.7012, "step": 13105 }, { "epoch": 0.7305055459561898, "grad_norm": 0.5741089582443237, "learning_rate": 1.743104895405175e-05, "loss": 1.6279, "step": 13106 }, { "epoch": 0.7305612842093528, "grad_norm": 0.5729717016220093, "learning_rate": 1.742429335889092e-05, "loss": 1.6917, "step": 13107 }, { "epoch": 0.730617022462516, "grad_norm": 0.5948959589004517, "learning_rate": 1.7417538796841615e-05, "loss": 1.5668, "step": 13108 }, { "epoch": 0.7306727607156792, "grad_norm": 0.5032156705856323, "learning_rate": 1.741078526811799e-05, "loss": 1.3751, "step": 13109 }, { "epoch": 0.7307284989688423, "grad_norm": 0.5447957515716553, "learning_rate": 1.7404032772934246e-05, "loss": 1.5854, "step": 13110 }, { "epoch": 0.7307842372220055, "grad_norm": 0.5654783248901367, "learning_rate": 1.7397281311504544e-05, "loss": 1.646, "step": 13111 }, { "epoch": 0.7308399754751687, "grad_norm": 0.602711021900177, "learning_rate": 1.7390530884043e-05, "loss": 1.796, "step": 13112 }, { "epoch": 0.7308957137283317, "grad_norm": 0.5649969577789307, "learning_rate": 1.738378149076368e-05, "loss": 1.5496, "step": 13113 }, { "epoch": 0.7309514519814949, "grad_norm": 0.5492765307426453, "learning_rate": 1.7377033131880638e-05, "loss": 1.5582, "step": 13114 }, { "epoch": 0.7310071902346581, "grad_norm": 0.7160940170288086, "learning_rate": 1.7370285807607905e-05, "loss": 1.5616, "step": 13115 }, { "epoch": 0.7310629284878212, "grad_norm": 0.5868720412254333, "learning_rate": 1.736353951815946e-05, "loss": 1.6777, "step": 13116 }, { "epoch": 0.7311186667409844, "grad_norm": 0.6325905323028564, "learning_rate": 1.7356794263749275e-05, "loss": 1.5789, "step": 13117 }, { "epoch": 0.7311744049941474, "grad_norm": 0.573526918888092, "learning_rate": 1.735005004459122e-05, "loss": 1.4678, "step": 13118 }, { "epoch": 0.7312301432473106, "grad_norm": 0.5424122214317322, "learning_rate": 1.7343306860899243e-05, "loss": 1.6012, "step": 13119 }, { "epoch": 0.7312858815004738, "grad_norm": 0.5311015844345093, "learning_rate": 1.733656471288716e-05, "loss": 1.3831, "step": 13120 }, { "epoch": 0.7313416197536369, "grad_norm": 0.7135401368141174, "learning_rate": 1.73298236007688e-05, "loss": 1.5432, "step": 13121 }, { "epoch": 0.7313973580068001, "grad_norm": 0.5333171486854553, "learning_rate": 1.732308352475796e-05, "loss": 1.6044, "step": 13122 }, { "epoch": 0.7314530962599632, "grad_norm": 0.5196841359138489, "learning_rate": 1.7316344485068392e-05, "loss": 1.4039, "step": 13123 }, { "epoch": 0.7315088345131263, "grad_norm": 0.5499348044395447, "learning_rate": 1.7309606481913826e-05, "loss": 1.6396, "step": 13124 }, { "epoch": 0.7315645727662895, "grad_norm": 0.5782787203788757, "learning_rate": 1.730286951550792e-05, "loss": 1.7854, "step": 13125 }, { "epoch": 0.7316203110194527, "grad_norm": 0.5685500502586365, "learning_rate": 1.7296133586064382e-05, "loss": 1.8116, "step": 13126 }, { "epoch": 0.7316760492726158, "grad_norm": 0.5851349234580994, "learning_rate": 1.7289398693796795e-05, "loss": 1.8243, "step": 13127 }, { "epoch": 0.7317317875257789, "grad_norm": 0.5307192206382751, "learning_rate": 1.7282664838918766e-05, "loss": 1.4941, "step": 13128 }, { "epoch": 0.7317875257789421, "grad_norm": 0.5350309014320374, "learning_rate": 1.7275932021643853e-05, "loss": 1.4587, "step": 13129 }, { "epoch": 0.7318432640321052, "grad_norm": 0.5509794354438782, "learning_rate": 1.726920024218558e-05, "loss": 1.7593, "step": 13130 }, { "epoch": 0.7318990022852684, "grad_norm": 0.6042940020561218, "learning_rate": 1.726246950075746e-05, "loss": 1.7246, "step": 13131 }, { "epoch": 0.7319547405384316, "grad_norm": 0.5225052237510681, "learning_rate": 1.7255739797572916e-05, "loss": 1.4019, "step": 13132 }, { "epoch": 0.7320104787915946, "grad_norm": 0.5401765704154968, "learning_rate": 1.72490111328454e-05, "loss": 1.4506, "step": 13133 }, { "epoch": 0.7320662170447578, "grad_norm": 0.5437548160552979, "learning_rate": 1.7242283506788292e-05, "loss": 1.4996, "step": 13134 }, { "epoch": 0.732121955297921, "grad_norm": 0.6444903612136841, "learning_rate": 1.7235556919614964e-05, "loss": 1.8975, "step": 13135 }, { "epoch": 0.7321776935510841, "grad_norm": 0.5493695735931396, "learning_rate": 1.722883137153874e-05, "loss": 1.7209, "step": 13136 }, { "epoch": 0.7322334318042473, "grad_norm": 0.5016687512397766, "learning_rate": 1.7222106862772912e-05, "loss": 1.3764, "step": 13137 }, { "epoch": 0.7322891700574105, "grad_norm": 0.5542362928390503, "learning_rate": 1.7215383393530767e-05, "loss": 1.4915, "step": 13138 }, { "epoch": 0.7323449083105735, "grad_norm": 0.571007251739502, "learning_rate": 1.7208660964025498e-05, "loss": 1.6684, "step": 13139 }, { "epoch": 0.7324006465637367, "grad_norm": 0.5865726470947266, "learning_rate": 1.720193957447031e-05, "loss": 1.6384, "step": 13140 }, { "epoch": 0.7324563848168998, "grad_norm": 0.5695785880088806, "learning_rate": 1.719521922507838e-05, "loss": 1.7146, "step": 13141 }, { "epoch": 0.732512123070063, "grad_norm": 0.5801404118537903, "learning_rate": 1.7188499916062823e-05, "loss": 1.6765, "step": 13142 }, { "epoch": 0.7325678613232262, "grad_norm": 0.5730370879173279, "learning_rate": 1.718178164763677e-05, "loss": 1.7715, "step": 13143 }, { "epoch": 0.7326235995763892, "grad_norm": 0.6010292172431946, "learning_rate": 1.717506442001322e-05, "loss": 1.8965, "step": 13144 }, { "epoch": 0.7326793378295524, "grad_norm": 0.5768089890480042, "learning_rate": 1.716834823340528e-05, "loss": 1.6592, "step": 13145 }, { "epoch": 0.7327350760827156, "grad_norm": 0.5558800101280212, "learning_rate": 1.7161633088025892e-05, "loss": 1.6016, "step": 13146 }, { "epoch": 0.7327908143358787, "grad_norm": 0.5557398200035095, "learning_rate": 1.715491898408804e-05, "loss": 1.5946, "step": 13147 }, { "epoch": 0.7328465525890419, "grad_norm": 0.5502184629440308, "learning_rate": 1.7148205921804665e-05, "loss": 1.5196, "step": 13148 }, { "epoch": 0.732902290842205, "grad_norm": 0.5514625906944275, "learning_rate": 1.7141493901388657e-05, "loss": 1.5667, "step": 13149 }, { "epoch": 0.7329580290953681, "grad_norm": 0.5282281041145325, "learning_rate": 1.71347829230529e-05, "loss": 1.3212, "step": 13150 }, { "epoch": 0.7330137673485313, "grad_norm": 0.5706415176391602, "learning_rate": 1.7128072987010173e-05, "loss": 1.8084, "step": 13151 }, { "epoch": 0.7330695056016945, "grad_norm": 0.5897558331489563, "learning_rate": 1.7121364093473352e-05, "loss": 1.9594, "step": 13152 }, { "epoch": 0.7331252438548576, "grad_norm": 0.5287189483642578, "learning_rate": 1.7114656242655153e-05, "loss": 1.6426, "step": 13153 }, { "epoch": 0.7331809821080207, "grad_norm": 0.5570682287216187, "learning_rate": 1.7107949434768317e-05, "loss": 1.4455, "step": 13154 }, { "epoch": 0.7332367203611839, "grad_norm": 0.529339075088501, "learning_rate": 1.710124367002555e-05, "loss": 1.5018, "step": 13155 }, { "epoch": 0.733292458614347, "grad_norm": 0.584701657295227, "learning_rate": 1.7094538948639527e-05, "loss": 1.6713, "step": 13156 }, { "epoch": 0.7333481968675102, "grad_norm": 0.5863745808601379, "learning_rate": 1.7087835270822893e-05, "loss": 1.6141, "step": 13157 }, { "epoch": 0.7334039351206734, "grad_norm": 0.6347394585609436, "learning_rate": 1.708113263678821e-05, "loss": 1.7676, "step": 13158 }, { "epoch": 0.7334596733738364, "grad_norm": 0.5613558292388916, "learning_rate": 1.7074431046748075e-05, "loss": 1.5305, "step": 13159 }, { "epoch": 0.7335154116269996, "grad_norm": 0.559970498085022, "learning_rate": 1.7067730500915015e-05, "loss": 1.5329, "step": 13160 }, { "epoch": 0.7335711498801628, "grad_norm": 0.6007326245307922, "learning_rate": 1.7061030999501538e-05, "loss": 1.8066, "step": 13161 }, { "epoch": 0.7336268881333259, "grad_norm": 0.5830183029174805, "learning_rate": 1.705433254272011e-05, "loss": 1.6414, "step": 13162 }, { "epoch": 0.7336826263864891, "grad_norm": 0.5798273682594299, "learning_rate": 1.7047635130783163e-05, "loss": 1.6874, "step": 13163 }, { "epoch": 0.7337383646396521, "grad_norm": 0.576889157295227, "learning_rate": 1.704093876390312e-05, "loss": 1.8305, "step": 13164 }, { "epoch": 0.7337941028928153, "grad_norm": 0.5485324263572693, "learning_rate": 1.7034243442292326e-05, "loss": 1.5281, "step": 13165 }, { "epoch": 0.7338498411459785, "grad_norm": 0.5446223616600037, "learning_rate": 1.702754916616312e-05, "loss": 1.4261, "step": 13166 }, { "epoch": 0.7339055793991416, "grad_norm": 0.558986485004425, "learning_rate": 1.702085593572781e-05, "loss": 1.4768, "step": 13167 }, { "epoch": 0.7339613176523048, "grad_norm": 0.6452115178108215, "learning_rate": 1.701416375119867e-05, "loss": 1.8325, "step": 13168 }, { "epoch": 0.734017055905468, "grad_norm": 0.6113860607147217, "learning_rate": 1.7007472612787957e-05, "loss": 1.8015, "step": 13169 }, { "epoch": 0.734072794158631, "grad_norm": 0.526680588722229, "learning_rate": 1.7000782520707815e-05, "loss": 1.5654, "step": 13170 }, { "epoch": 0.7341285324117942, "grad_norm": 0.5179544687271118, "learning_rate": 1.6994093475170485e-05, "loss": 1.5492, "step": 13171 }, { "epoch": 0.7341842706649574, "grad_norm": 0.5642718076705933, "learning_rate": 1.6987405476388056e-05, "loss": 1.753, "step": 13172 }, { "epoch": 0.7342400089181205, "grad_norm": 0.5657768845558167, "learning_rate": 1.6980718524572648e-05, "loss": 1.68, "step": 13173 }, { "epoch": 0.7342957471712837, "grad_norm": 0.5769280195236206, "learning_rate": 1.6974032619936338e-05, "loss": 1.6085, "step": 13174 }, { "epoch": 0.7343514854244468, "grad_norm": 0.5651370882987976, "learning_rate": 1.6967347762691154e-05, "loss": 1.6717, "step": 13175 }, { "epoch": 0.7344072236776099, "grad_norm": 0.5511763691902161, "learning_rate": 1.6960663953049123e-05, "loss": 1.5811, "step": 13176 }, { "epoch": 0.7344629619307731, "grad_norm": 0.5351390242576599, "learning_rate": 1.6953981191222162e-05, "loss": 1.6795, "step": 13177 }, { "epoch": 0.7345187001839363, "grad_norm": 0.5856584906578064, "learning_rate": 1.6947299477422284e-05, "loss": 1.7929, "step": 13178 }, { "epoch": 0.7345744384370994, "grad_norm": 0.5638580322265625, "learning_rate": 1.6940618811861335e-05, "loss": 1.6411, "step": 13179 }, { "epoch": 0.7346301766902625, "grad_norm": 0.519822895526886, "learning_rate": 1.6933939194751215e-05, "loss": 1.5319, "step": 13180 }, { "epoch": 0.7346859149434257, "grad_norm": 0.5416386723518372, "learning_rate": 1.6927260626303748e-05, "loss": 1.5279, "step": 13181 }, { "epoch": 0.7347416531965888, "grad_norm": 0.6365106105804443, "learning_rate": 1.6920583106730748e-05, "loss": 1.8239, "step": 13182 }, { "epoch": 0.734797391449752, "grad_norm": 0.6207970380783081, "learning_rate": 1.6913906636244005e-05, "loss": 1.7993, "step": 13183 }, { "epoch": 0.7348531297029152, "grad_norm": 0.5355508923530579, "learning_rate": 1.690723121505522e-05, "loss": 1.5527, "step": 13184 }, { "epoch": 0.7349088679560782, "grad_norm": 0.5439286231994629, "learning_rate": 1.6900556843376115e-05, "loss": 1.6684, "step": 13185 }, { "epoch": 0.7349646062092414, "grad_norm": 0.5732739567756653, "learning_rate": 1.6893883521418362e-05, "loss": 1.5172, "step": 13186 }, { "epoch": 0.7350203444624045, "grad_norm": 0.6353051066398621, "learning_rate": 1.6887211249393608e-05, "loss": 1.6473, "step": 13187 }, { "epoch": 0.7350760827155677, "grad_norm": 0.6798067092895508, "learning_rate": 1.6880540027513448e-05, "loss": 1.8738, "step": 13188 }, { "epoch": 0.7351318209687309, "grad_norm": 0.6208623051643372, "learning_rate": 1.687386985598946e-05, "loss": 1.8411, "step": 13189 }, { "epoch": 0.7351875592218939, "grad_norm": 0.5615735650062561, "learning_rate": 1.6867200735033196e-05, "loss": 1.5319, "step": 13190 }, { "epoch": 0.7352432974750571, "grad_norm": 0.5641026496887207, "learning_rate": 1.6860532664856133e-05, "loss": 1.5069, "step": 13191 }, { "epoch": 0.7352990357282203, "grad_norm": 0.5726016163825989, "learning_rate": 1.6853865645669752e-05, "loss": 1.6411, "step": 13192 }, { "epoch": 0.7353547739813834, "grad_norm": 0.5372188687324524, "learning_rate": 1.6847199677685505e-05, "loss": 1.6466, "step": 13193 }, { "epoch": 0.7354105122345466, "grad_norm": 0.5255815386772156, "learning_rate": 1.6840534761114786e-05, "loss": 1.4866, "step": 13194 }, { "epoch": 0.7354662504877097, "grad_norm": 0.5993079543113708, "learning_rate": 1.683387089616899e-05, "loss": 1.903, "step": 13195 }, { "epoch": 0.7355219887408728, "grad_norm": 0.588141918182373, "learning_rate": 1.68272080830594e-05, "loss": 1.8349, "step": 13196 }, { "epoch": 0.735577726994036, "grad_norm": 0.5988585948944092, "learning_rate": 1.6820546321997395e-05, "loss": 1.7329, "step": 13197 }, { "epoch": 0.7356334652471992, "grad_norm": 0.5887940526008606, "learning_rate": 1.6813885613194195e-05, "loss": 1.6086, "step": 13198 }, { "epoch": 0.7356892035003623, "grad_norm": 0.5614736080169678, "learning_rate": 1.6807225956861054e-05, "loss": 1.5956, "step": 13199 }, { "epoch": 0.7357449417535254, "grad_norm": 0.5350954532623291, "learning_rate": 1.6800567353209178e-05, "loss": 1.5, "step": 13200 }, { "epoch": 0.7358006800066886, "grad_norm": 0.5915472507476807, "learning_rate": 1.6793909802449737e-05, "loss": 1.764, "step": 13201 }, { "epoch": 0.7358564182598517, "grad_norm": 0.5408633351325989, "learning_rate": 1.6787253304793892e-05, "loss": 1.5798, "step": 13202 }, { "epoch": 0.7359121565130149, "grad_norm": 0.5959146618843079, "learning_rate": 1.6780597860452695e-05, "loss": 1.7875, "step": 13203 }, { "epoch": 0.7359678947661781, "grad_norm": 0.6157255172729492, "learning_rate": 1.6773943469637282e-05, "loss": 1.7364, "step": 13204 }, { "epoch": 0.7360236330193412, "grad_norm": 0.5907375812530518, "learning_rate": 1.676729013255865e-05, "loss": 1.5179, "step": 13205 }, { "epoch": 0.7360793712725043, "grad_norm": 0.571946918964386, "learning_rate": 1.6760637849427812e-05, "loss": 1.6597, "step": 13206 }, { "epoch": 0.7361351095256675, "grad_norm": 0.594362199306488, "learning_rate": 1.675398662045574e-05, "loss": 1.7935, "step": 13207 }, { "epoch": 0.7361908477788306, "grad_norm": 0.5478150844573975, "learning_rate": 1.6747336445853373e-05, "loss": 1.5229, "step": 13208 }, { "epoch": 0.7362465860319938, "grad_norm": 0.5203835368156433, "learning_rate": 1.6740687325831638e-05, "loss": 1.4077, "step": 13209 }, { "epoch": 0.7363023242851569, "grad_norm": 0.5903517007827759, "learning_rate": 1.673403926060137e-05, "loss": 1.641, "step": 13210 }, { "epoch": 0.73635806253832, "grad_norm": 0.5217337608337402, "learning_rate": 1.672739225037342e-05, "loss": 1.3975, "step": 13211 }, { "epoch": 0.7364138007914832, "grad_norm": 0.5792795419692993, "learning_rate": 1.6720746295358596e-05, "loss": 1.8485, "step": 13212 }, { "epoch": 0.7364695390446463, "grad_norm": 0.5703185200691223, "learning_rate": 1.6714101395767673e-05, "loss": 1.7343, "step": 13213 }, { "epoch": 0.7365252772978095, "grad_norm": 0.5775966644287109, "learning_rate": 1.670745755181138e-05, "loss": 1.5785, "step": 13214 }, { "epoch": 0.7365810155509727, "grad_norm": 0.5719923973083496, "learning_rate": 1.670081476370042e-05, "loss": 1.7871, "step": 13215 }, { "epoch": 0.7366367538041357, "grad_norm": 0.5493507981300354, "learning_rate": 1.669417303164549e-05, "loss": 1.707, "step": 13216 }, { "epoch": 0.7366924920572989, "grad_norm": 0.5433780550956726, "learning_rate": 1.6687532355857183e-05, "loss": 1.5654, "step": 13217 }, { "epoch": 0.7367482303104621, "grad_norm": 0.5848167538642883, "learning_rate": 1.668089273654611e-05, "loss": 1.7403, "step": 13218 }, { "epoch": 0.7368039685636252, "grad_norm": 0.5769858956336975, "learning_rate": 1.6674254173922893e-05, "loss": 1.8995, "step": 13219 }, { "epoch": 0.7368597068167884, "grad_norm": 0.572632372379303, "learning_rate": 1.666761666819801e-05, "loss": 1.4678, "step": 13220 }, { "epoch": 0.7369154450699515, "grad_norm": 0.592958927154541, "learning_rate": 1.6660980219582e-05, "loss": 1.5932, "step": 13221 }, { "epoch": 0.7369711833231146, "grad_norm": 0.5782008171081543, "learning_rate": 1.665434482828529e-05, "loss": 1.8013, "step": 13222 }, { "epoch": 0.7370269215762778, "grad_norm": 0.5540836453437805, "learning_rate": 1.664771049451837e-05, "loss": 1.4329, "step": 13223 }, { "epoch": 0.737082659829441, "grad_norm": 0.5827534198760986, "learning_rate": 1.6641077218491606e-05, "loss": 1.4933, "step": 13224 }, { "epoch": 0.7371383980826041, "grad_norm": 0.5785440802574158, "learning_rate": 1.6634445000415372e-05, "loss": 1.7321, "step": 13225 }, { "epoch": 0.7371941363357672, "grad_norm": 0.5536699891090393, "learning_rate": 1.662781384050001e-05, "loss": 1.5427, "step": 13226 }, { "epoch": 0.7372498745889304, "grad_norm": 0.5601542592048645, "learning_rate": 1.662118373895582e-05, "loss": 1.6237, "step": 13227 }, { "epoch": 0.7373056128420935, "grad_norm": 0.5668201446533203, "learning_rate": 1.6614554695993084e-05, "loss": 1.7387, "step": 13228 }, { "epoch": 0.7373613510952567, "grad_norm": 0.558070182800293, "learning_rate": 1.660792671182199e-05, "loss": 1.6295, "step": 13229 }, { "epoch": 0.7374170893484199, "grad_norm": 0.6125143766403198, "learning_rate": 1.6601299786652807e-05, "loss": 1.6571, "step": 13230 }, { "epoch": 0.737472827601583, "grad_norm": 0.5656547546386719, "learning_rate": 1.6594673920695647e-05, "loss": 1.7502, "step": 13231 }, { "epoch": 0.7375285658547461, "grad_norm": 0.6245994567871094, "learning_rate": 1.658804911416067e-05, "loss": 1.7857, "step": 13232 }, { "epoch": 0.7375843041079092, "grad_norm": 0.5701721906661987, "learning_rate": 1.6581425367257963e-05, "loss": 1.5482, "step": 13233 }, { "epoch": 0.7376400423610724, "grad_norm": 0.5576661229133606, "learning_rate": 1.65748026801976e-05, "loss": 1.6569, "step": 13234 }, { "epoch": 0.7376957806142356, "grad_norm": 0.546334445476532, "learning_rate": 1.656818105318963e-05, "loss": 1.8769, "step": 13235 }, { "epoch": 0.7377515188673986, "grad_norm": 0.5302374362945557, "learning_rate": 1.6561560486444023e-05, "loss": 1.5192, "step": 13236 }, { "epoch": 0.7378072571205618, "grad_norm": 0.5588144063949585, "learning_rate": 1.6554940980170757e-05, "loss": 1.5064, "step": 13237 }, { "epoch": 0.737862995373725, "grad_norm": 0.5656217932701111, "learning_rate": 1.6548322534579765e-05, "loss": 1.5538, "step": 13238 }, { "epoch": 0.7379187336268881, "grad_norm": 0.5525779724121094, "learning_rate": 1.6541705149880943e-05, "loss": 1.5847, "step": 13239 }, { "epoch": 0.7379744718800513, "grad_norm": 0.5362941026687622, "learning_rate": 1.6535088826284158e-05, "loss": 1.6449, "step": 13240 }, { "epoch": 0.7380302101332145, "grad_norm": 0.531810998916626, "learning_rate": 1.652847356399924e-05, "loss": 1.608, "step": 13241 }, { "epoch": 0.7380859483863775, "grad_norm": 0.5702958106994629, "learning_rate": 1.6521859363236008e-05, "loss": 1.5354, "step": 13242 }, { "epoch": 0.7381416866395407, "grad_norm": 0.5588272213935852, "learning_rate": 1.651524622420419e-05, "loss": 1.658, "step": 13243 }, { "epoch": 0.7381974248927039, "grad_norm": 0.5959174633026123, "learning_rate": 1.6508634147113515e-05, "loss": 1.6284, "step": 13244 }, { "epoch": 0.738253163145867, "grad_norm": 0.5471432209014893, "learning_rate": 1.6502023132173733e-05, "loss": 1.7111, "step": 13245 }, { "epoch": 0.7383089013990302, "grad_norm": 0.5873154401779175, "learning_rate": 1.6495413179594448e-05, "loss": 1.6066, "step": 13246 }, { "epoch": 0.7383646396521933, "grad_norm": 0.5228626728057861, "learning_rate": 1.648880428958533e-05, "loss": 1.409, "step": 13247 }, { "epoch": 0.7384203779053564, "grad_norm": 0.6058785319328308, "learning_rate": 1.6482196462355925e-05, "loss": 1.6826, "step": 13248 }, { "epoch": 0.7384761161585196, "grad_norm": 0.5443040728569031, "learning_rate": 1.6475589698115856e-05, "loss": 1.2258, "step": 13249 }, { "epoch": 0.7385318544116828, "grad_norm": 0.5611996054649353, "learning_rate": 1.6468983997074606e-05, "loss": 1.6302, "step": 13250 }, { "epoch": 0.7385875926648459, "grad_norm": 0.5696637630462646, "learning_rate": 1.6462379359441683e-05, "loss": 1.4842, "step": 13251 }, { "epoch": 0.738643330918009, "grad_norm": 0.7141457796096802, "learning_rate": 1.6455775785426548e-05, "loss": 1.771, "step": 13252 }, { "epoch": 0.7386990691711722, "grad_norm": 0.5674689412117004, "learning_rate": 1.6449173275238634e-05, "loss": 1.7011, "step": 13253 }, { "epoch": 0.7387548074243353, "grad_norm": 0.5802819728851318, "learning_rate": 1.644257182908734e-05, "loss": 1.7596, "step": 13254 }, { "epoch": 0.7388105456774985, "grad_norm": 0.5873621106147766, "learning_rate": 1.6435971447181982e-05, "loss": 1.626, "step": 13255 }, { "epoch": 0.7388662839306616, "grad_norm": 0.585585355758667, "learning_rate": 1.642937212973195e-05, "loss": 1.5525, "step": 13256 }, { "epoch": 0.7389220221838247, "grad_norm": 0.5948177576065063, "learning_rate": 1.642277387694649e-05, "loss": 1.5693, "step": 13257 }, { "epoch": 0.7389777604369879, "grad_norm": 0.5636075139045715, "learning_rate": 1.6416176689034873e-05, "loss": 1.6173, "step": 13258 }, { "epoch": 0.739033498690151, "grad_norm": 0.5540120005607605, "learning_rate": 1.6409580566206324e-05, "loss": 1.6737, "step": 13259 }, { "epoch": 0.7390892369433142, "grad_norm": 0.5813601016998291, "learning_rate": 1.6402985508670032e-05, "loss": 1.8666, "step": 13260 }, { "epoch": 0.7391449751964774, "grad_norm": 0.5551378130912781, "learning_rate": 1.639639151663518e-05, "loss": 1.8081, "step": 13261 }, { "epoch": 0.7392007134496404, "grad_norm": 0.5455393195152283, "learning_rate": 1.638979859031084e-05, "loss": 1.7515, "step": 13262 }, { "epoch": 0.7392564517028036, "grad_norm": 0.6024508476257324, "learning_rate": 1.638320672990613e-05, "loss": 1.8197, "step": 13263 }, { "epoch": 0.7393121899559668, "grad_norm": 0.5206683874130249, "learning_rate": 1.6376615935630106e-05, "loss": 1.4308, "step": 13264 }, { "epoch": 0.7393679282091299, "grad_norm": 0.5082628130912781, "learning_rate": 1.6370026207691786e-05, "loss": 1.4348, "step": 13265 }, { "epoch": 0.7394236664622931, "grad_norm": 0.5659313201904297, "learning_rate": 1.636343754630015e-05, "loss": 1.7006, "step": 13266 }, { "epoch": 0.7394794047154563, "grad_norm": 0.5450108647346497, "learning_rate": 1.6356849951664172e-05, "loss": 1.5063, "step": 13267 }, { "epoch": 0.7395351429686193, "grad_norm": 0.5550732016563416, "learning_rate": 1.6350263423992774e-05, "loss": 1.6295, "step": 13268 }, { "epoch": 0.7395908812217825, "grad_norm": 0.6069827675819397, "learning_rate": 1.634367796349481e-05, "loss": 1.7564, "step": 13269 }, { "epoch": 0.7396466194749457, "grad_norm": 0.5506473779678345, "learning_rate": 1.6337093570379153e-05, "loss": 1.6458, "step": 13270 }, { "epoch": 0.7397023577281088, "grad_norm": 0.5603538751602173, "learning_rate": 1.6330510244854612e-05, "loss": 1.5231, "step": 13271 }, { "epoch": 0.739758095981272, "grad_norm": 0.6235647201538086, "learning_rate": 1.632392798712999e-05, "loss": 1.9289, "step": 13272 }, { "epoch": 0.7398138342344351, "grad_norm": 0.5420436263084412, "learning_rate": 1.631734679741404e-05, "loss": 1.515, "step": 13273 }, { "epoch": 0.7398695724875982, "grad_norm": 0.5445640683174133, "learning_rate": 1.631076667591543e-05, "loss": 1.5093, "step": 13274 }, { "epoch": 0.7399253107407614, "grad_norm": 0.5499640107154846, "learning_rate": 1.6304187622842916e-05, "loss": 1.7421, "step": 13275 }, { "epoch": 0.7399810489939246, "grad_norm": 0.5200676321983337, "learning_rate": 1.6297609638405093e-05, "loss": 1.4374, "step": 13276 }, { "epoch": 0.7400367872470877, "grad_norm": 0.5372708439826965, "learning_rate": 1.629103272281059e-05, "loss": 1.591, "step": 13277 }, { "epoch": 0.7400925255002508, "grad_norm": 0.58269864320755, "learning_rate": 1.6284456876267994e-05, "loss": 1.8288, "step": 13278 }, { "epoch": 0.7401482637534139, "grad_norm": 0.5352569818496704, "learning_rate": 1.6277882098985852e-05, "loss": 1.4758, "step": 13279 }, { "epoch": 0.7402040020065771, "grad_norm": 0.591149628162384, "learning_rate": 1.6271308391172696e-05, "loss": 1.7342, "step": 13280 }, { "epoch": 0.7402597402597403, "grad_norm": 0.6212684512138367, "learning_rate": 1.626473575303695e-05, "loss": 1.7038, "step": 13281 }, { "epoch": 0.7403154785129034, "grad_norm": 0.6295444369316101, "learning_rate": 1.6258164184787123e-05, "loss": 1.5131, "step": 13282 }, { "epoch": 0.7403712167660665, "grad_norm": 0.5664548277854919, "learning_rate": 1.6251593686631588e-05, "loss": 1.6506, "step": 13283 }, { "epoch": 0.7404269550192297, "grad_norm": 0.5600103139877319, "learning_rate": 1.6245024258778733e-05, "loss": 1.7275, "step": 13284 }, { "epoch": 0.7404826932723928, "grad_norm": 0.5680475831031799, "learning_rate": 1.6238455901436905e-05, "loss": 1.4691, "step": 13285 }, { "epoch": 0.740538431525556, "grad_norm": 0.5569763779640198, "learning_rate": 1.6231888614814416e-05, "loss": 1.6232, "step": 13286 }, { "epoch": 0.7405941697787192, "grad_norm": 0.5917499661445618, "learning_rate": 1.622532239911955e-05, "loss": 1.6881, "step": 13287 }, { "epoch": 0.7406499080318822, "grad_norm": 0.5224557518959045, "learning_rate": 1.6218757254560523e-05, "loss": 1.496, "step": 13288 }, { "epoch": 0.7407056462850454, "grad_norm": 0.5698574781417847, "learning_rate": 1.6212193181345554e-05, "loss": 1.7215, "step": 13289 }, { "epoch": 0.7407613845382086, "grad_norm": 0.567707896232605, "learning_rate": 1.6205630179682825e-05, "loss": 1.6221, "step": 13290 }, { "epoch": 0.7408171227913717, "grad_norm": 0.5405696630477905, "learning_rate": 1.619906824978047e-05, "loss": 1.6172, "step": 13291 }, { "epoch": 0.7408728610445349, "grad_norm": 0.5634341239929199, "learning_rate": 1.6192507391846597e-05, "loss": 1.5224, "step": 13292 }, { "epoch": 0.740928599297698, "grad_norm": 0.6389575004577637, "learning_rate": 1.618594760608928e-05, "loss": 2.1171, "step": 13293 }, { "epoch": 0.7409843375508611, "grad_norm": 0.5640349388122559, "learning_rate": 1.6179388892716568e-05, "loss": 1.4332, "step": 13294 }, { "epoch": 0.7410400758040243, "grad_norm": 0.5597231388092041, "learning_rate": 1.617283125193644e-05, "loss": 1.7089, "step": 13295 }, { "epoch": 0.7410958140571875, "grad_norm": 0.5188087224960327, "learning_rate": 1.6166274683956872e-05, "loss": 1.425, "step": 13296 }, { "epoch": 0.7411515523103506, "grad_norm": 0.5404828190803528, "learning_rate": 1.6159719188985813e-05, "loss": 1.599, "step": 13297 }, { "epoch": 0.7412072905635138, "grad_norm": 0.5605739951133728, "learning_rate": 1.615316476723116e-05, "loss": 1.713, "step": 13298 }, { "epoch": 0.7412630288166769, "grad_norm": 0.5415946841239929, "learning_rate": 1.6146611418900777e-05, "loss": 1.5347, "step": 13299 }, { "epoch": 0.74131876706984, "grad_norm": 0.5645654201507568, "learning_rate": 1.6140059144202497e-05, "loss": 1.7316, "step": 13300 }, { "epoch": 0.7413745053230032, "grad_norm": 0.5823950171470642, "learning_rate": 1.6133507943344144e-05, "loss": 1.8572, "step": 13301 }, { "epoch": 0.7414302435761663, "grad_norm": 0.5628172159194946, "learning_rate": 1.612695781653345e-05, "loss": 1.6987, "step": 13302 }, { "epoch": 0.7414859818293295, "grad_norm": 0.5878089070320129, "learning_rate": 1.6120408763978156e-05, "loss": 1.6614, "step": 13303 }, { "epoch": 0.7415417200824926, "grad_norm": 0.5398010611534119, "learning_rate": 1.6113860785885966e-05, "loss": 1.4362, "step": 13304 }, { "epoch": 0.7415974583356557, "grad_norm": 0.5680728554725647, "learning_rate": 1.6107313882464542e-05, "loss": 1.5918, "step": 13305 }, { "epoch": 0.7416531965888189, "grad_norm": 0.5598174333572388, "learning_rate": 1.6100768053921534e-05, "loss": 1.6136, "step": 13306 }, { "epoch": 0.7417089348419821, "grad_norm": 0.5566685199737549, "learning_rate": 1.609422330046448e-05, "loss": 1.3586, "step": 13307 }, { "epoch": 0.7417646730951452, "grad_norm": 0.5482991933822632, "learning_rate": 1.608767962230101e-05, "loss": 1.6597, "step": 13308 }, { "epoch": 0.7418204113483083, "grad_norm": 0.5422983169555664, "learning_rate": 1.6081137019638603e-05, "loss": 1.3816, "step": 13309 }, { "epoch": 0.7418761496014715, "grad_norm": 0.592792272567749, "learning_rate": 1.6074595492684774e-05, "loss": 1.9714, "step": 13310 }, { "epoch": 0.7419318878546346, "grad_norm": 0.5382637977600098, "learning_rate": 1.6068055041646973e-05, "loss": 1.647, "step": 13311 }, { "epoch": 0.7419876261077978, "grad_norm": 0.549544095993042, "learning_rate": 1.606151566673263e-05, "loss": 1.5836, "step": 13312 }, { "epoch": 0.742043364360961, "grad_norm": 0.5724050402641296, "learning_rate": 1.6054977368149154e-05, "loss": 1.6138, "step": 13313 }, { "epoch": 0.742099102614124, "grad_norm": 0.5999428629875183, "learning_rate": 1.6048440146103866e-05, "loss": 1.9437, "step": 13314 }, { "epoch": 0.7421548408672872, "grad_norm": 0.6062003970146179, "learning_rate": 1.6041904000804103e-05, "loss": 1.6194, "step": 13315 }, { "epoch": 0.7422105791204504, "grad_norm": 0.5726443529129028, "learning_rate": 1.603536893245715e-05, "loss": 1.7029, "step": 13316 }, { "epoch": 0.7422663173736135, "grad_norm": 0.6113731265068054, "learning_rate": 1.6028834941270277e-05, "loss": 1.6231, "step": 13317 }, { "epoch": 0.7423220556267767, "grad_norm": 0.5550969839096069, "learning_rate": 1.602230202745069e-05, "loss": 1.4641, "step": 13318 }, { "epoch": 0.7423777938799399, "grad_norm": 0.5901103019714355, "learning_rate": 1.601577019120558e-05, "loss": 1.5502, "step": 13319 }, { "epoch": 0.7424335321331029, "grad_norm": 0.5575149655342102, "learning_rate": 1.600923943274211e-05, "loss": 1.437, "step": 13320 }, { "epoch": 0.7424892703862661, "grad_norm": 0.5528237819671631, "learning_rate": 1.6002709752267375e-05, "loss": 1.4918, "step": 13321 }, { "epoch": 0.7425450086394293, "grad_norm": 0.5548231601715088, "learning_rate": 1.5996181149988467e-05, "loss": 1.7844, "step": 13322 }, { "epoch": 0.7426007468925924, "grad_norm": 0.5276260375976562, "learning_rate": 1.598965362611243e-05, "loss": 1.5067, "step": 13323 }, { "epoch": 0.7426564851457556, "grad_norm": 0.5183296799659729, "learning_rate": 1.5983127180846298e-05, "loss": 1.454, "step": 13324 }, { "epoch": 0.7427122233989186, "grad_norm": 0.6147708892822266, "learning_rate": 1.597660181439703e-05, "loss": 1.7211, "step": 13325 }, { "epoch": 0.7427679616520818, "grad_norm": 0.5286272168159485, "learning_rate": 1.5970077526971582e-05, "loss": 1.4562, "step": 13326 }, { "epoch": 0.742823699905245, "grad_norm": 0.5524761080741882, "learning_rate": 1.596355431877689e-05, "loss": 1.8089, "step": 13327 }, { "epoch": 0.7428794381584081, "grad_norm": 0.581933856010437, "learning_rate": 1.5957032190019787e-05, "loss": 1.6357, "step": 13328 }, { "epoch": 0.7429351764115713, "grad_norm": 0.5518571138381958, "learning_rate": 1.5950511140907142e-05, "loss": 1.5216, "step": 13329 }, { "epoch": 0.7429909146647344, "grad_norm": 0.569599449634552, "learning_rate": 1.5943991171645762e-05, "loss": 1.6905, "step": 13330 }, { "epoch": 0.7430466529178975, "grad_norm": 0.5589736700057983, "learning_rate": 1.5937472282442416e-05, "loss": 1.6697, "step": 13331 }, { "epoch": 0.7431023911710607, "grad_norm": 0.6014086008071899, "learning_rate": 1.5930954473503874e-05, "loss": 1.7427, "step": 13332 }, { "epoch": 0.7431581294242239, "grad_norm": 0.5605618357658386, "learning_rate": 1.5924437745036784e-05, "loss": 1.6212, "step": 13333 }, { "epoch": 0.743213867677387, "grad_norm": 0.525735080242157, "learning_rate": 1.5917922097247882e-05, "loss": 1.4751, "step": 13334 }, { "epoch": 0.7432696059305501, "grad_norm": 0.6295618414878845, "learning_rate": 1.5911407530343768e-05, "loss": 1.9724, "step": 13335 }, { "epoch": 0.7433253441837133, "grad_norm": 0.5409222841262817, "learning_rate": 1.590489404453106e-05, "loss": 1.3127, "step": 13336 }, { "epoch": 0.7433810824368764, "grad_norm": 0.5514601469039917, "learning_rate": 1.5898381640016318e-05, "loss": 1.6791, "step": 13337 }, { "epoch": 0.7434368206900396, "grad_norm": 0.6076371669769287, "learning_rate": 1.5891870317006093e-05, "loss": 1.6209, "step": 13338 }, { "epoch": 0.7434925589432028, "grad_norm": 0.5812973976135254, "learning_rate": 1.5885360075706886e-05, "loss": 1.6723, "step": 13339 }, { "epoch": 0.7435482971963658, "grad_norm": 0.5968800187110901, "learning_rate": 1.587885091632514e-05, "loss": 1.8016, "step": 13340 }, { "epoch": 0.743604035449529, "grad_norm": 0.5531649589538574, "learning_rate": 1.5872342839067306e-05, "loss": 1.6212, "step": 13341 }, { "epoch": 0.7436597737026922, "grad_norm": 0.5633963942527771, "learning_rate": 1.5865835844139776e-05, "loss": 1.735, "step": 13342 }, { "epoch": 0.7437155119558553, "grad_norm": 0.5534663796424866, "learning_rate": 1.585932993174892e-05, "loss": 1.6558, "step": 13343 }, { "epoch": 0.7437712502090185, "grad_norm": 0.5312620997428894, "learning_rate": 1.585282510210106e-05, "loss": 1.4825, "step": 13344 }, { "epoch": 0.7438269884621816, "grad_norm": 0.6024535894393921, "learning_rate": 1.58463213554025e-05, "loss": 1.7989, "step": 13345 }, { "epoch": 0.7438827267153447, "grad_norm": 0.5507554411888123, "learning_rate": 1.583981869185951e-05, "loss": 1.6123, "step": 13346 }, { "epoch": 0.7439384649685079, "grad_norm": 0.5958787798881531, "learning_rate": 1.5833317111678285e-05, "loss": 1.7726, "step": 13347 }, { "epoch": 0.743994203221671, "grad_norm": 0.5509111285209656, "learning_rate": 1.5826816615065042e-05, "loss": 1.6517, "step": 13348 }, { "epoch": 0.7440499414748342, "grad_norm": 0.5578701496124268, "learning_rate": 1.5820317202225926e-05, "loss": 1.6328, "step": 13349 }, { "epoch": 0.7441056797279973, "grad_norm": 0.5589818954467773, "learning_rate": 1.5813818873367076e-05, "loss": 1.632, "step": 13350 }, { "epoch": 0.7441614179811604, "grad_norm": 0.5828130841255188, "learning_rate": 1.5807321628694567e-05, "loss": 1.5918, "step": 13351 }, { "epoch": 0.7442171562343236, "grad_norm": 0.5889452695846558, "learning_rate": 1.5800825468414452e-05, "loss": 1.8415, "step": 13352 }, { "epoch": 0.7442728944874868, "grad_norm": 0.624024510383606, "learning_rate": 1.5794330392732787e-05, "loss": 1.6525, "step": 13353 }, { "epoch": 0.7443286327406499, "grad_norm": 0.5891615748405457, "learning_rate": 1.5787836401855503e-05, "loss": 1.7335, "step": 13354 }, { "epoch": 0.744384370993813, "grad_norm": 0.5748935341835022, "learning_rate": 1.578134349598858e-05, "loss": 1.6273, "step": 13355 }, { "epoch": 0.7444401092469762, "grad_norm": 0.5996773838996887, "learning_rate": 1.577485167533794e-05, "loss": 1.7174, "step": 13356 }, { "epoch": 0.7444958475001393, "grad_norm": 0.5693355202674866, "learning_rate": 1.576836094010945e-05, "loss": 1.621, "step": 13357 }, { "epoch": 0.7445515857533025, "grad_norm": 0.5534161329269409, "learning_rate": 1.5761871290508983e-05, "loss": 1.4205, "step": 13358 }, { "epoch": 0.7446073240064657, "grad_norm": 0.5653291940689087, "learning_rate": 1.5755382726742308e-05, "loss": 1.6063, "step": 13359 }, { "epoch": 0.7446630622596287, "grad_norm": 0.5330468416213989, "learning_rate": 1.5748895249015266e-05, "loss": 1.6184, "step": 13360 }, { "epoch": 0.7447188005127919, "grad_norm": 0.6169772744178772, "learning_rate": 1.5742408857533546e-05, "loss": 1.6787, "step": 13361 }, { "epoch": 0.7447745387659551, "grad_norm": 0.5544992089271545, "learning_rate": 1.573592355250289e-05, "loss": 1.5111, "step": 13362 }, { "epoch": 0.7448302770191182, "grad_norm": 0.62140953540802, "learning_rate": 1.572943933412896e-05, "loss": 1.7729, "step": 13363 }, { "epoch": 0.7448860152722814, "grad_norm": 0.5693691372871399, "learning_rate": 1.5722956202617408e-05, "loss": 1.5573, "step": 13364 }, { "epoch": 0.7449417535254446, "grad_norm": 0.5448788404464722, "learning_rate": 1.5716474158173845e-05, "loss": 1.5506, "step": 13365 }, { "epoch": 0.7449974917786076, "grad_norm": 0.5135059952735901, "learning_rate": 1.5709993201003827e-05, "loss": 1.4187, "step": 13366 }, { "epoch": 0.7450532300317708, "grad_norm": 0.5643311738967896, "learning_rate": 1.570351333131289e-05, "loss": 1.6344, "step": 13367 }, { "epoch": 0.745108968284934, "grad_norm": 0.5740456581115723, "learning_rate": 1.5697034549306554e-05, "loss": 1.6753, "step": 13368 }, { "epoch": 0.7451647065380971, "grad_norm": 0.5633687973022461, "learning_rate": 1.569055685519028e-05, "loss": 1.8055, "step": 13369 }, { "epoch": 0.7452204447912603, "grad_norm": 0.5269423127174377, "learning_rate": 1.5684080249169507e-05, "loss": 1.426, "step": 13370 }, { "epoch": 0.7452761830444233, "grad_norm": 0.6111160516738892, "learning_rate": 1.5677604731449635e-05, "loss": 1.832, "step": 13371 }, { "epoch": 0.7453319212975865, "grad_norm": 0.5429782271385193, "learning_rate": 1.5671130302236038e-05, "loss": 1.511, "step": 13372 }, { "epoch": 0.7453876595507497, "grad_norm": 0.6169877052307129, "learning_rate": 1.5664656961734025e-05, "loss": 1.8098, "step": 13373 }, { "epoch": 0.7454433978039128, "grad_norm": 0.5550345182418823, "learning_rate": 1.5658184710148897e-05, "loss": 1.6672, "step": 13374 }, { "epoch": 0.745499136057076, "grad_norm": 0.6161905527114868, "learning_rate": 1.565171354768593e-05, "loss": 1.9259, "step": 13375 }, { "epoch": 0.7455548743102391, "grad_norm": 0.5543949007987976, "learning_rate": 1.5645243474550346e-05, "loss": 1.6948, "step": 13376 }, { "epoch": 0.7456106125634022, "grad_norm": 0.5576022863388062, "learning_rate": 1.5638774490947332e-05, "loss": 1.6303, "step": 13377 }, { "epoch": 0.7456663508165654, "grad_norm": 0.5574358105659485, "learning_rate": 1.563230659708206e-05, "loss": 1.56, "step": 13378 }, { "epoch": 0.7457220890697286, "grad_norm": 0.5838919281959534, "learning_rate": 1.562583979315965e-05, "loss": 1.79, "step": 13379 }, { "epoch": 0.7457778273228917, "grad_norm": 0.5559114813804626, "learning_rate": 1.5619374079385175e-05, "loss": 1.6725, "step": 13380 }, { "epoch": 0.7458335655760548, "grad_norm": 0.5833230018615723, "learning_rate": 1.5612909455963703e-05, "loss": 1.8324, "step": 13381 }, { "epoch": 0.745889303829218, "grad_norm": 0.59188312292099, "learning_rate": 1.560644592310025e-05, "loss": 1.5773, "step": 13382 }, { "epoch": 0.7459450420823811, "grad_norm": 0.5672659277915955, "learning_rate": 1.5599983480999802e-05, "loss": 1.4419, "step": 13383 }, { "epoch": 0.7460007803355443, "grad_norm": 0.5613914728164673, "learning_rate": 1.559352212986733e-05, "loss": 1.6136, "step": 13384 }, { "epoch": 0.7460565185887075, "grad_norm": 0.5510649681091309, "learning_rate": 1.5587061869907704e-05, "loss": 1.5806, "step": 13385 }, { "epoch": 0.7461122568418705, "grad_norm": 0.5434938073158264, "learning_rate": 1.5580602701325865e-05, "loss": 1.6182, "step": 13386 }, { "epoch": 0.7461679950950337, "grad_norm": 0.5438975691795349, "learning_rate": 1.5574144624326607e-05, "loss": 1.4877, "step": 13387 }, { "epoch": 0.7462237333481969, "grad_norm": 0.5524957180023193, "learning_rate": 1.5567687639114776e-05, "loss": 1.4704, "step": 13388 }, { "epoch": 0.74627947160136, "grad_norm": 0.5521454811096191, "learning_rate": 1.5561231745895127e-05, "loss": 1.6477, "step": 13389 }, { "epoch": 0.7463352098545232, "grad_norm": 0.6323177218437195, "learning_rate": 1.5554776944872422e-05, "loss": 1.8731, "step": 13390 }, { "epoch": 0.7463909481076864, "grad_norm": 0.5329812169075012, "learning_rate": 1.5548323236251378e-05, "loss": 1.4527, "step": 13391 }, { "epoch": 0.7464466863608494, "grad_norm": 0.5540409088134766, "learning_rate": 1.5541870620236622e-05, "loss": 1.7002, "step": 13392 }, { "epoch": 0.7465024246140126, "grad_norm": 0.5764815807342529, "learning_rate": 1.5535419097032854e-05, "loss": 1.7703, "step": 13393 }, { "epoch": 0.7465581628671757, "grad_norm": 0.5406001806259155, "learning_rate": 1.5528968666844634e-05, "loss": 1.7067, "step": 13394 }, { "epoch": 0.7466139011203389, "grad_norm": 0.5606533288955688, "learning_rate": 1.552251932987655e-05, "loss": 1.5557, "step": 13395 }, { "epoch": 0.7466696393735021, "grad_norm": 0.5777366757392883, "learning_rate": 1.5516071086333138e-05, "loss": 1.6904, "step": 13396 }, { "epoch": 0.7467253776266651, "grad_norm": 0.6230834722518921, "learning_rate": 1.5509623936418892e-05, "loss": 1.6019, "step": 13397 }, { "epoch": 0.7467811158798283, "grad_norm": 0.5899634957313538, "learning_rate": 1.5503177880338298e-05, "loss": 1.8989, "step": 13398 }, { "epoch": 0.7468368541329915, "grad_norm": 0.5678186416625977, "learning_rate": 1.5496732918295755e-05, "loss": 1.7319, "step": 13399 }, { "epoch": 0.7468925923861546, "grad_norm": 0.587462842464447, "learning_rate": 1.5490289050495676e-05, "loss": 1.8259, "step": 13400 }, { "epoch": 0.7469483306393178, "grad_norm": 0.5820348858833313, "learning_rate": 1.5483846277142423e-05, "loss": 1.658, "step": 13401 }, { "epoch": 0.7470040688924809, "grad_norm": 0.5110056400299072, "learning_rate": 1.5477404598440327e-05, "loss": 1.3426, "step": 13402 }, { "epoch": 0.747059807145644, "grad_norm": 0.6571440696716309, "learning_rate": 1.547096401459367e-05, "loss": 1.5072, "step": 13403 }, { "epoch": 0.7471155453988072, "grad_norm": 0.6007630228996277, "learning_rate": 1.5464524525806717e-05, "loss": 1.7707, "step": 13404 }, { "epoch": 0.7471712836519704, "grad_norm": 0.5080630779266357, "learning_rate": 1.5458086132283712e-05, "loss": 1.5782, "step": 13405 }, { "epoch": 0.7472270219051335, "grad_norm": 0.5876208543777466, "learning_rate": 1.54516488342288e-05, "loss": 1.5931, "step": 13406 }, { "epoch": 0.7472827601582966, "grad_norm": 0.5827615261077881, "learning_rate": 1.5445212631846157e-05, "loss": 1.7915, "step": 13407 }, { "epoch": 0.7473384984114598, "grad_norm": 0.570421576499939, "learning_rate": 1.5438777525339902e-05, "loss": 1.6272, "step": 13408 }, { "epoch": 0.7473942366646229, "grad_norm": 0.5399143099784851, "learning_rate": 1.5432343514914123e-05, "loss": 1.4453, "step": 13409 }, { "epoch": 0.7474499749177861, "grad_norm": 0.5795050859451294, "learning_rate": 1.5425910600772868e-05, "loss": 1.4543, "step": 13410 }, { "epoch": 0.7475057131709493, "grad_norm": 0.542121946811676, "learning_rate": 1.5419478783120127e-05, "loss": 1.5641, "step": 13411 }, { "epoch": 0.7475614514241123, "grad_norm": 0.5423764586448669, "learning_rate": 1.541304806215993e-05, "loss": 1.5104, "step": 13412 }, { "epoch": 0.7476171896772755, "grad_norm": 0.5753214359283447, "learning_rate": 1.5406618438096172e-05, "loss": 1.8031, "step": 13413 }, { "epoch": 0.7476729279304387, "grad_norm": 0.5540892481803894, "learning_rate": 1.540018991113279e-05, "loss": 1.8608, "step": 13414 }, { "epoch": 0.7477286661836018, "grad_norm": 0.5682497620582581, "learning_rate": 1.5393762481473644e-05, "loss": 1.6909, "step": 13415 }, { "epoch": 0.747784404436765, "grad_norm": 0.529656708240509, "learning_rate": 1.5387336149322594e-05, "loss": 1.7286, "step": 13416 }, { "epoch": 0.747840142689928, "grad_norm": 0.5613870024681091, "learning_rate": 1.5380910914883445e-05, "loss": 1.6818, "step": 13417 }, { "epoch": 0.7478958809430912, "grad_norm": 0.5584611296653748, "learning_rate": 1.5374486778359932e-05, "loss": 1.6994, "step": 13418 }, { "epoch": 0.7479516191962544, "grad_norm": 0.5976415872573853, "learning_rate": 1.5368063739955845e-05, "loss": 1.8279, "step": 13419 }, { "epoch": 0.7480073574494175, "grad_norm": 0.5717959403991699, "learning_rate": 1.5361641799874843e-05, "loss": 1.5076, "step": 13420 }, { "epoch": 0.7480630957025807, "grad_norm": 0.5503527522087097, "learning_rate": 1.5355220958320604e-05, "loss": 1.604, "step": 13421 }, { "epoch": 0.7481188339557439, "grad_norm": 0.5726061463356018, "learning_rate": 1.5348801215496773e-05, "loss": 1.8152, "step": 13422 }, { "epoch": 0.7481745722089069, "grad_norm": 0.5453668236732483, "learning_rate": 1.5342382571606928e-05, "loss": 1.6422, "step": 13423 }, { "epoch": 0.7482303104620701, "grad_norm": 0.5500398278236389, "learning_rate": 1.533596502685466e-05, "loss": 1.5407, "step": 13424 }, { "epoch": 0.7482860487152333, "grad_norm": 0.5172477960586548, "learning_rate": 1.532954858144346e-05, "loss": 1.4653, "step": 13425 }, { "epoch": 0.7483417869683964, "grad_norm": 0.5574005842208862, "learning_rate": 1.532313323557683e-05, "loss": 1.5693, "step": 13426 }, { "epoch": 0.7483975252215596, "grad_norm": 0.5705146193504333, "learning_rate": 1.531671898945824e-05, "loss": 1.5493, "step": 13427 }, { "epoch": 0.7484532634747227, "grad_norm": 0.5598993301391602, "learning_rate": 1.5310305843291106e-05, "loss": 1.3341, "step": 13428 }, { "epoch": 0.7485090017278858, "grad_norm": 0.5688292384147644, "learning_rate": 1.5303893797278813e-05, "loss": 1.6766, "step": 13429 }, { "epoch": 0.748564739981049, "grad_norm": 0.5565484166145325, "learning_rate": 1.5297482851624718e-05, "loss": 1.7853, "step": 13430 }, { "epoch": 0.7486204782342122, "grad_norm": 0.5675035119056702, "learning_rate": 1.529107300653215e-05, "loss": 1.6365, "step": 13431 }, { "epoch": 0.7486762164873753, "grad_norm": 0.5844092965126038, "learning_rate": 1.528466426220437e-05, "loss": 1.6628, "step": 13432 }, { "epoch": 0.7487319547405384, "grad_norm": 0.5560231804847717, "learning_rate": 1.527825661884463e-05, "loss": 1.5994, "step": 13433 }, { "epoch": 0.7487876929937016, "grad_norm": 0.5227312445640564, "learning_rate": 1.527185007665615e-05, "loss": 1.4491, "step": 13434 }, { "epoch": 0.7488434312468647, "grad_norm": 0.5424653887748718, "learning_rate": 1.526544463584211e-05, "loss": 1.7824, "step": 13435 }, { "epoch": 0.7488991695000279, "grad_norm": 0.586765468120575, "learning_rate": 1.525904029660566e-05, "loss": 1.5596, "step": 13436 }, { "epoch": 0.7489549077531911, "grad_norm": 0.5893957614898682, "learning_rate": 1.5252637059149866e-05, "loss": 1.7777, "step": 13437 }, { "epoch": 0.7490106460063541, "grad_norm": 0.5714499950408936, "learning_rate": 1.5246234923677866e-05, "loss": 1.6985, "step": 13438 }, { "epoch": 0.7490663842595173, "grad_norm": 0.620743453502655, "learning_rate": 1.5239833890392652e-05, "loss": 1.9387, "step": 13439 }, { "epoch": 0.7491221225126804, "grad_norm": 0.5654244422912598, "learning_rate": 1.523343395949724e-05, "loss": 1.5936, "step": 13440 }, { "epoch": 0.7491778607658436, "grad_norm": 0.5660731792449951, "learning_rate": 1.52270351311946e-05, "loss": 1.6402, "step": 13441 }, { "epoch": 0.7492335990190068, "grad_norm": 0.5458896160125732, "learning_rate": 1.5220637405687659e-05, "loss": 1.599, "step": 13442 }, { "epoch": 0.7492893372721698, "grad_norm": 0.5200908184051514, "learning_rate": 1.5214240783179345e-05, "loss": 1.5019, "step": 13443 }, { "epoch": 0.749345075525333, "grad_norm": 0.5807740092277527, "learning_rate": 1.5207845263872466e-05, "loss": 1.6836, "step": 13444 }, { "epoch": 0.7494008137784962, "grad_norm": 0.6102779507637024, "learning_rate": 1.5201450847969911e-05, "loss": 1.6637, "step": 13445 }, { "epoch": 0.7494565520316593, "grad_norm": 0.5772900581359863, "learning_rate": 1.5195057535674434e-05, "loss": 1.7797, "step": 13446 }, { "epoch": 0.7495122902848225, "grad_norm": 0.6321548223495483, "learning_rate": 1.5188665327188805e-05, "loss": 1.7168, "step": 13447 }, { "epoch": 0.7495680285379857, "grad_norm": 0.5762151479721069, "learning_rate": 1.5182274222715747e-05, "loss": 1.4862, "step": 13448 }, { "epoch": 0.7496237667911487, "grad_norm": 0.5636744499206543, "learning_rate": 1.5175884222457948e-05, "loss": 1.6067, "step": 13449 }, { "epoch": 0.7496795050443119, "grad_norm": 0.5497311949729919, "learning_rate": 1.5169495326618077e-05, "loss": 1.5944, "step": 13450 }, { "epoch": 0.7497352432974751, "grad_norm": 0.5859318971633911, "learning_rate": 1.5163107535398724e-05, "loss": 1.7657, "step": 13451 }, { "epoch": 0.7497909815506382, "grad_norm": 0.5736318826675415, "learning_rate": 1.5156720849002487e-05, "loss": 1.4466, "step": 13452 }, { "epoch": 0.7498467198038014, "grad_norm": 0.6058290600776672, "learning_rate": 1.5150335267631915e-05, "loss": 1.6204, "step": 13453 }, { "epoch": 0.7499024580569645, "grad_norm": 0.568292498588562, "learning_rate": 1.514395079148952e-05, "loss": 1.8471, "step": 13454 }, { "epoch": 0.7499581963101276, "grad_norm": 0.566444993019104, "learning_rate": 1.5137567420777783e-05, "loss": 1.567, "step": 13455 }, { "epoch": 0.7500139345632908, "grad_norm": 0.5849542617797852, "learning_rate": 1.513118515569914e-05, "loss": 1.6508, "step": 13456 }, { "epoch": 0.750069672816454, "grad_norm": 0.5593271255493164, "learning_rate": 1.5124803996456021e-05, "loss": 1.6505, "step": 13457 }, { "epoch": 0.750125411069617, "grad_norm": 0.5788416862487793, "learning_rate": 1.5118423943250771e-05, "loss": 1.7703, "step": 13458 }, { "epoch": 0.7501811493227802, "grad_norm": 0.6123077869415283, "learning_rate": 1.511204499628574e-05, "loss": 1.8, "step": 13459 }, { "epoch": 0.7502368875759434, "grad_norm": 0.5456638336181641, "learning_rate": 1.5105667155763231e-05, "loss": 1.597, "step": 13460 }, { "epoch": 0.7502926258291065, "grad_norm": 0.5396011471748352, "learning_rate": 1.5099290421885509e-05, "loss": 1.56, "step": 13461 }, { "epoch": 0.7503483640822697, "grad_norm": 0.5928915143013, "learning_rate": 1.5092914794854824e-05, "loss": 1.7026, "step": 13462 }, { "epoch": 0.7504041023354328, "grad_norm": 0.5351876020431519, "learning_rate": 1.5086540274873328e-05, "loss": 1.7195, "step": 13463 }, { "epoch": 0.7504598405885959, "grad_norm": 0.561486005783081, "learning_rate": 1.5080166862143253e-05, "loss": 1.5934, "step": 13464 }, { "epoch": 0.7505155788417591, "grad_norm": 0.5588828325271606, "learning_rate": 1.5073794556866666e-05, "loss": 1.4999, "step": 13465 }, { "epoch": 0.7505713170949222, "grad_norm": 0.6039292812347412, "learning_rate": 1.5067423359245686e-05, "loss": 1.836, "step": 13466 }, { "epoch": 0.7506270553480854, "grad_norm": 0.5354037284851074, "learning_rate": 1.5061053269482362e-05, "loss": 1.7677, "step": 13467 }, { "epoch": 0.7506827936012486, "grad_norm": 0.5810551643371582, "learning_rate": 1.505468428777872e-05, "loss": 1.5995, "step": 13468 }, { "epoch": 0.7507385318544116, "grad_norm": 0.5635651350021362, "learning_rate": 1.5048316414336766e-05, "loss": 1.7743, "step": 13469 }, { "epoch": 0.7507942701075748, "grad_norm": 0.5567081570625305, "learning_rate": 1.50419496493584e-05, "loss": 1.4866, "step": 13470 }, { "epoch": 0.750850008360738, "grad_norm": 0.5657464861869812, "learning_rate": 1.5035583993045604e-05, "loss": 1.601, "step": 13471 }, { "epoch": 0.7509057466139011, "grad_norm": 0.5875218510627747, "learning_rate": 1.5029219445600212e-05, "loss": 1.8084, "step": 13472 }, { "epoch": 0.7509614848670643, "grad_norm": 0.5419088006019592, "learning_rate": 1.5022856007224084e-05, "loss": 1.498, "step": 13473 }, { "epoch": 0.7510172231202275, "grad_norm": 0.5150018334388733, "learning_rate": 1.5016493678119031e-05, "loss": 1.4284, "step": 13474 }, { "epoch": 0.7510729613733905, "grad_norm": 0.5912177562713623, "learning_rate": 1.5010132458486832e-05, "loss": 1.8732, "step": 13475 }, { "epoch": 0.7511286996265537, "grad_norm": 0.5249746441841125, "learning_rate": 1.5003772348529243e-05, "loss": 1.4964, "step": 13476 }, { "epoch": 0.7511844378797169, "grad_norm": 0.5486159920692444, "learning_rate": 1.4997413348447937e-05, "loss": 1.5421, "step": 13477 }, { "epoch": 0.75124017613288, "grad_norm": 0.5511107444763184, "learning_rate": 1.4991055458444597e-05, "loss": 1.664, "step": 13478 }, { "epoch": 0.7512959143860432, "grad_norm": 0.5520987510681152, "learning_rate": 1.4984698678720865e-05, "loss": 1.4636, "step": 13479 }, { "epoch": 0.7513516526392063, "grad_norm": 0.553927481174469, "learning_rate": 1.4978343009478335e-05, "loss": 1.5685, "step": 13480 }, { "epoch": 0.7514073908923694, "grad_norm": 0.5070444345474243, "learning_rate": 1.4971988450918578e-05, "loss": 1.4127, "step": 13481 }, { "epoch": 0.7514631291455326, "grad_norm": 0.5468535423278809, "learning_rate": 1.4965635003243117e-05, "loss": 1.6098, "step": 13482 }, { "epoch": 0.7515188673986958, "grad_norm": 0.5456457138061523, "learning_rate": 1.4959282666653468e-05, "loss": 1.6879, "step": 13483 }, { "epoch": 0.7515746056518589, "grad_norm": 0.54702228307724, "learning_rate": 1.4952931441351054e-05, "loss": 1.5991, "step": 13484 }, { "epoch": 0.751630343905022, "grad_norm": 0.5564615726470947, "learning_rate": 1.4946581327537312e-05, "loss": 1.5979, "step": 13485 }, { "epoch": 0.7516860821581851, "grad_norm": 0.5617123246192932, "learning_rate": 1.4940232325413638e-05, "loss": 1.672, "step": 13486 }, { "epoch": 0.7517418204113483, "grad_norm": 0.6110641956329346, "learning_rate": 1.4933884435181383e-05, "loss": 1.7764, "step": 13487 }, { "epoch": 0.7517975586645115, "grad_norm": 0.5776284337043762, "learning_rate": 1.4927537657041879e-05, "loss": 1.6431, "step": 13488 }, { "epoch": 0.7518532969176746, "grad_norm": 0.5718814730644226, "learning_rate": 1.4921191991196365e-05, "loss": 1.5381, "step": 13489 }, { "epoch": 0.7519090351708377, "grad_norm": 0.616912841796875, "learning_rate": 1.4914847437846147e-05, "loss": 1.7817, "step": 13490 }, { "epoch": 0.7519647734240009, "grad_norm": 0.5536419153213501, "learning_rate": 1.4908503997192391e-05, "loss": 1.6046, "step": 13491 }, { "epoch": 0.752020511677164, "grad_norm": 0.5831686854362488, "learning_rate": 1.4902161669436287e-05, "loss": 1.8871, "step": 13492 }, { "epoch": 0.7520762499303272, "grad_norm": 0.6020655632019043, "learning_rate": 1.4895820454778986e-05, "loss": 1.6076, "step": 13493 }, { "epoch": 0.7521319881834904, "grad_norm": 0.5684720873832703, "learning_rate": 1.4889480353421586e-05, "loss": 1.5878, "step": 13494 }, { "epoch": 0.7521877264366534, "grad_norm": 0.5338294506072998, "learning_rate": 1.4883141365565178e-05, "loss": 1.5914, "step": 13495 }, { "epoch": 0.7522434646898166, "grad_norm": 0.5656914710998535, "learning_rate": 1.4876803491410746e-05, "loss": 1.5741, "step": 13496 }, { "epoch": 0.7522992029429798, "grad_norm": 0.6254673004150391, "learning_rate": 1.487046673115936e-05, "loss": 1.6547, "step": 13497 }, { "epoch": 0.7523549411961429, "grad_norm": 0.5721585750579834, "learning_rate": 1.4864131085011934e-05, "loss": 1.6673, "step": 13498 }, { "epoch": 0.7524106794493061, "grad_norm": 0.589742124080658, "learning_rate": 1.4857796553169412e-05, "loss": 1.4904, "step": 13499 }, { "epoch": 0.7524664177024692, "grad_norm": 0.5777998566627502, "learning_rate": 1.4851463135832689e-05, "loss": 1.6627, "step": 13500 }, { "epoch": 0.7525221559556323, "grad_norm": 0.5200433135032654, "learning_rate": 1.4845130833202625e-05, "loss": 1.4796, "step": 13501 }, { "epoch": 0.7525778942087955, "grad_norm": 0.5812724232673645, "learning_rate": 1.4838799645480061e-05, "loss": 1.4919, "step": 13502 }, { "epoch": 0.7526336324619587, "grad_norm": 0.6124995946884155, "learning_rate": 1.4832469572865754e-05, "loss": 1.7236, "step": 13503 }, { "epoch": 0.7526893707151218, "grad_norm": 0.5346381664276123, "learning_rate": 1.4826140615560469e-05, "loss": 1.56, "step": 13504 }, { "epoch": 0.752745108968285, "grad_norm": 0.6158668994903564, "learning_rate": 1.4819812773764924e-05, "loss": 1.7928, "step": 13505 }, { "epoch": 0.7528008472214481, "grad_norm": 0.5613118410110474, "learning_rate": 1.4813486047679808e-05, "loss": 1.7989, "step": 13506 }, { "epoch": 0.7528565854746112, "grad_norm": 0.5942383408546448, "learning_rate": 1.4807160437505756e-05, "loss": 1.8563, "step": 13507 }, { "epoch": 0.7529123237277744, "grad_norm": 0.5570755004882812, "learning_rate": 1.4800835943443392e-05, "loss": 1.6341, "step": 13508 }, { "epoch": 0.7529680619809375, "grad_norm": 0.5737524032592773, "learning_rate": 1.4794512565693303e-05, "loss": 1.5606, "step": 13509 }, { "epoch": 0.7530238002341006, "grad_norm": 0.5694605112075806, "learning_rate": 1.4788190304455996e-05, "loss": 1.6136, "step": 13510 }, { "epoch": 0.7530795384872638, "grad_norm": 0.5719156265258789, "learning_rate": 1.4781869159931994e-05, "loss": 1.6516, "step": 13511 }, { "epoch": 0.7531352767404269, "grad_norm": 0.5556133389472961, "learning_rate": 1.4775549132321764e-05, "loss": 1.6058, "step": 13512 }, { "epoch": 0.7531910149935901, "grad_norm": 0.5758563280105591, "learning_rate": 1.4769230221825741e-05, "loss": 1.6085, "step": 13513 }, { "epoch": 0.7532467532467533, "grad_norm": 0.5548908710479736, "learning_rate": 1.4762912428644348e-05, "loss": 1.5802, "step": 13514 }, { "epoch": 0.7533024914999163, "grad_norm": 0.5532346963882446, "learning_rate": 1.4756595752977892e-05, "loss": 1.5401, "step": 13515 }, { "epoch": 0.7533582297530795, "grad_norm": NaN, "learning_rate": 1.4756595752977892e-05, "loss": 1.6784, "step": 13516 }, { "epoch": 0.7534139680062427, "grad_norm": 0.5984622240066528, "learning_rate": 1.4750280195026767e-05, "loss": 1.782, "step": 13517 }, { "epoch": 0.7534697062594058, "grad_norm": 0.5604650378227234, "learning_rate": 1.4743965754991217e-05, "loss": 1.4861, "step": 13518 }, { "epoch": 0.753525444512569, "grad_norm": 0.5935846567153931, "learning_rate": 1.4737652433071513e-05, "loss": 1.8707, "step": 13519 }, { "epoch": 0.7535811827657322, "grad_norm": 0.5540009140968323, "learning_rate": 1.4731340229467888e-05, "loss": 1.606, "step": 13520 }, { "epoch": 0.7536369210188952, "grad_norm": 0.5764244794845581, "learning_rate": 1.4725029144380515e-05, "loss": 1.7317, "step": 13521 }, { "epoch": 0.7536926592720584, "grad_norm": 0.5727776288986206, "learning_rate": 1.4718719178009567e-05, "loss": 1.5096, "step": 13522 }, { "epoch": 0.7537483975252216, "grad_norm": 0.6232305765151978, "learning_rate": 1.471241033055511e-05, "loss": 1.5065, "step": 13523 }, { "epoch": 0.7538041357783847, "grad_norm": 0.5429270267486572, "learning_rate": 1.4706102602217291e-05, "loss": 1.5696, "step": 13524 }, { "epoch": 0.7538598740315479, "grad_norm": 0.5457437634468079, "learning_rate": 1.4699795993196103e-05, "loss": 1.7403, "step": 13525 }, { "epoch": 0.753915612284711, "grad_norm": 0.5556396245956421, "learning_rate": 1.4693490503691571e-05, "loss": 1.6232, "step": 13526 }, { "epoch": 0.7539713505378741, "grad_norm": 0.5786697864532471, "learning_rate": 1.4687186133903668e-05, "loss": 1.732, "step": 13527 }, { "epoch": 0.7540270887910373, "grad_norm": 0.5584318041801453, "learning_rate": 1.4680882884032332e-05, "loss": 1.4731, "step": 13528 }, { "epoch": 0.7540828270442005, "grad_norm": 0.5617906451225281, "learning_rate": 1.4674580754277483e-05, "loss": 1.6842, "step": 13529 }, { "epoch": 0.7541385652973636, "grad_norm": 0.5737243890762329, "learning_rate": 1.4668279744838958e-05, "loss": 1.6571, "step": 13530 }, { "epoch": 0.7541943035505267, "grad_norm": 0.5647205710411072, "learning_rate": 1.4661979855916602e-05, "loss": 1.5902, "step": 13531 }, { "epoch": 0.7542500418036898, "grad_norm": 0.5912965536117554, "learning_rate": 1.4655681087710205e-05, "loss": 1.799, "step": 13532 }, { "epoch": 0.754305780056853, "grad_norm": 0.5538866519927979, "learning_rate": 1.4649383440419534e-05, "loss": 1.477, "step": 13533 }, { "epoch": 0.7543615183100162, "grad_norm": 0.5638540983200073, "learning_rate": 1.4643086914244314e-05, "loss": 1.6655, "step": 13534 }, { "epoch": 0.7544172565631793, "grad_norm": 0.5820828676223755, "learning_rate": 1.463679150938423e-05, "loss": 1.7415, "step": 13535 }, { "epoch": 0.7544729948163424, "grad_norm": 0.5478092432022095, "learning_rate": 1.4630497226038953e-05, "loss": 1.5266, "step": 13536 }, { "epoch": 0.7545287330695056, "grad_norm": 0.555873453617096, "learning_rate": 1.462420406440807e-05, "loss": 1.5921, "step": 13537 }, { "epoch": 0.7545844713226687, "grad_norm": 0.5451073050498962, "learning_rate": 1.4617912024691177e-05, "loss": 1.5367, "step": 13538 }, { "epoch": 0.7546402095758319, "grad_norm": 0.838469386100769, "learning_rate": 1.4611621107087826e-05, "loss": 1.3466, "step": 13539 }, { "epoch": 0.7546959478289951, "grad_norm": 0.5384388566017151, "learning_rate": 1.4605331311797526e-05, "loss": 1.5763, "step": 13540 }, { "epoch": 0.7547516860821581, "grad_norm": 0.6133666634559631, "learning_rate": 1.4599042639019767e-05, "loss": 2.1011, "step": 13541 }, { "epoch": 0.7548074243353213, "grad_norm": 0.5743862390518188, "learning_rate": 1.4592755088953935e-05, "loss": 1.7852, "step": 13542 }, { "epoch": 0.7548631625884845, "grad_norm": 0.5554697513580322, "learning_rate": 1.4586468661799512e-05, "loss": 1.7704, "step": 13543 }, { "epoch": 0.7549189008416476, "grad_norm": 0.5402542948722839, "learning_rate": 1.458018335775581e-05, "loss": 1.6328, "step": 13544 }, { "epoch": 0.7549746390948108, "grad_norm": 0.5552062392234802, "learning_rate": 1.4573899177022176e-05, "loss": 1.4073, "step": 13545 }, { "epoch": 0.755030377347974, "grad_norm": 0.5201606154441833, "learning_rate": 1.4567616119797916e-05, "loss": 1.4983, "step": 13546 }, { "epoch": 0.755086115601137, "grad_norm": 0.6160169243812561, "learning_rate": 1.4561334186282288e-05, "loss": 1.7044, "step": 13547 }, { "epoch": 0.7551418538543002, "grad_norm": 0.572885274887085, "learning_rate": 1.4555053376674532e-05, "loss": 1.5564, "step": 13548 }, { "epoch": 0.7551975921074634, "grad_norm": 0.5509326457977295, "learning_rate": 1.4548773691173794e-05, "loss": 1.5752, "step": 13549 }, { "epoch": 0.7552533303606265, "grad_norm": 0.5369094610214233, "learning_rate": 1.4542495129979294e-05, "loss": 1.5379, "step": 13550 }, { "epoch": 0.7553090686137897, "grad_norm": 0.5564013719558716, "learning_rate": 1.4536217693290094e-05, "loss": 1.6103, "step": 13551 }, { "epoch": 0.7553648068669528, "grad_norm": 0.5279716849327087, "learning_rate": 1.4529941381305307e-05, "loss": 1.2465, "step": 13552 }, { "epoch": 0.7554205451201159, "grad_norm": 0.6001760363578796, "learning_rate": 1.4523666194223972e-05, "loss": 1.8333, "step": 13553 }, { "epoch": 0.7554762833732791, "grad_norm": 0.5822129845619202, "learning_rate": 1.4517392132245105e-05, "loss": 1.6442, "step": 13554 }, { "epoch": 0.7555320216264422, "grad_norm": 0.5734686851501465, "learning_rate": 1.4511119195567696e-05, "loss": 1.5921, "step": 13555 }, { "epoch": 0.7555877598796054, "grad_norm": 0.6088379621505737, "learning_rate": 1.4504847384390657e-05, "loss": 1.833, "step": 13556 }, { "epoch": 0.7556434981327685, "grad_norm": 0.5933637619018555, "learning_rate": 1.4498576698912902e-05, "loss": 1.8395, "step": 13557 }, { "epoch": 0.7556992363859316, "grad_norm": 0.5619442462921143, "learning_rate": 1.4492307139333316e-05, "loss": 1.4357, "step": 13558 }, { "epoch": 0.7557549746390948, "grad_norm": 0.5386919379234314, "learning_rate": 1.448603870585072e-05, "loss": 1.511, "step": 13559 }, { "epoch": 0.755810712892258, "grad_norm": 0.5765253305435181, "learning_rate": 1.447977139866391e-05, "loss": 1.5333, "step": 13560 }, { "epoch": 0.7558664511454211, "grad_norm": 0.5310157537460327, "learning_rate": 1.447350521797166e-05, "loss": 1.3624, "step": 13561 }, { "epoch": 0.7559221893985842, "grad_norm": 0.5235006809234619, "learning_rate": 1.4467240163972706e-05, "loss": 1.3457, "step": 13562 }, { "epoch": 0.7559779276517474, "grad_norm": 0.5245125889778137, "learning_rate": 1.4460976236865703e-05, "loss": 1.1391, "step": 13563 }, { "epoch": 0.7560336659049105, "grad_norm": 0.5855050683021545, "learning_rate": 1.4454713436849333e-05, "loss": 1.7568, "step": 13564 }, { "epoch": 0.7560894041580737, "grad_norm": 0.5477002263069153, "learning_rate": 1.4448451764122206e-05, "loss": 1.6173, "step": 13565 }, { "epoch": 0.7561451424112369, "grad_norm": 0.5755841135978699, "learning_rate": 1.4442191218882911e-05, "loss": 1.7494, "step": 13566 }, { "epoch": 0.7562008806643999, "grad_norm": 0.5701185464859009, "learning_rate": 1.4435931801329994e-05, "loss": 1.6753, "step": 13567 }, { "epoch": 0.7562566189175631, "grad_norm": 0.6093330383300781, "learning_rate": 1.4429673511661962e-05, "loss": 1.7557, "step": 13568 }, { "epoch": 0.7563123571707263, "grad_norm": 0.5922994017601013, "learning_rate": 1.4423416350077318e-05, "loss": 1.6739, "step": 13569 }, { "epoch": 0.7563680954238894, "grad_norm": 0.5818817615509033, "learning_rate": 1.4417160316774465e-05, "loss": 1.824, "step": 13570 }, { "epoch": 0.7564238336770526, "grad_norm": 0.5563713908195496, "learning_rate": 1.4410905411951824e-05, "loss": 1.5959, "step": 13571 }, { "epoch": 0.7564795719302158, "grad_norm": 0.5510568022727966, "learning_rate": 1.4404651635807764e-05, "loss": 1.6415, "step": 13572 }, { "epoch": 0.7565353101833788, "grad_norm": 0.5633411407470703, "learning_rate": 1.4398398988540623e-05, "loss": 1.3973, "step": 13573 }, { "epoch": 0.756591048436542, "grad_norm": 0.5479226112365723, "learning_rate": 1.4392147470348704e-05, "loss": 1.5665, "step": 13574 }, { "epoch": 0.7566467866897052, "grad_norm": 0.5947529673576355, "learning_rate": 1.4385897081430233e-05, "loss": 1.4084, "step": 13575 }, { "epoch": 0.7567025249428683, "grad_norm": 0.5711276531219482, "learning_rate": 1.4379647821983488e-05, "loss": 1.6453, "step": 13576 }, { "epoch": 0.7567582631960315, "grad_norm": 0.5438666939735413, "learning_rate": 1.4373399692206618e-05, "loss": 1.7396, "step": 13577 }, { "epoch": 0.7568140014491945, "grad_norm": 0.5963262319564819, "learning_rate": 1.4367152692297797e-05, "loss": 1.8246, "step": 13578 }, { "epoch": 0.7568697397023577, "grad_norm": 0.5485786199569702, "learning_rate": 1.4360906822455134e-05, "loss": 1.5035, "step": 13579 }, { "epoch": 0.7569254779555209, "grad_norm": 0.5633838176727295, "learning_rate": 1.4354662082876718e-05, "loss": 1.6054, "step": 13580 }, { "epoch": 0.756981216208684, "grad_norm": 0.5588928461074829, "learning_rate": 1.434841847376061e-05, "loss": 1.5249, "step": 13581 }, { "epoch": 0.7570369544618472, "grad_norm": 0.5531945824623108, "learning_rate": 1.434217599530479e-05, "loss": 1.6811, "step": 13582 }, { "epoch": 0.7570926927150103, "grad_norm": 0.5261382460594177, "learning_rate": 1.4335934647707244e-05, "loss": 1.5311, "step": 13583 }, { "epoch": 0.7571484309681734, "grad_norm": 0.5691108703613281, "learning_rate": 1.432969443116592e-05, "loss": 1.2596, "step": 13584 }, { "epoch": 0.7572041692213366, "grad_norm": 0.5955578088760376, "learning_rate": 1.4323455345878717e-05, "loss": 1.737, "step": 13585 }, { "epoch": 0.7572599074744998, "grad_norm": 0.5733932852745056, "learning_rate": 1.4317217392043496e-05, "loss": 1.6467, "step": 13586 }, { "epoch": 0.7573156457276629, "grad_norm": 0.5477440357208252, "learning_rate": 1.4310980569858096e-05, "loss": 1.6087, "step": 13587 }, { "epoch": 0.757371383980826, "grad_norm": 0.5244430303573608, "learning_rate": 1.4304744879520333e-05, "loss": 1.495, "step": 13588 }, { "epoch": 0.7574271222339892, "grad_norm": 0.5610432028770447, "learning_rate": 1.429851032122792e-05, "loss": 1.5276, "step": 13589 }, { "epoch": 0.7574828604871523, "grad_norm": 0.5067460536956787, "learning_rate": 1.4292276895178608e-05, "loss": 1.3332, "step": 13590 }, { "epoch": 0.7575385987403155, "grad_norm": 0.5795394778251648, "learning_rate": 1.4286044601570086e-05, "loss": 1.6538, "step": 13591 }, { "epoch": 0.7575943369934787, "grad_norm": 0.5770653486251831, "learning_rate": 1.4279813440599999e-05, "loss": 1.6558, "step": 13592 }, { "epoch": 0.7576500752466417, "grad_norm": 0.6013298630714417, "learning_rate": 1.4273583412465968e-05, "loss": 1.9873, "step": 13593 }, { "epoch": 0.7577058134998049, "grad_norm": 0.554816484451294, "learning_rate": 1.4267354517365572e-05, "loss": 1.5415, "step": 13594 }, { "epoch": 0.7577615517529681, "grad_norm": 0.5791512727737427, "learning_rate": 1.4261126755496368e-05, "loss": 1.7035, "step": 13595 }, { "epoch": 0.7578172900061312, "grad_norm": 0.5591641068458557, "learning_rate": 1.4254900127055843e-05, "loss": 1.5896, "step": 13596 }, { "epoch": 0.7578730282592944, "grad_norm": 0.5369632840156555, "learning_rate": 1.424867463224147e-05, "loss": 1.5107, "step": 13597 }, { "epoch": 0.7579287665124576, "grad_norm": 0.6140280365943909, "learning_rate": 1.4242450271250696e-05, "loss": 1.7632, "step": 13598 }, { "epoch": 0.7579845047656206, "grad_norm": 0.5376614332199097, "learning_rate": 1.4236227044280914e-05, "loss": 1.4528, "step": 13599 }, { "epoch": 0.7580402430187838, "grad_norm": 0.5407631397247314, "learning_rate": 1.4230004951529513e-05, "loss": 1.5148, "step": 13600 }, { "epoch": 0.7580959812719469, "grad_norm": 0.5585989952087402, "learning_rate": 1.4223783993193767e-05, "loss": 1.6172, "step": 13601 }, { "epoch": 0.7581517195251101, "grad_norm": 0.6118331551551819, "learning_rate": 1.4217564169471038e-05, "loss": 1.9368, "step": 13602 }, { "epoch": 0.7582074577782733, "grad_norm": 0.555669903755188, "learning_rate": 1.421134548055853e-05, "loss": 1.6861, "step": 13603 }, { "epoch": 0.7582631960314363, "grad_norm": 0.5365854501724243, "learning_rate": 1.4205127926653483e-05, "loss": 1.5603, "step": 13604 }, { "epoch": 0.7583189342845995, "grad_norm": 0.603866457939148, "learning_rate": 1.4198911507953077e-05, "loss": 1.7819, "step": 13605 }, { "epoch": 0.7583746725377627, "grad_norm": 0.5609720945358276, "learning_rate": 1.4192696224654467e-05, "loss": 1.7878, "step": 13606 }, { "epoch": 0.7584304107909258, "grad_norm": 0.5774200558662415, "learning_rate": 1.418648207695477e-05, "loss": 1.7785, "step": 13607 }, { "epoch": 0.758486149044089, "grad_norm": 0.5633645057678223, "learning_rate": 1.4180269065051043e-05, "loss": 1.6062, "step": 13608 }, { "epoch": 0.7585418872972521, "grad_norm": 0.595280647277832, "learning_rate": 1.4174057189140339e-05, "loss": 1.663, "step": 13609 }, { "epoch": 0.7585976255504152, "grad_norm": 0.5903527736663818, "learning_rate": 1.4167846449419659e-05, "loss": 1.5436, "step": 13610 }, { "epoch": 0.7586533638035784, "grad_norm": 0.5782942771911621, "learning_rate": 1.4161636846085973e-05, "loss": 2.0576, "step": 13611 }, { "epoch": 0.7587091020567416, "grad_norm": 0.5964334607124329, "learning_rate": 1.4155428379336216e-05, "loss": 1.5829, "step": 13612 }, { "epoch": 0.7587648403099047, "grad_norm": 0.5553979277610779, "learning_rate": 1.4149221049367283e-05, "loss": 1.7045, "step": 13613 }, { "epoch": 0.7588205785630678, "grad_norm": 0.5777998566627502, "learning_rate": 1.4143014856376053e-05, "loss": 1.7569, "step": 13614 }, { "epoch": 0.758876316816231, "grad_norm": 0.5861302018165588, "learning_rate": 1.4136809800559308e-05, "loss": 1.592, "step": 13615 }, { "epoch": 0.7589320550693941, "grad_norm": 0.61280357837677, "learning_rate": 1.4130605882113862e-05, "loss": 1.7016, "step": 13616 }, { "epoch": 0.7589877933225573, "grad_norm": 0.5595108270645142, "learning_rate": 1.4124403101236466e-05, "loss": 1.8053, "step": 13617 }, { "epoch": 0.7590435315757205, "grad_norm": 0.5404929518699646, "learning_rate": 1.411820145812383e-05, "loss": 1.3909, "step": 13618 }, { "epoch": 0.7590992698288835, "grad_norm": 0.5916149616241455, "learning_rate": 1.4112000952972643e-05, "loss": 1.6921, "step": 13619 }, { "epoch": 0.7591550080820467, "grad_norm": 0.6086878776550293, "learning_rate": 1.4105801585979545e-05, "loss": 1.7093, "step": 13620 }, { "epoch": 0.7592107463352099, "grad_norm": 0.5869114995002747, "learning_rate": 1.4099603357341152e-05, "loss": 1.7623, "step": 13621 }, { "epoch": 0.759266484588373, "grad_norm": 0.5688807964324951, "learning_rate": 1.4093406267254017e-05, "loss": 1.5728, "step": 13622 }, { "epoch": 0.7593222228415362, "grad_norm": 0.5534716844558716, "learning_rate": 1.4087210315914684e-05, "loss": 1.653, "step": 13623 }, { "epoch": 0.7593779610946992, "grad_norm": 0.5276861190795898, "learning_rate": 1.4081015503519651e-05, "loss": 1.6385, "step": 13624 }, { "epoch": 0.7594336993478624, "grad_norm": 0.5419962406158447, "learning_rate": 1.4074821830265388e-05, "loss": 1.5487, "step": 13625 }, { "epoch": 0.7594894376010256, "grad_norm": 0.5207490921020508, "learning_rate": 1.406862929634833e-05, "loss": 1.4538, "step": 13626 }, { "epoch": 0.7595451758541887, "grad_norm": 0.7052216529846191, "learning_rate": 1.4062437901964825e-05, "loss": 1.5614, "step": 13627 }, { "epoch": 0.7596009141073519, "grad_norm": 0.5324676036834717, "learning_rate": 1.4056247647311294e-05, "loss": 1.382, "step": 13628 }, { "epoch": 0.759656652360515, "grad_norm": 0.5526208281517029, "learning_rate": 1.4050058532584003e-05, "loss": 1.5966, "step": 13629 }, { "epoch": 0.7597123906136781, "grad_norm": 0.5500971674919128, "learning_rate": 1.4043870557979255e-05, "loss": 1.5784, "step": 13630 }, { "epoch": 0.7597681288668413, "grad_norm": 0.5780391693115234, "learning_rate": 1.4037683723693296e-05, "loss": 1.409, "step": 13631 }, { "epoch": 0.7598238671200045, "grad_norm": 0.577774703502655, "learning_rate": 1.403149802992233e-05, "loss": 1.5705, "step": 13632 }, { "epoch": 0.7598796053731676, "grad_norm": 0.5978648066520691, "learning_rate": 1.4025313476862551e-05, "loss": 1.8538, "step": 13633 }, { "epoch": 0.7599353436263308, "grad_norm": 0.5553382039070129, "learning_rate": 1.4019130064710068e-05, "loss": 1.3479, "step": 13634 }, { "epoch": 0.7599910818794939, "grad_norm": 0.5762467384338379, "learning_rate": 1.4012947793660996e-05, "loss": 1.6895, "step": 13635 }, { "epoch": 0.760046820132657, "grad_norm": 0.602973461151123, "learning_rate": 1.4006766663911397e-05, "loss": 1.5507, "step": 13636 }, { "epoch": 0.7601025583858202, "grad_norm": 0.5446701049804688, "learning_rate": 1.4000586675657312e-05, "loss": 1.4995, "step": 13637 }, { "epoch": 0.7601582966389834, "grad_norm": 0.5432769656181335, "learning_rate": 1.399440782909472e-05, "loss": 1.6442, "step": 13638 }, { "epoch": 0.7602140348921465, "grad_norm": 0.5659343004226685, "learning_rate": 1.3988230124419589e-05, "loss": 1.6857, "step": 13639 }, { "epoch": 0.7602697731453096, "grad_norm": 0.5553669333457947, "learning_rate": 1.3982053561827846e-05, "loss": 1.6515, "step": 13640 }, { "epoch": 0.7603255113984728, "grad_norm": 0.6063775420188904, "learning_rate": 1.3975878141515352e-05, "loss": 1.7898, "step": 13641 }, { "epoch": 0.7603812496516359, "grad_norm": 0.5545953512191772, "learning_rate": 1.3969703863677969e-05, "loss": 1.5781, "step": 13642 }, { "epoch": 0.7604369879047991, "grad_norm": 0.6215736269950867, "learning_rate": 1.396353072851151e-05, "loss": 1.7786, "step": 13643 }, { "epoch": 0.7604927261579623, "grad_norm": 0.5639563798904419, "learning_rate": 1.3957358736211745e-05, "loss": 1.6052, "step": 13644 }, { "epoch": 0.7605484644111253, "grad_norm": 0.5856985449790955, "learning_rate": 1.3951187886974416e-05, "loss": 1.7038, "step": 13645 }, { "epoch": 0.7606042026642885, "grad_norm": 0.5552805662155151, "learning_rate": 1.3945018180995234e-05, "loss": 1.6561, "step": 13646 }, { "epoch": 0.7606599409174516, "grad_norm": 0.5644158720970154, "learning_rate": 1.3938849618469868e-05, "loss": 1.6025, "step": 13647 }, { "epoch": 0.7607156791706148, "grad_norm": 0.5574057698249817, "learning_rate": 1.3932682199593933e-05, "loss": 1.7453, "step": 13648 }, { "epoch": 0.760771417423778, "grad_norm": 0.5566650629043579, "learning_rate": 1.3926515924563027e-05, "loss": 1.6144, "step": 13649 }, { "epoch": 0.760827155676941, "grad_norm": 0.5857501029968262, "learning_rate": 1.3920350793572717e-05, "loss": 1.6279, "step": 13650 }, { "epoch": 0.7608828939301042, "grad_norm": 0.5910730361938477, "learning_rate": 1.391418680681852e-05, "loss": 1.6013, "step": 13651 }, { "epoch": 0.7609386321832674, "grad_norm": 0.6606738567352295, "learning_rate": 1.3908023964495937e-05, "loss": 1.7508, "step": 13652 }, { "epoch": 0.7609943704364305, "grad_norm": 0.5536946058273315, "learning_rate": 1.390186226680037e-05, "loss": 1.7573, "step": 13653 }, { "epoch": 0.7610501086895937, "grad_norm": 0.5876284837722778, "learning_rate": 1.38957017139273e-05, "loss": 1.8013, "step": 13654 }, { "epoch": 0.7611058469427568, "grad_norm": 0.5489315390586853, "learning_rate": 1.3889542306072052e-05, "loss": 1.5425, "step": 13655 }, { "epoch": 0.7611615851959199, "grad_norm": 0.6121096014976501, "learning_rate": 1.388338404342998e-05, "loss": 1.694, "step": 13656 }, { "epoch": 0.7612173234490831, "grad_norm": 0.5223791599273682, "learning_rate": 1.3877226926196397e-05, "loss": 1.6321, "step": 13657 }, { "epoch": 0.7612730617022463, "grad_norm": 0.5644776225090027, "learning_rate": 1.3871070954566561e-05, "loss": 1.7296, "step": 13658 }, { "epoch": 0.7613287999554094, "grad_norm": 0.5516535639762878, "learning_rate": 1.3864916128735727e-05, "loss": 1.5889, "step": 13659 }, { "epoch": 0.7613845382085725, "grad_norm": 0.5555848479270935, "learning_rate": 1.3858762448899037e-05, "loss": 1.6957, "step": 13660 }, { "epoch": 0.7614402764617357, "grad_norm": 0.559370756149292, "learning_rate": 1.3852609915251719e-05, "loss": 1.504, "step": 13661 }, { "epoch": 0.7614960147148988, "grad_norm": 0.5376693606376648, "learning_rate": 1.3846458527988842e-05, "loss": 1.5899, "step": 13662 }, { "epoch": 0.761551752968062, "grad_norm": 0.5808365345001221, "learning_rate": 1.3840308287305509e-05, "loss": 1.8645, "step": 13663 }, { "epoch": 0.7616074912212252, "grad_norm": 0.5773041844367981, "learning_rate": 1.3834159193396778e-05, "loss": 1.7324, "step": 13664 }, { "epoch": 0.7616632294743882, "grad_norm": 0.6116316914558411, "learning_rate": 1.382801124645765e-05, "loss": 1.8378, "step": 13665 }, { "epoch": 0.7617189677275514, "grad_norm": 0.5963553786277771, "learning_rate": 1.3821864446683125e-05, "loss": 1.7662, "step": 13666 }, { "epoch": 0.7617747059807146, "grad_norm": 0.5584465265274048, "learning_rate": 1.3815718794268112e-05, "loss": 1.5952, "step": 13667 }, { "epoch": 0.7618304442338777, "grad_norm": 0.5512256622314453, "learning_rate": 1.3809574289407529e-05, "loss": 1.5949, "step": 13668 }, { "epoch": 0.7618861824870409, "grad_norm": 0.5420078635215759, "learning_rate": 1.3803430932296247e-05, "loss": 1.7301, "step": 13669 }, { "epoch": 0.761941920740204, "grad_norm": 0.5526279211044312, "learning_rate": 1.3797288723129093e-05, "loss": 1.6325, "step": 13670 }, { "epoch": 0.7619976589933671, "grad_norm": 0.5725477337837219, "learning_rate": 1.3791147662100867e-05, "loss": 1.7894, "step": 13671 }, { "epoch": 0.7620533972465303, "grad_norm": 0.5640320181846619, "learning_rate": 1.378500774940632e-05, "loss": 1.6614, "step": 13672 }, { "epoch": 0.7621091354996934, "grad_norm": 0.5445780754089355, "learning_rate": 1.3778868985240195e-05, "loss": 1.2925, "step": 13673 }, { "epoch": 0.7621648737528566, "grad_norm": 0.5337774753570557, "learning_rate": 1.3772731369797154e-05, "loss": 1.4987, "step": 13674 }, { "epoch": 0.7622206120060198, "grad_norm": 0.5738458633422852, "learning_rate": 1.3766594903271845e-05, "loss": 1.6826, "step": 13675 }, { "epoch": 0.7622763502591828, "grad_norm": 0.563511312007904, "learning_rate": 1.3760459585858897e-05, "loss": 1.7222, "step": 13676 }, { "epoch": 0.762332088512346, "grad_norm": 0.5649859309196472, "learning_rate": 1.3754325417752878e-05, "loss": 1.6004, "step": 13677 }, { "epoch": 0.7623878267655092, "grad_norm": 0.5031634569168091, "learning_rate": 1.374819239914834e-05, "loss": 1.3402, "step": 13678 }, { "epoch": 0.7624435650186723, "grad_norm": 0.6033832430839539, "learning_rate": 1.3742060530239753e-05, "loss": 1.7393, "step": 13679 }, { "epoch": 0.7624993032718355, "grad_norm": 0.5838034152984619, "learning_rate": 1.3735929811221637e-05, "loss": 1.7233, "step": 13680 }, { "epoch": 0.7625550415249986, "grad_norm": 0.5297046303749084, "learning_rate": 1.372980024228837e-05, "loss": 1.3896, "step": 13681 }, { "epoch": 0.7626107797781617, "grad_norm": 0.6580976843833923, "learning_rate": 1.3723671823634376e-05, "loss": 1.8186, "step": 13682 }, { "epoch": 0.7626665180313249, "grad_norm": 0.7035778164863586, "learning_rate": 1.3717544555454009e-05, "loss": 1.721, "step": 13683 }, { "epoch": 0.7627222562844881, "grad_norm": 0.5296900272369385, "learning_rate": 1.3711418437941582e-05, "loss": 1.462, "step": 13684 }, { "epoch": 0.7627779945376512, "grad_norm": 0.5588696002960205, "learning_rate": 1.3705293471291403e-05, "loss": 1.5209, "step": 13685 }, { "epoch": 0.7628337327908143, "grad_norm": 0.5136246085166931, "learning_rate": 1.3699169655697669e-05, "loss": 1.5621, "step": 13686 }, { "epoch": 0.7628894710439775, "grad_norm": 0.560178279876709, "learning_rate": 1.3693046991354658e-05, "loss": 1.7407, "step": 13687 }, { "epoch": 0.7629452092971406, "grad_norm": 0.5490294694900513, "learning_rate": 1.3686925478456497e-05, "loss": 1.6656, "step": 13688 }, { "epoch": 0.7630009475503038, "grad_norm": 0.5643256902694702, "learning_rate": 1.3680805117197344e-05, "loss": 1.4874, "step": 13689 }, { "epoch": 0.763056685803467, "grad_norm": 0.5297697186470032, "learning_rate": 1.367468590777129e-05, "loss": 1.6193, "step": 13690 }, { "epoch": 0.76311242405663, "grad_norm": 0.5320075750350952, "learning_rate": 1.366856785037241e-05, "loss": 1.5072, "step": 13691 }, { "epoch": 0.7631681623097932, "grad_norm": 0.5761438012123108, "learning_rate": 1.3662450945194743e-05, "loss": 1.5143, "step": 13692 }, { "epoch": 0.7632239005629563, "grad_norm": 0.5583884119987488, "learning_rate": 1.3656335192432258e-05, "loss": 1.6937, "step": 13693 }, { "epoch": 0.7632796388161195, "grad_norm": 0.5506449937820435, "learning_rate": 1.3650220592278923e-05, "loss": 1.6081, "step": 13694 }, { "epoch": 0.7633353770692827, "grad_norm": 0.5765452980995178, "learning_rate": 1.3644107144928658e-05, "loss": 1.5314, "step": 13695 }, { "epoch": 0.7633911153224457, "grad_norm": 0.6005212068557739, "learning_rate": 1.3637994850575341e-05, "loss": 1.6142, "step": 13696 }, { "epoch": 0.7634468535756089, "grad_norm": 0.5738573670387268, "learning_rate": 1.3631883709412823e-05, "loss": 1.683, "step": 13697 }, { "epoch": 0.7635025918287721, "grad_norm": 0.5588680505752563, "learning_rate": 1.3625773721634915e-05, "loss": 1.6197, "step": 13698 }, { "epoch": 0.7635583300819352, "grad_norm": 0.5157375931739807, "learning_rate": 1.3619664887435402e-05, "loss": 1.6233, "step": 13699 }, { "epoch": 0.7636140683350984, "grad_norm": 0.5695037245750427, "learning_rate": 1.3613557207007988e-05, "loss": 1.5264, "step": 13700 }, { "epoch": 0.7636698065882616, "grad_norm": 0.5643973350524902, "learning_rate": 1.3607450680546397e-05, "loss": 1.5529, "step": 13701 }, { "epoch": 0.7637255448414246, "grad_norm": 0.5982683897018433, "learning_rate": 1.3601345308244284e-05, "loss": 1.8334, "step": 13702 }, { "epoch": 0.7637812830945878, "grad_norm": 0.5559334754943848, "learning_rate": 1.359524109029528e-05, "loss": 1.3872, "step": 13703 }, { "epoch": 0.763837021347751, "grad_norm": 0.591163694858551, "learning_rate": 1.3589138026892988e-05, "loss": 1.7196, "step": 13704 }, { "epoch": 0.7638927596009141, "grad_norm": 0.5622092485427856, "learning_rate": 1.3583036118230924e-05, "loss": 1.7068, "step": 13705 }, { "epoch": 0.7639484978540773, "grad_norm": 0.5617137551307678, "learning_rate": 1.3576935364502653e-05, "loss": 1.6201, "step": 13706 }, { "epoch": 0.7640042361072404, "grad_norm": 0.6120706796646118, "learning_rate": 1.3570835765901612e-05, "loss": 1.697, "step": 13707 }, { "epoch": 0.7640599743604035, "grad_norm": 0.562481164932251, "learning_rate": 1.3564737322621274e-05, "loss": 1.6438, "step": 13708 }, { "epoch": 0.7641157126135667, "grad_norm": 0.5552496314048767, "learning_rate": 1.355864003485503e-05, "loss": 1.7532, "step": 13709 }, { "epoch": 0.7641714508667299, "grad_norm": 0.5245192646980286, "learning_rate": 1.3552543902796256e-05, "loss": 1.3265, "step": 13710 }, { "epoch": 0.764227189119893, "grad_norm": 0.5369590520858765, "learning_rate": 1.35464489266383e-05, "loss": 1.509, "step": 13711 }, { "epoch": 0.7642829273730561, "grad_norm": 0.5913751721382141, "learning_rate": 1.3540355106574416e-05, "loss": 1.663, "step": 13712 }, { "epoch": 0.7643386656262193, "grad_norm": 0.5683638453483582, "learning_rate": 1.3534262442797923e-05, "loss": 1.6732, "step": 13713 }, { "epoch": 0.7643944038793824, "grad_norm": 0.5665015578269958, "learning_rate": 1.3528170935502005e-05, "loss": 1.5886, "step": 13714 }, { "epoch": 0.7644501421325456, "grad_norm": 0.528668224811554, "learning_rate": 1.3522080584879854e-05, "loss": 1.5023, "step": 13715 }, { "epoch": 0.7645058803857087, "grad_norm": 0.5553814172744751, "learning_rate": 1.3515991391124627e-05, "loss": 1.5308, "step": 13716 }, { "epoch": 0.7645616186388718, "grad_norm": 0.6319010257720947, "learning_rate": 1.3509903354429437e-05, "loss": 1.7049, "step": 13717 }, { "epoch": 0.764617356892035, "grad_norm": 0.5890353918075562, "learning_rate": 1.3503816474987379e-05, "loss": 1.6599, "step": 13718 }, { "epoch": 0.7646730951451981, "grad_norm": 0.5836519598960876, "learning_rate": 1.3497730752991455e-05, "loss": 1.8447, "step": 13719 }, { "epoch": 0.7647288333983613, "grad_norm": 0.5459491610527039, "learning_rate": 1.3491646188634689e-05, "loss": 1.6414, "step": 13720 }, { "epoch": 0.7647845716515245, "grad_norm": 0.5694407224655151, "learning_rate": 1.348556278211005e-05, "loss": 1.5857, "step": 13721 }, { "epoch": 0.7648403099046875, "grad_norm": 0.5732302069664001, "learning_rate": 1.3479480533610468e-05, "loss": 1.7178, "step": 13722 }, { "epoch": 0.7648960481578507, "grad_norm": 0.6317426562309265, "learning_rate": 1.3473399443328826e-05, "loss": 1.864, "step": 13723 }, { "epoch": 0.7649517864110139, "grad_norm": 0.5422190427780151, "learning_rate": 1.3467319511457993e-05, "loss": 1.6681, "step": 13724 }, { "epoch": 0.765007524664177, "grad_norm": 0.5311571955680847, "learning_rate": 1.34612407381908e-05, "loss": 1.5217, "step": 13725 }, { "epoch": 0.7650632629173402, "grad_norm": 0.5102006196975708, "learning_rate": 1.3455163123719999e-05, "loss": 1.3252, "step": 13726 }, { "epoch": 0.7651190011705034, "grad_norm": 0.5632702112197876, "learning_rate": 1.344908666823836e-05, "loss": 1.5403, "step": 13727 }, { "epoch": 0.7651747394236664, "grad_norm": 0.5720388293266296, "learning_rate": 1.3443011371938574e-05, "loss": 1.7533, "step": 13728 }, { "epoch": 0.7652304776768296, "grad_norm": 0.5603064298629761, "learning_rate": 1.3436937235013331e-05, "loss": 1.7345, "step": 13729 }, { "epoch": 0.7652862159299928, "grad_norm": 0.5317055583000183, "learning_rate": 1.3430864257655273e-05, "loss": 1.5839, "step": 13730 }, { "epoch": 0.7653419541831559, "grad_norm": 0.5410267114639282, "learning_rate": 1.3424792440056966e-05, "loss": 1.4791, "step": 13731 }, { "epoch": 0.765397692436319, "grad_norm": 0.5275070071220398, "learning_rate": 1.3418721782411015e-05, "loss": 1.5329, "step": 13732 }, { "epoch": 0.7654534306894822, "grad_norm": 0.5779644250869751, "learning_rate": 1.3412652284909916e-05, "loss": 1.7906, "step": 13733 }, { "epoch": 0.7655091689426453, "grad_norm": 0.6660231947898865, "learning_rate": 1.3406583947746166e-05, "loss": 1.952, "step": 13734 }, { "epoch": 0.7655649071958085, "grad_norm": 0.5571669936180115, "learning_rate": 1.340051677111222e-05, "loss": 1.6007, "step": 13735 }, { "epoch": 0.7656206454489717, "grad_norm": 0.5619083046913147, "learning_rate": 1.3394450755200488e-05, "loss": 1.6623, "step": 13736 }, { "epoch": 0.7656763837021348, "grad_norm": 0.5739771127700806, "learning_rate": 1.3388385900203371e-05, "loss": 1.7574, "step": 13737 }, { "epoch": 0.7657321219552979, "grad_norm": 0.5774732232093811, "learning_rate": 1.3382322206313164e-05, "loss": 1.5834, "step": 13738 }, { "epoch": 0.765787860208461, "grad_norm": 0.5748267769813538, "learning_rate": 1.337625967372223e-05, "loss": 1.7372, "step": 13739 }, { "epoch": 0.7658435984616242, "grad_norm": 0.5925459265708923, "learning_rate": 1.3370198302622794e-05, "loss": 1.8107, "step": 13740 }, { "epoch": 0.7658993367147874, "grad_norm": 0.5471937656402588, "learning_rate": 1.3364138093207096e-05, "loss": 1.4694, "step": 13741 }, { "epoch": 0.7659550749679505, "grad_norm": 0.6107663512229919, "learning_rate": 1.3358079045667338e-05, "loss": 1.6048, "step": 13742 }, { "epoch": 0.7660108132211136, "grad_norm": 0.5694422125816345, "learning_rate": 1.3352021160195676e-05, "loss": 1.5999, "step": 13743 }, { "epoch": 0.7660665514742768, "grad_norm": 0.5657966732978821, "learning_rate": 1.3345964436984249e-05, "loss": 1.5563, "step": 13744 }, { "epoch": 0.7661222897274399, "grad_norm": 0.5410760045051575, "learning_rate": 1.3339908876225105e-05, "loss": 1.4062, "step": 13745 }, { "epoch": 0.7661780279806031, "grad_norm": 0.6214928030967712, "learning_rate": 1.3333854478110309e-05, "loss": 1.5772, "step": 13746 }, { "epoch": 0.7662337662337663, "grad_norm": 0.6026737689971924, "learning_rate": 1.3327801242831867e-05, "loss": 1.7012, "step": 13747 }, { "epoch": 0.7662895044869293, "grad_norm": 0.5919846892356873, "learning_rate": 1.332174917058176e-05, "loss": 1.5764, "step": 13748 }, { "epoch": 0.7663452427400925, "grad_norm": 0.5703722238540649, "learning_rate": 1.3315698261551917e-05, "loss": 1.4723, "step": 13749 }, { "epoch": 0.7664009809932557, "grad_norm": 0.5685303807258606, "learning_rate": 1.3309648515934241e-05, "loss": 1.6053, "step": 13750 }, { "epoch": 0.7664567192464188, "grad_norm": 0.5829964876174927, "learning_rate": 1.3303599933920613e-05, "loss": 1.5209, "step": 13751 }, { "epoch": 0.766512457499582, "grad_norm": 0.5797625184059143, "learning_rate": 1.3297552515702822e-05, "loss": 1.8081, "step": 13752 }, { "epoch": 0.7665681957527452, "grad_norm": 0.6179783940315247, "learning_rate": 1.3291506261472675e-05, "loss": 1.754, "step": 13753 }, { "epoch": 0.7666239340059082, "grad_norm": 0.5700926184654236, "learning_rate": 1.3285461171421925e-05, "loss": 1.622, "step": 13754 }, { "epoch": 0.7666796722590714, "grad_norm": 0.5579239130020142, "learning_rate": 1.3279417245742286e-05, "loss": 1.6621, "step": 13755 }, { "epoch": 0.7667354105122346, "grad_norm": 0.5812460780143738, "learning_rate": 1.327337448462545e-05, "loss": 1.6559, "step": 13756 }, { "epoch": 0.7667911487653977, "grad_norm": 0.5232528448104858, "learning_rate": 1.3267332888263013e-05, "loss": 1.631, "step": 13757 }, { "epoch": 0.7668468870185609, "grad_norm": 0.5652537941932678, "learning_rate": 1.3261292456846647e-05, "loss": 1.6011, "step": 13758 }, { "epoch": 0.766902625271724, "grad_norm": 0.5638788938522339, "learning_rate": 1.3255253190567863e-05, "loss": 1.7915, "step": 13759 }, { "epoch": 0.7669583635248871, "grad_norm": 0.5904683470726013, "learning_rate": 1.3249215089618211e-05, "loss": 1.6165, "step": 13760 }, { "epoch": 0.7670141017780503, "grad_norm": 0.5620837211608887, "learning_rate": 1.3243178154189184e-05, "loss": 1.5416, "step": 13761 }, { "epoch": 0.7670698400312134, "grad_norm": 0.5649104118347168, "learning_rate": 1.323714238447224e-05, "loss": 1.678, "step": 13762 }, { "epoch": 0.7671255782843766, "grad_norm": 0.6296602487564087, "learning_rate": 1.3231107780658814e-05, "loss": 1.6428, "step": 13763 }, { "epoch": 0.7671813165375397, "grad_norm": 0.5722455978393555, "learning_rate": 1.3225074342940235e-05, "loss": 1.6772, "step": 13764 }, { "epoch": 0.7672370547907028, "grad_norm": 0.5544499158859253, "learning_rate": 1.321904207150792e-05, "loss": 1.4968, "step": 13765 }, { "epoch": 0.767292793043866, "grad_norm": 0.5880872011184692, "learning_rate": 1.321301096655313e-05, "loss": 1.67, "step": 13766 }, { "epoch": 0.7673485312970292, "grad_norm": 0.5740914940834045, "learning_rate": 1.3206981028267145e-05, "loss": 1.6711, "step": 13767 }, { "epoch": 0.7674042695501923, "grad_norm": 0.5627743601799011, "learning_rate": 1.3200952256841204e-05, "loss": 1.7168, "step": 13768 }, { "epoch": 0.7674600078033554, "grad_norm": 0.5852112174034119, "learning_rate": 1.3194924652466507e-05, "loss": 1.5528, "step": 13769 }, { "epoch": 0.7675157460565186, "grad_norm": 0.5481190085411072, "learning_rate": 1.3188898215334228e-05, "loss": 1.668, "step": 13770 }, { "epoch": 0.7675714843096817, "grad_norm": 0.5531885027885437, "learning_rate": 1.3182872945635455e-05, "loss": 1.5727, "step": 13771 }, { "epoch": 0.7676272225628449, "grad_norm": 0.5442955493927002, "learning_rate": 1.317684884356129e-05, "loss": 1.6687, "step": 13772 }, { "epoch": 0.7676829608160081, "grad_norm": 0.5647032260894775, "learning_rate": 1.3170825909302792e-05, "loss": 1.7627, "step": 13773 }, { "epoch": 0.7677386990691711, "grad_norm": 0.5629161596298218, "learning_rate": 1.3164804143050963e-05, "loss": 1.4726, "step": 13774 }, { "epoch": 0.7677944373223343, "grad_norm": 0.5685316920280457, "learning_rate": 1.3158783544996789e-05, "loss": 1.4011, "step": 13775 }, { "epoch": 0.7678501755754975, "grad_norm": 0.5748550295829773, "learning_rate": 1.3152764115331195e-05, "loss": 1.5873, "step": 13776 }, { "epoch": 0.7679059138286606, "grad_norm": 0.5731246471405029, "learning_rate": 1.3146745854245108e-05, "loss": 1.5637, "step": 13777 }, { "epoch": 0.7679616520818238, "grad_norm": 0.5486955046653748, "learning_rate": 1.3140728761929356e-05, "loss": 1.6605, "step": 13778 }, { "epoch": 0.768017390334987, "grad_norm": 0.5804146528244019, "learning_rate": 1.313471283857478e-05, "loss": 1.7208, "step": 13779 }, { "epoch": 0.76807312858815, "grad_norm": 0.5566115379333496, "learning_rate": 1.3128698084372182e-05, "loss": 1.8526, "step": 13780 }, { "epoch": 0.7681288668413132, "grad_norm": 0.5542247295379639, "learning_rate": 1.31226844995123e-05, "loss": 1.4398, "step": 13781 }, { "epoch": 0.7681846050944764, "grad_norm": 0.556767463684082, "learning_rate": 1.3116672084185872e-05, "loss": 1.6531, "step": 13782 }, { "epoch": 0.7682403433476395, "grad_norm": 0.5431240797042847, "learning_rate": 1.3110660838583533e-05, "loss": 1.6007, "step": 13783 }, { "epoch": 0.7682960816008026, "grad_norm": 0.5814715027809143, "learning_rate": 1.3104650762895975e-05, "loss": 1.4798, "step": 13784 }, { "epoch": 0.7683518198539657, "grad_norm": 0.5413219332695007, "learning_rate": 1.3098641857313777e-05, "loss": 1.5713, "step": 13785 }, { "epoch": 0.7684075581071289, "grad_norm": 0.6077486872673035, "learning_rate": 1.3092634122027497e-05, "loss": 1.7747, "step": 13786 }, { "epoch": 0.7684632963602921, "grad_norm": 0.5583086609840393, "learning_rate": 1.3086627557227687e-05, "loss": 1.6071, "step": 13787 }, { "epoch": 0.7685190346134552, "grad_norm": 0.5888667702674866, "learning_rate": 1.3080622163104827e-05, "loss": 1.823, "step": 13788 }, { "epoch": 0.7685747728666183, "grad_norm": 0.5727972984313965, "learning_rate": 1.3074617939849393e-05, "loss": 1.5356, "step": 13789 }, { "epoch": 0.7686305111197815, "grad_norm": 0.5865001082420349, "learning_rate": 1.3068614887651759e-05, "loss": 1.6255, "step": 13790 }, { "epoch": 0.7686862493729446, "grad_norm": 0.578157901763916, "learning_rate": 1.3062613006702361e-05, "loss": 1.7089, "step": 13791 }, { "epoch": 0.7687419876261078, "grad_norm": 0.5981795191764832, "learning_rate": 1.3056612297191505e-05, "loss": 1.9307, "step": 13792 }, { "epoch": 0.768797725879271, "grad_norm": 0.58543461561203, "learning_rate": 1.3050612759309515e-05, "loss": 1.5325, "step": 13793 }, { "epoch": 0.768853464132434, "grad_norm": 0.5604169964790344, "learning_rate": 1.3044614393246662e-05, "loss": 1.5726, "step": 13794 }, { "epoch": 0.7689092023855972, "grad_norm": 0.5601847767829895, "learning_rate": 1.3038617199193171e-05, "loss": 1.5083, "step": 13795 }, { "epoch": 0.7689649406387604, "grad_norm": 0.552564799785614, "learning_rate": 1.3032621177339255e-05, "loss": 1.6135, "step": 13796 }, { "epoch": 0.7690206788919235, "grad_norm": 0.5546259880065918, "learning_rate": 1.3026626327875052e-05, "loss": 1.5547, "step": 13797 }, { "epoch": 0.7690764171450867, "grad_norm": 0.540576696395874, "learning_rate": 1.3020632650990688e-05, "loss": 1.5781, "step": 13798 }, { "epoch": 0.7691321553982499, "grad_norm": 0.5935271382331848, "learning_rate": 1.301464014687625e-05, "loss": 1.7391, "step": 13799 }, { "epoch": 0.7691878936514129, "grad_norm": 0.5803846120834351, "learning_rate": 1.300864881572179e-05, "loss": 1.6353, "step": 13800 }, { "epoch": 0.7692436319045761, "grad_norm": 0.5862022042274475, "learning_rate": 1.3002658657717314e-05, "loss": 1.7033, "step": 13801 }, { "epoch": 0.7692993701577393, "grad_norm": 0.6239582896232605, "learning_rate": 1.2996669673052797e-05, "loss": 1.7809, "step": 13802 }, { "epoch": 0.7693551084109024, "grad_norm": 0.5111715793609619, "learning_rate": 1.2990681861918186e-05, "loss": 1.3578, "step": 13803 }, { "epoch": 0.7694108466640656, "grad_norm": 0.5416402220726013, "learning_rate": 1.2984695224503351e-05, "loss": 1.607, "step": 13804 }, { "epoch": 0.7694665849172287, "grad_norm": 0.5554835796356201, "learning_rate": 1.2978709760998176e-05, "loss": 1.5583, "step": 13805 }, { "epoch": 0.7695223231703918, "grad_norm": 0.5633331537246704, "learning_rate": 1.2972725471592473e-05, "loss": 1.6499, "step": 13806 }, { "epoch": 0.769578061423555, "grad_norm": 0.5715453028678894, "learning_rate": 1.2966742356476036e-05, "loss": 1.8379, "step": 13807 }, { "epoch": 0.7696337996767181, "grad_norm": 0.5345661044120789, "learning_rate": 1.2960760415838625e-05, "loss": 1.4554, "step": 13808 }, { "epoch": 0.7696895379298813, "grad_norm": 0.5594824552536011, "learning_rate": 1.2954779649869914e-05, "loss": 1.6364, "step": 13809 }, { "epoch": 0.7697452761830444, "grad_norm": 0.6407233476638794, "learning_rate": 1.294880005875963e-05, "loss": 1.8723, "step": 13810 }, { "epoch": 0.7698010144362075, "grad_norm": 0.5817638635635376, "learning_rate": 1.2942821642697372e-05, "loss": 1.7793, "step": 13811 }, { "epoch": 0.7698567526893707, "grad_norm": 0.5345514416694641, "learning_rate": 1.293684440187275e-05, "loss": 1.3296, "step": 13812 }, { "epoch": 0.7699124909425339, "grad_norm": 0.6158250570297241, "learning_rate": 1.2930868336475332e-05, "loss": 1.8778, "step": 13813 }, { "epoch": 0.769968229195697, "grad_norm": 0.6545181274414062, "learning_rate": 1.2924893446694647e-05, "loss": 1.8938, "step": 13814 }, { "epoch": 0.7700239674488601, "grad_norm": 0.6429218649864197, "learning_rate": 1.2918919732720186e-05, "loss": 2.1305, "step": 13815 }, { "epoch": 0.7700797057020233, "grad_norm": 0.6089257001876831, "learning_rate": 1.291294719474137e-05, "loss": 1.7058, "step": 13816 }, { "epoch": 0.7701354439551864, "grad_norm": 0.5691222548484802, "learning_rate": 1.2906975832947665e-05, "loss": 1.7782, "step": 13817 }, { "epoch": 0.7701911822083496, "grad_norm": 0.5711841583251953, "learning_rate": 1.2901005647528402e-05, "loss": 1.4945, "step": 13818 }, { "epoch": 0.7702469204615128, "grad_norm": 0.5910068154335022, "learning_rate": 1.2895036638672937e-05, "loss": 1.7357, "step": 13819 }, { "epoch": 0.7703026587146758, "grad_norm": 0.5314319133758545, "learning_rate": 1.2889068806570575e-05, "loss": 1.5099, "step": 13820 }, { "epoch": 0.770358396967839, "grad_norm": 0.5431066155433655, "learning_rate": 1.288310215141058e-05, "loss": 1.6854, "step": 13821 }, { "epoch": 0.7704141352210022, "grad_norm": 0.6121734976768494, "learning_rate": 1.287713667338219e-05, "loss": 1.9443, "step": 13822 }, { "epoch": 0.7704698734741653, "grad_norm": 0.5128597617149353, "learning_rate": 1.2871172372674573e-05, "loss": 1.3639, "step": 13823 }, { "epoch": 0.7705256117273285, "grad_norm": 0.5147601962089539, "learning_rate": 1.286520924947689e-05, "loss": 1.3894, "step": 13824 }, { "epoch": 0.7705813499804917, "grad_norm": 0.5614168047904968, "learning_rate": 1.2859247303978255e-05, "loss": 1.6414, "step": 13825 }, { "epoch": 0.7706370882336547, "grad_norm": 0.540399968624115, "learning_rate": 1.2853286536367753e-05, "loss": 1.5018, "step": 13826 }, { "epoch": 0.7706928264868179, "grad_norm": 0.5493924021720886, "learning_rate": 1.2847326946834426e-05, "loss": 1.8156, "step": 13827 }, { "epoch": 0.7707485647399811, "grad_norm": 0.5494512915611267, "learning_rate": 1.2841368535567267e-05, "loss": 1.5758, "step": 13828 }, { "epoch": 0.7708043029931442, "grad_norm": 0.566554605960846, "learning_rate": 1.2835411302755262e-05, "loss": 1.4204, "step": 13829 }, { "epoch": 0.7708600412463074, "grad_norm": 0.5874374508857727, "learning_rate": 1.2829455248587319e-05, "loss": 1.7024, "step": 13830 }, { "epoch": 0.7709157794994704, "grad_norm": 0.5894142389297485, "learning_rate": 1.2823500373252329e-05, "loss": 1.534, "step": 13831 }, { "epoch": 0.7709715177526336, "grad_norm": 0.5818924903869629, "learning_rate": 1.2817546676939158e-05, "loss": 1.7682, "step": 13832 }, { "epoch": 0.7710272560057968, "grad_norm": 0.5268850326538086, "learning_rate": 1.281159415983661e-05, "loss": 1.4134, "step": 13833 }, { "epoch": 0.7710829942589599, "grad_norm": 0.5993547439575195, "learning_rate": 1.2805642822133478e-05, "loss": 1.5439, "step": 13834 }, { "epoch": 0.7711387325121231, "grad_norm": 0.5826319456100464, "learning_rate": 1.2799692664018498e-05, "loss": 1.7694, "step": 13835 }, { "epoch": 0.7711944707652862, "grad_norm": 0.5974748134613037, "learning_rate": 1.2793743685680388e-05, "loss": 1.7029, "step": 13836 }, { "epoch": 0.7712502090184493, "grad_norm": 0.5868716835975647, "learning_rate": 1.2787795887307784e-05, "loss": 1.8878, "step": 13837 }, { "epoch": 0.7713059472716125, "grad_norm": 0.5850960612297058, "learning_rate": 1.278184926908933e-05, "loss": 1.7172, "step": 13838 }, { "epoch": 0.7713616855247757, "grad_norm": 0.5551589131355286, "learning_rate": 1.2775903831213625e-05, "loss": 1.6341, "step": 13839 }, { "epoch": 0.7714174237779388, "grad_norm": 0.5528069138526917, "learning_rate": 1.2769959573869217e-05, "loss": 1.7551, "step": 13840 }, { "epoch": 0.7714731620311019, "grad_norm": 0.5707437992095947, "learning_rate": 1.2764016497244641e-05, "loss": 1.6185, "step": 13841 }, { "epoch": 0.7715289002842651, "grad_norm": 0.6060401797294617, "learning_rate": 1.275807460152833e-05, "loss": 1.7512, "step": 13842 }, { "epoch": 0.7715846385374282, "grad_norm": 0.6141118407249451, "learning_rate": 1.275213388690879e-05, "loss": 1.6764, "step": 13843 }, { "epoch": 0.7716403767905914, "grad_norm": 0.5402005910873413, "learning_rate": 1.2746194353574375e-05, "loss": 1.5101, "step": 13844 }, { "epoch": 0.7716961150437546, "grad_norm": 0.503443717956543, "learning_rate": 1.2740256001713468e-05, "loss": 1.3814, "step": 13845 }, { "epoch": 0.7717518532969176, "grad_norm": 0.5166171789169312, "learning_rate": 1.2734318831514408e-05, "loss": 1.1882, "step": 13846 }, { "epoch": 0.7718075915500808, "grad_norm": 0.528704047203064, "learning_rate": 1.2728382843165477e-05, "loss": 1.615, "step": 13847 }, { "epoch": 0.771863329803244, "grad_norm": 0.5474404096603394, "learning_rate": 1.272244803685495e-05, "loss": 1.6113, "step": 13848 }, { "epoch": 0.7719190680564071, "grad_norm": 0.5809311866760254, "learning_rate": 1.2716514412771008e-05, "loss": 1.5951, "step": 13849 }, { "epoch": 0.7719748063095703, "grad_norm": 0.5877264142036438, "learning_rate": 1.2710581971101854e-05, "loss": 1.5873, "step": 13850 }, { "epoch": 0.7720305445627335, "grad_norm": 0.6127954125404358, "learning_rate": 1.2704650712035632e-05, "loss": 1.7727, "step": 13851 }, { "epoch": 0.7720862828158965, "grad_norm": 0.5746996402740479, "learning_rate": 1.2698720635760435e-05, "loss": 1.7201, "step": 13852 }, { "epoch": 0.7721420210690597, "grad_norm": 0.5272437930107117, "learning_rate": 1.2692791742464343e-05, "loss": 1.5452, "step": 13853 }, { "epoch": 0.7721977593222228, "grad_norm": 0.5763612985610962, "learning_rate": 1.2686864032335376e-05, "loss": 1.7422, "step": 13854 }, { "epoch": 0.772253497575386, "grad_norm": 0.5544466376304626, "learning_rate": 1.2680937505561552e-05, "loss": 1.5116, "step": 13855 }, { "epoch": 0.7723092358285492, "grad_norm": 0.5890754461288452, "learning_rate": 1.267501216233079e-05, "loss": 1.7639, "step": 13856 }, { "epoch": 0.7723649740817122, "grad_norm": 0.5503895282745361, "learning_rate": 1.266908800283102e-05, "loss": 1.7836, "step": 13857 }, { "epoch": 0.7724207123348754, "grad_norm": 0.5393791198730469, "learning_rate": 1.2663165027250124e-05, "loss": 1.5314, "step": 13858 }, { "epoch": 0.7724764505880386, "grad_norm": 0.6032135486602783, "learning_rate": 1.2657243235775945e-05, "loss": 1.738, "step": 13859 }, { "epoch": 0.7725321888412017, "grad_norm": 0.5893515944480896, "learning_rate": 1.2651322628596285e-05, "loss": 1.8642, "step": 13860 }, { "epoch": 0.7725879270943649, "grad_norm": 0.5650129914283752, "learning_rate": 1.2645403205898914e-05, "loss": 1.7399, "step": 13861 }, { "epoch": 0.772643665347528, "grad_norm": 0.5308829545974731, "learning_rate": 1.2639484967871578e-05, "loss": 1.5345, "step": 13862 }, { "epoch": 0.7726994036006911, "grad_norm": 0.5386495590209961, "learning_rate": 1.2633567914701939e-05, "loss": 1.5579, "step": 13863 }, { "epoch": 0.7727551418538543, "grad_norm": 0.5876171588897705, "learning_rate": 1.2627652046577659e-05, "loss": 1.686, "step": 13864 }, { "epoch": 0.7728108801070175, "grad_norm": 0.5936629176139832, "learning_rate": 1.2621737363686365e-05, "loss": 1.5364, "step": 13865 }, { "epoch": 0.7728666183601806, "grad_norm": 0.5996050238609314, "learning_rate": 1.2615823866215626e-05, "loss": 1.8273, "step": 13866 }, { "epoch": 0.7729223566133437, "grad_norm": 0.6248884797096252, "learning_rate": 1.260991155435301e-05, "loss": 1.6975, "step": 13867 }, { "epoch": 0.7729780948665069, "grad_norm": 0.5924556255340576, "learning_rate": 1.2604000428285967e-05, "loss": 1.6625, "step": 13868 }, { "epoch": 0.77303383311967, "grad_norm": 0.5752199292182922, "learning_rate": 1.2598090488202025e-05, "loss": 1.6419, "step": 13869 }, { "epoch": 0.7730895713728332, "grad_norm": 0.5989288687705994, "learning_rate": 1.2592181734288572e-05, "loss": 1.8036, "step": 13870 }, { "epoch": 0.7731453096259964, "grad_norm": 0.5582593679428101, "learning_rate": 1.2586274166733009e-05, "loss": 1.6237, "step": 13871 }, { "epoch": 0.7732010478791594, "grad_norm": 0.5653522610664368, "learning_rate": 1.2580367785722697e-05, "loss": 1.6326, "step": 13872 }, { "epoch": 0.7732567861323226, "grad_norm": 0.5593123435974121, "learning_rate": 1.257446259144494e-05, "loss": 1.5155, "step": 13873 }, { "epoch": 0.7733125243854858, "grad_norm": 0.5426331162452698, "learning_rate": 1.2568558584087048e-05, "loss": 1.4979, "step": 13874 }, { "epoch": 0.7733682626386489, "grad_norm": 0.5261883735656738, "learning_rate": 1.2562655763836217e-05, "loss": 1.4564, "step": 13875 }, { "epoch": 0.7734240008918121, "grad_norm": 0.5568323731422424, "learning_rate": 1.2556754130879666e-05, "loss": 1.5123, "step": 13876 }, { "epoch": 0.7734797391449751, "grad_norm": 0.5774717926979065, "learning_rate": 1.2550853685404573e-05, "loss": 1.7825, "step": 13877 }, { "epoch": 0.7735354773981383, "grad_norm": 0.5544146299362183, "learning_rate": 1.2544954427598066e-05, "loss": 1.5749, "step": 13878 }, { "epoch": 0.7735912156513015, "grad_norm": 0.5266306400299072, "learning_rate": 1.25390563576472e-05, "loss": 1.6941, "step": 13879 }, { "epoch": 0.7736469539044646, "grad_norm": 0.605617880821228, "learning_rate": 1.253315947573907e-05, "loss": 1.6413, "step": 13880 }, { "epoch": 0.7737026921576278, "grad_norm": 0.5538267493247986, "learning_rate": 1.2527263782060689e-05, "loss": 1.6178, "step": 13881 }, { "epoch": 0.773758430410791, "grad_norm": 0.6117134690284729, "learning_rate": 1.2521369276799e-05, "loss": 1.7388, "step": 13882 }, { "epoch": 0.773814168663954, "grad_norm": 0.6116869449615479, "learning_rate": 1.2515475960140966e-05, "loss": 1.8371, "step": 13883 }, { "epoch": 0.7738699069171172, "grad_norm": 0.5542810559272766, "learning_rate": 1.2509583832273486e-05, "loss": 1.4842, "step": 13884 }, { "epoch": 0.7739256451702804, "grad_norm": 0.6455458402633667, "learning_rate": 1.2503692893383424e-05, "loss": 1.836, "step": 13885 }, { "epoch": 0.7739813834234435, "grad_norm": 0.5932666063308716, "learning_rate": 1.24978031436576e-05, "loss": 1.5868, "step": 13886 }, { "epoch": 0.7740371216766067, "grad_norm": 0.5318253040313721, "learning_rate": 1.2491914583282805e-05, "loss": 1.5224, "step": 13887 }, { "epoch": 0.7740928599297698, "grad_norm": 0.5483694076538086, "learning_rate": 1.2486027212445812e-05, "loss": 1.6178, "step": 13888 }, { "epoch": 0.7741485981829329, "grad_norm": 0.5445347428321838, "learning_rate": 1.2480141031333299e-05, "loss": 1.5006, "step": 13889 }, { "epoch": 0.7742043364360961, "grad_norm": 0.5520989298820496, "learning_rate": 1.2474256040131955e-05, "loss": 1.6473, "step": 13890 }, { "epoch": 0.7742600746892593, "grad_norm": 0.5939954519271851, "learning_rate": 1.246837223902842e-05, "loss": 1.7462, "step": 13891 }, { "epoch": 0.7743158129424224, "grad_norm": 0.5705162286758423, "learning_rate": 1.246248962820929e-05, "loss": 1.5555, "step": 13892 }, { "epoch": 0.7743715511955855, "grad_norm": 0.5511966943740845, "learning_rate": 1.2456608207861147e-05, "loss": 1.4746, "step": 13893 }, { "epoch": 0.7744272894487487, "grad_norm": 0.560956597328186, "learning_rate": 1.2450727978170473e-05, "loss": 1.6586, "step": 13894 }, { "epoch": 0.7744830277019118, "grad_norm": 0.5590883493423462, "learning_rate": 1.2444848939323805e-05, "loss": 1.7492, "step": 13895 }, { "epoch": 0.774538765955075, "grad_norm": 0.5555147528648376, "learning_rate": 1.243897109150755e-05, "loss": 1.5381, "step": 13896 }, { "epoch": 0.7745945042082382, "grad_norm": 0.5215170383453369, "learning_rate": 1.2433094434908143e-05, "loss": 1.4131, "step": 13897 }, { "epoch": 0.7746502424614012, "grad_norm": 0.5432319045066833, "learning_rate": 1.2427218969711945e-05, "loss": 1.3415, "step": 13898 }, { "epoch": 0.7747059807145644, "grad_norm": 0.6142305731773376, "learning_rate": 1.2421344696105298e-05, "loss": 1.6424, "step": 13899 }, { "epoch": 0.7747617189677275, "grad_norm": 0.5684335827827454, "learning_rate": 1.2415471614274515e-05, "loss": 1.8077, "step": 13900 }, { "epoch": 0.7748174572208907, "grad_norm": 0.5752829313278198, "learning_rate": 1.2409599724405807e-05, "loss": 1.6892, "step": 13901 }, { "epoch": 0.7748731954740539, "grad_norm": 0.5764576196670532, "learning_rate": 1.2403729026685462e-05, "loss": 1.722, "step": 13902 }, { "epoch": 0.7749289337272169, "grad_norm": 0.5339565873146057, "learning_rate": 1.2397859521299615e-05, "loss": 1.5116, "step": 13903 }, { "epoch": 0.7749846719803801, "grad_norm": 0.6000561714172363, "learning_rate": 1.2391991208434439e-05, "loss": 1.6784, "step": 13904 }, { "epoch": 0.7750404102335433, "grad_norm": 0.592254638671875, "learning_rate": 1.2386124088276007e-05, "loss": 1.7135, "step": 13905 }, { "epoch": 0.7750961484867064, "grad_norm": 0.5584617853164673, "learning_rate": 1.2380258161010427e-05, "loss": 1.6567, "step": 13906 }, { "epoch": 0.7751518867398696, "grad_norm": 0.5328884124755859, "learning_rate": 1.2374393426823733e-05, "loss": 1.5234, "step": 13907 }, { "epoch": 0.7752076249930328, "grad_norm": 0.5227957367897034, "learning_rate": 1.2368529885901898e-05, "loss": 1.4218, "step": 13908 }, { "epoch": 0.7752633632461958, "grad_norm": 0.5832966566085815, "learning_rate": 1.2362667538430883e-05, "loss": 1.6467, "step": 13909 }, { "epoch": 0.775319101499359, "grad_norm": 0.5582821369171143, "learning_rate": 1.2356806384596614e-05, "loss": 1.5709, "step": 13910 }, { "epoch": 0.7753748397525222, "grad_norm": 0.5849579572677612, "learning_rate": 1.235094642458497e-05, "loss": 1.7064, "step": 13911 }, { "epoch": 0.7754305780056853, "grad_norm": 0.64003586769104, "learning_rate": 1.23450876585818e-05, "loss": 1.7045, "step": 13912 }, { "epoch": 0.7754863162588485, "grad_norm": 0.5383117198944092, "learning_rate": 1.2339230086772907e-05, "loss": 1.5096, "step": 13913 }, { "epoch": 0.7755420545120116, "grad_norm": 0.5988223552703857, "learning_rate": 1.2333373709344065e-05, "loss": 1.8464, "step": 13914 }, { "epoch": 0.7755977927651747, "grad_norm": 0.522731602191925, "learning_rate": 1.2327518526480992e-05, "loss": 1.4544, "step": 13915 }, { "epoch": 0.7756535310183379, "grad_norm": 0.5857813358306885, "learning_rate": 1.2321664538369382e-05, "loss": 1.851, "step": 13916 }, { "epoch": 0.7757092692715011, "grad_norm": 0.560541570186615, "learning_rate": 1.231581174519489e-05, "loss": 1.732, "step": 13917 }, { "epoch": 0.7757650075246642, "grad_norm": 0.564007580280304, "learning_rate": 1.2309960147143134e-05, "loss": 1.6381, "step": 13918 }, { "epoch": 0.7758207457778273, "grad_norm": 0.5471463203430176, "learning_rate": 1.2304109744399716e-05, "loss": 1.5694, "step": 13919 }, { "epoch": 0.7758764840309905, "grad_norm": 0.5344834327697754, "learning_rate": 1.2298260537150119e-05, "loss": 1.7807, "step": 13920 }, { "epoch": 0.7759322222841536, "grad_norm": 0.5642966628074646, "learning_rate": 1.2292412525579916e-05, "loss": 1.7266, "step": 13921 }, { "epoch": 0.7759879605373168, "grad_norm": 0.5271955132484436, "learning_rate": 1.228656570987452e-05, "loss": 1.4124, "step": 13922 }, { "epoch": 0.7760436987904799, "grad_norm": 0.5498999357223511, "learning_rate": 1.2280720090219372e-05, "loss": 1.5466, "step": 13923 }, { "epoch": 0.776099437043643, "grad_norm": 0.595337450504303, "learning_rate": 1.2274875666799867e-05, "loss": 1.8677, "step": 13924 }, { "epoch": 0.7761551752968062, "grad_norm": 0.581565260887146, "learning_rate": 1.2269032439801353e-05, "loss": 1.862, "step": 13925 }, { "epoch": 0.7762109135499693, "grad_norm": 0.623028576374054, "learning_rate": 1.226319040940916e-05, "loss": 1.9658, "step": 13926 }, { "epoch": 0.7762666518031325, "grad_norm": 0.5315784215927124, "learning_rate": 1.2257349575808513e-05, "loss": 1.488, "step": 13927 }, { "epoch": 0.7763223900562957, "grad_norm": 0.5710899233818054, "learning_rate": 1.2251509939184713e-05, "loss": 1.8711, "step": 13928 }, { "epoch": 0.7763781283094587, "grad_norm": 0.5678262710571289, "learning_rate": 1.2245671499722916e-05, "loss": 1.6375, "step": 13929 }, { "epoch": 0.7764338665626219, "grad_norm": 0.6168885231018066, "learning_rate": 1.2239834257608312e-05, "loss": 1.9096, "step": 13930 }, { "epoch": 0.7764896048157851, "grad_norm": 0.6101101636886597, "learning_rate": 1.2233998213025977e-05, "loss": 1.7417, "step": 13931 }, { "epoch": 0.7765453430689482, "grad_norm": 0.5685074329376221, "learning_rate": 1.2228163366161038e-05, "loss": 1.6114, "step": 13932 }, { "epoch": 0.7766010813221114, "grad_norm": 0.5775470733642578, "learning_rate": 1.2222329717198556e-05, "loss": 1.6181, "step": 13933 }, { "epoch": 0.7766568195752745, "grad_norm": 0.53831946849823, "learning_rate": 1.2216497266323495e-05, "loss": 1.6174, "step": 13934 }, { "epoch": 0.7767125578284376, "grad_norm": 0.5419134497642517, "learning_rate": 1.2210666013720845e-05, "loss": 1.5781, "step": 13935 }, { "epoch": 0.7767682960816008, "grad_norm": 0.5791894793510437, "learning_rate": 1.2204835959575545e-05, "loss": 1.6628, "step": 13936 }, { "epoch": 0.776824034334764, "grad_norm": 0.5430119037628174, "learning_rate": 1.219900710407249e-05, "loss": 1.506, "step": 13937 }, { "epoch": 0.7768797725879271, "grad_norm": 0.542615532875061, "learning_rate": 1.219317944739653e-05, "loss": 1.8301, "step": 13938 }, { "epoch": 0.7769355108410902, "grad_norm": 0.5840906500816345, "learning_rate": 1.2187352989732493e-05, "loss": 1.6372, "step": 13939 }, { "epoch": 0.7769912490942534, "grad_norm": 0.5867896676063538, "learning_rate": 1.2181527731265169e-05, "loss": 1.5859, "step": 13940 }, { "epoch": 0.7770469873474165, "grad_norm": 0.5682836771011353, "learning_rate": 1.2175703672179273e-05, "loss": 1.756, "step": 13941 }, { "epoch": 0.7771027256005797, "grad_norm": 0.6265798807144165, "learning_rate": 1.2169880812659529e-05, "loss": 1.682, "step": 13942 }, { "epoch": 0.7771584638537429, "grad_norm": 0.5579496026039124, "learning_rate": 1.21640591528906e-05, "loss": 1.5255, "step": 13943 }, { "epoch": 0.777214202106906, "grad_norm": 0.5834450125694275, "learning_rate": 1.2158238693057112e-05, "loss": 1.724, "step": 13944 }, { "epoch": 0.7772699403600691, "grad_norm": 0.5844115614891052, "learning_rate": 1.2152419433343676e-05, "loss": 1.6637, "step": 13945 }, { "epoch": 0.7773256786132322, "grad_norm": 0.5828450322151184, "learning_rate": 1.2146601373934801e-05, "loss": 1.732, "step": 13946 }, { "epoch": 0.7773814168663954, "grad_norm": 0.5758583545684814, "learning_rate": 1.2140784515015058e-05, "loss": 1.6216, "step": 13947 }, { "epoch": 0.7774371551195586, "grad_norm": 0.6091346740722656, "learning_rate": 1.213496885676888e-05, "loss": 1.9204, "step": 13948 }, { "epoch": 0.7774928933727216, "grad_norm": 0.5350582003593445, "learning_rate": 1.212915439938072e-05, "loss": 1.4757, "step": 13949 }, { "epoch": 0.7775486316258848, "grad_norm": 0.5768659114837646, "learning_rate": 1.2123341143034988e-05, "loss": 1.7228, "step": 13950 }, { "epoch": 0.777604369879048, "grad_norm": 0.5592014193534851, "learning_rate": 1.211752908791603e-05, "loss": 1.6262, "step": 13951 }, { "epoch": 0.7776601081322111, "grad_norm": 0.610351026058197, "learning_rate": 1.2111718234208197e-05, "loss": 1.7301, "step": 13952 }, { "epoch": 0.7777158463853743, "grad_norm": 0.5659115314483643, "learning_rate": 1.2105908582095731e-05, "loss": 1.6193, "step": 13953 }, { "epoch": 0.7777715846385375, "grad_norm": 0.5750666260719299, "learning_rate": 1.2100100131762932e-05, "loss": 1.8068, "step": 13954 }, { "epoch": 0.7778273228917005, "grad_norm": 0.5514704585075378, "learning_rate": 1.2094292883393976e-05, "loss": 1.5966, "step": 13955 }, { "epoch": 0.7778830611448637, "grad_norm": 0.5872873067855835, "learning_rate": 1.2088486837173051e-05, "loss": 1.8582, "step": 13956 }, { "epoch": 0.7779387993980269, "grad_norm": 0.5862222909927368, "learning_rate": 1.2082681993284261e-05, "loss": 1.7307, "step": 13957 }, { "epoch": 0.77799453765119, "grad_norm": 0.5620829463005066, "learning_rate": 1.2076878351911736e-05, "loss": 1.7103, "step": 13958 }, { "epoch": 0.7780502759043532, "grad_norm": 0.5808737874031067, "learning_rate": 1.2071075913239538e-05, "loss": 1.7988, "step": 13959 }, { "epoch": 0.7781060141575163, "grad_norm": 0.5587299466133118, "learning_rate": 1.206527467745166e-05, "loss": 1.7373, "step": 13960 }, { "epoch": 0.7781617524106794, "grad_norm": 0.607959508895874, "learning_rate": 1.2059474644732088e-05, "loss": 1.7231, "step": 13961 }, { "epoch": 0.7782174906638426, "grad_norm": 0.569949209690094, "learning_rate": 1.2053675815264776e-05, "loss": 1.6934, "step": 13962 }, { "epoch": 0.7782732289170058, "grad_norm": 0.5692344307899475, "learning_rate": 1.2047878189233625e-05, "loss": 1.7624, "step": 13963 }, { "epoch": 0.7783289671701689, "grad_norm": 0.5457063317298889, "learning_rate": 1.2042081766822499e-05, "loss": 1.6195, "step": 13964 }, { "epoch": 0.778384705423332, "grad_norm": 0.6106131076812744, "learning_rate": 1.2036286548215231e-05, "loss": 1.8619, "step": 13965 }, { "epoch": 0.7784404436764952, "grad_norm": 0.590175986289978, "learning_rate": 1.2030492533595623e-05, "loss": 1.7022, "step": 13966 }, { "epoch": 0.7784961819296583, "grad_norm": 0.6062188744544983, "learning_rate": 1.2024699723147403e-05, "loss": 1.6003, "step": 13967 }, { "epoch": 0.7785519201828215, "grad_norm": 0.5541261434555054, "learning_rate": 1.2018908117054295e-05, "loss": 1.488, "step": 13968 }, { "epoch": 0.7786076584359846, "grad_norm": 0.5979520082473755, "learning_rate": 1.2013117715499972e-05, "loss": 1.7595, "step": 13969 }, { "epoch": 0.7786633966891477, "grad_norm": 0.5797428488731384, "learning_rate": 1.2007328518668082e-05, "loss": 1.6972, "step": 13970 }, { "epoch": 0.7787191349423109, "grad_norm": 0.5980271697044373, "learning_rate": 1.2001540526742234e-05, "loss": 1.6437, "step": 13971 }, { "epoch": 0.778774873195474, "grad_norm": 0.5568384528160095, "learning_rate": 1.199575373990594e-05, "loss": 1.455, "step": 13972 }, { "epoch": 0.7788306114486372, "grad_norm": 0.5588963627815247, "learning_rate": 1.198996815834279e-05, "loss": 1.6241, "step": 13973 }, { "epoch": 0.7788863497018004, "grad_norm": 0.5465938448905945, "learning_rate": 1.1984183782236219e-05, "loss": 1.7752, "step": 13974 }, { "epoch": 0.7789420879549634, "grad_norm": 0.5582731366157532, "learning_rate": 1.197840061176969e-05, "loss": 1.5402, "step": 13975 }, { "epoch": 0.7789978262081266, "grad_norm": 0.6088888049125671, "learning_rate": 1.1972618647126616e-05, "loss": 1.6849, "step": 13976 }, { "epoch": 0.7790535644612898, "grad_norm": 0.5533426403999329, "learning_rate": 1.1966837888490361e-05, "loss": 1.6473, "step": 13977 }, { "epoch": 0.7791093027144529, "grad_norm": 0.5501806139945984, "learning_rate": 1.1961058336044274e-05, "loss": 1.5457, "step": 13978 }, { "epoch": 0.7791650409676161, "grad_norm": 0.5428063273429871, "learning_rate": 1.1955279989971607e-05, "loss": 1.5844, "step": 13979 }, { "epoch": 0.7792207792207793, "grad_norm": 0.5709086060523987, "learning_rate": 1.1949502850455669e-05, "loss": 1.6657, "step": 13980 }, { "epoch": 0.7792765174739423, "grad_norm": 0.5452801585197449, "learning_rate": 1.1943726917679637e-05, "loss": 1.5395, "step": 13981 }, { "epoch": 0.7793322557271055, "grad_norm": 0.5536159873008728, "learning_rate": 1.1937952191826723e-05, "loss": 1.4989, "step": 13982 }, { "epoch": 0.7793879939802687, "grad_norm": 0.5623399019241333, "learning_rate": 1.1932178673080014e-05, "loss": 1.692, "step": 13983 }, { "epoch": 0.7794437322334318, "grad_norm": 0.6092471480369568, "learning_rate": 1.1926406361622661e-05, "loss": 1.7339, "step": 13984 }, { "epoch": 0.779499470486595, "grad_norm": 0.5527442693710327, "learning_rate": 1.192063525763773e-05, "loss": 1.7289, "step": 13985 }, { "epoch": 0.7795552087397581, "grad_norm": 0.6212316155433655, "learning_rate": 1.1914865361308213e-05, "loss": 1.6664, "step": 13986 }, { "epoch": 0.7796109469929212, "grad_norm": 0.6183369755744934, "learning_rate": 1.1909096672817121e-05, "loss": 1.8228, "step": 13987 }, { "epoch": 0.7796666852460844, "grad_norm": 0.5724116563796997, "learning_rate": 1.1903329192347395e-05, "loss": 1.7243, "step": 13988 }, { "epoch": 0.7797224234992476, "grad_norm": 0.5382424592971802, "learning_rate": 1.1897562920081945e-05, "loss": 1.4968, "step": 13989 }, { "epoch": 0.7797781617524107, "grad_norm": 0.5783862471580505, "learning_rate": 1.1891797856203651e-05, "loss": 1.6693, "step": 13990 }, { "epoch": 0.7798339000055738, "grad_norm": 0.5656108856201172, "learning_rate": 1.188603400089534e-05, "loss": 1.6512, "step": 13991 }, { "epoch": 0.7798896382587369, "grad_norm": 0.5719448924064636, "learning_rate": 1.1880271354339822e-05, "loss": 1.6319, "step": 13992 }, { "epoch": 0.7799453765119001, "grad_norm": 0.5753348469734192, "learning_rate": 1.1874509916719834e-05, "loss": 1.6541, "step": 13993 }, { "epoch": 0.7800011147650633, "grad_norm": 0.5586207509040833, "learning_rate": 1.1868749688218106e-05, "loss": 1.6669, "step": 13994 }, { "epoch": 0.7800568530182264, "grad_norm": 0.6782849431037903, "learning_rate": 1.186299066901731e-05, "loss": 2.0342, "step": 13995 }, { "epoch": 0.7801125912713895, "grad_norm": 0.5908063650131226, "learning_rate": 1.18572328593001e-05, "loss": 1.7769, "step": 13996 }, { "epoch": 0.7801683295245527, "grad_norm": 0.5401486158370972, "learning_rate": 1.185147625924909e-05, "loss": 1.5211, "step": 13997 }, { "epoch": 0.7802240677777158, "grad_norm": 0.5483136177062988, "learning_rate": 1.1845720869046805e-05, "loss": 1.4663, "step": 13998 }, { "epoch": 0.780279806030879, "grad_norm": 0.6039329171180725, "learning_rate": 1.1839966688875825e-05, "loss": 1.6968, "step": 13999 }, { "epoch": 0.7803355442840422, "grad_norm": 0.5703708529472351, "learning_rate": 1.1834213718918602e-05, "loss": 1.6897, "step": 14000 }, { "epoch": 0.7803912825372052, "grad_norm": 0.5920760035514832, "learning_rate": 1.1828461959357595e-05, "loss": 1.7622, "step": 14001 }, { "epoch": 0.7804470207903684, "grad_norm": 0.5369116067886353, "learning_rate": 1.182271141037522e-05, "loss": 1.5367, "step": 14002 }, { "epoch": 0.7805027590435316, "grad_norm": 0.5805405974388123, "learning_rate": 1.1816962072153848e-05, "loss": 1.8351, "step": 14003 }, { "epoch": 0.7805584972966947, "grad_norm": 0.5652420520782471, "learning_rate": 1.1811213944875832e-05, "loss": 1.5502, "step": 14004 }, { "epoch": 0.7806142355498579, "grad_norm": 0.5290706753730774, "learning_rate": 1.1805467028723426e-05, "loss": 1.5114, "step": 14005 }, { "epoch": 0.7806699738030211, "grad_norm": 0.5578330755233765, "learning_rate": 1.1799721323878943e-05, "loss": 1.5488, "step": 14006 }, { "epoch": 0.7807257120561841, "grad_norm": 0.557753324508667, "learning_rate": 1.1793976830524567e-05, "loss": 1.5781, "step": 14007 }, { "epoch": 0.7807814503093473, "grad_norm": 0.5453119277954102, "learning_rate": 1.1788233548842486e-05, "loss": 1.6267, "step": 14008 }, { "epoch": 0.7808371885625105, "grad_norm": 0.5647554993629456, "learning_rate": 1.1782491479014846e-05, "loss": 1.7858, "step": 14009 }, { "epoch": 0.7808929268156736, "grad_norm": 0.553887665271759, "learning_rate": 1.1776750621223754e-05, "loss": 1.394, "step": 14010 }, { "epoch": 0.7809486650688368, "grad_norm": 0.6797167658805847, "learning_rate": 1.1771010975651287e-05, "loss": 1.8826, "step": 14011 }, { "epoch": 0.7810044033219999, "grad_norm": 0.568385660648346, "learning_rate": 1.176527254247945e-05, "loss": 1.6163, "step": 14012 }, { "epoch": 0.781060141575163, "grad_norm": 0.566242516040802, "learning_rate": 1.1759535321890247e-05, "loss": 1.6258, "step": 14013 }, { "epoch": 0.7811158798283262, "grad_norm": 0.5936616659164429, "learning_rate": 1.1753799314065622e-05, "loss": 1.7727, "step": 14014 }, { "epoch": 0.7811716180814893, "grad_norm": 0.5442579984664917, "learning_rate": 1.1748064519187507e-05, "loss": 1.5458, "step": 14015 }, { "epoch": 0.7812273563346525, "grad_norm": 0.6045543551445007, "learning_rate": 1.1742330937437729e-05, "loss": 1.5589, "step": 14016 }, { "epoch": 0.7812830945878156, "grad_norm": 0.5740007162094116, "learning_rate": 1.1736598568998175e-05, "loss": 1.752, "step": 14017 }, { "epoch": 0.7813388328409787, "grad_norm": 0.5655611753463745, "learning_rate": 1.1730867414050634e-05, "loss": 1.8233, "step": 14018 }, { "epoch": 0.7813945710941419, "grad_norm": 0.5489192605018616, "learning_rate": 1.1725137472776842e-05, "loss": 1.3202, "step": 14019 }, { "epoch": 0.7814503093473051, "grad_norm": 0.6361293792724609, "learning_rate": 1.1719408745358534e-05, "loss": 1.7173, "step": 14020 }, { "epoch": 0.7815060476004682, "grad_norm": 0.5393735766410828, "learning_rate": 1.1713681231977387e-05, "loss": 1.6645, "step": 14021 }, { "epoch": 0.7815617858536313, "grad_norm": 0.5540429353713989, "learning_rate": 1.1707954932815046e-05, "loss": 1.6041, "step": 14022 }, { "epoch": 0.7816175241067945, "grad_norm": 0.5488499402999878, "learning_rate": 1.1702229848053131e-05, "loss": 1.6196, "step": 14023 }, { "epoch": 0.7816732623599576, "grad_norm": 0.6127748489379883, "learning_rate": 1.1696505977873167e-05, "loss": 1.9201, "step": 14024 }, { "epoch": 0.7817290006131208, "grad_norm": 0.5382614731788635, "learning_rate": 1.1690783322456734e-05, "loss": 1.596, "step": 14025 }, { "epoch": 0.781784738866284, "grad_norm": 0.564821183681488, "learning_rate": 1.1685061881985282e-05, "loss": 1.6196, "step": 14026 }, { "epoch": 0.781840477119447, "grad_norm": 0.5664968490600586, "learning_rate": 1.1679341656640275e-05, "loss": 1.7333, "step": 14027 }, { "epoch": 0.7818962153726102, "grad_norm": 0.5671424269676208, "learning_rate": 1.1673622646603127e-05, "loss": 1.6193, "step": 14028 }, { "epoch": 0.7819519536257734, "grad_norm": 0.5720041394233704, "learning_rate": 1.1667904852055212e-05, "loss": 1.6827, "step": 14029 }, { "epoch": 0.7820076918789365, "grad_norm": 0.5713546276092529, "learning_rate": 1.1662188273177877e-05, "loss": 1.5779, "step": 14030 }, { "epoch": 0.7820634301320997, "grad_norm": 0.5552577376365662, "learning_rate": 1.1656472910152376e-05, "loss": 1.5118, "step": 14031 }, { "epoch": 0.7821191683852629, "grad_norm": 0.5922068357467651, "learning_rate": 1.1650758763160025e-05, "loss": 1.7555, "step": 14032 }, { "epoch": 0.7821749066384259, "grad_norm": 0.6121614575386047, "learning_rate": 1.1645045832381995e-05, "loss": 1.8431, "step": 14033 }, { "epoch": 0.7822306448915891, "grad_norm": 0.5433709025382996, "learning_rate": 1.1639334117999496e-05, "loss": 1.523, "step": 14034 }, { "epoch": 0.7822863831447523, "grad_norm": 0.5548751950263977, "learning_rate": 1.163362362019365e-05, "loss": 1.6873, "step": 14035 }, { "epoch": 0.7823421213979154, "grad_norm": 0.6262120008468628, "learning_rate": 1.162791433914558e-05, "loss": 1.7052, "step": 14036 }, { "epoch": 0.7823978596510786, "grad_norm": 0.5512871742248535, "learning_rate": 1.1622206275036352e-05, "loss": 1.8216, "step": 14037 }, { "epoch": 0.7824535979042416, "grad_norm": 0.5779017210006714, "learning_rate": 1.1616499428046974e-05, "loss": 1.5163, "step": 14038 }, { "epoch": 0.7825093361574048, "grad_norm": 0.5814633965492249, "learning_rate": 1.1610793798358433e-05, "loss": 1.5863, "step": 14039 }, { "epoch": 0.782565074410568, "grad_norm": 0.5750308632850647, "learning_rate": 1.1605089386151695e-05, "loss": 1.5654, "step": 14040 }, { "epoch": 0.7826208126637311, "grad_norm": 0.5587421655654907, "learning_rate": 1.1599386191607675e-05, "loss": 1.6553, "step": 14041 }, { "epoch": 0.7826765509168943, "grad_norm": 0.5279087424278259, "learning_rate": 1.1593684214907207e-05, "loss": 1.5521, "step": 14042 }, { "epoch": 0.7827322891700574, "grad_norm": 0.5872433185577393, "learning_rate": 1.1587983456231166e-05, "loss": 1.6284, "step": 14043 }, { "epoch": 0.7827880274232205, "grad_norm": 0.5682265162467957, "learning_rate": 1.158228391576035e-05, "loss": 1.29, "step": 14044 }, { "epoch": 0.7828437656763837, "grad_norm": 0.548341691493988, "learning_rate": 1.1576585593675477e-05, "loss": 1.5505, "step": 14045 }, { "epoch": 0.7828995039295469, "grad_norm": 0.5533690452575684, "learning_rate": 1.1570888490157289e-05, "loss": 1.369, "step": 14046 }, { "epoch": 0.78295524218271, "grad_norm": 0.6491280794143677, "learning_rate": 1.156519260538646e-05, "loss": 1.7068, "step": 14047 }, { "epoch": 0.7830109804358731, "grad_norm": 0.5396941304206848, "learning_rate": 1.155949793954363e-05, "loss": 1.5027, "step": 14048 }, { "epoch": 0.7830667186890363, "grad_norm": 0.6212543845176697, "learning_rate": 1.1553804492809417e-05, "loss": 1.9128, "step": 14049 }, { "epoch": 0.7831224569421994, "grad_norm": 0.5530951619148254, "learning_rate": 1.1548112265364336e-05, "loss": 1.7097, "step": 14050 }, { "epoch": 0.7831781951953626, "grad_norm": 0.5900622606277466, "learning_rate": 1.154242125738898e-05, "loss": 1.78, "step": 14051 }, { "epoch": 0.7832339334485258, "grad_norm": 0.6479026079177856, "learning_rate": 1.1536731469063777e-05, "loss": 2.0257, "step": 14052 }, { "epoch": 0.7832896717016888, "grad_norm": 0.5552021861076355, "learning_rate": 1.15310429005692e-05, "loss": 1.5198, "step": 14053 }, { "epoch": 0.783345409954852, "grad_norm": 0.6861423850059509, "learning_rate": 1.1525355552085648e-05, "loss": 1.2568, "step": 14054 }, { "epoch": 0.7834011482080152, "grad_norm": 0.5832570791244507, "learning_rate": 1.15196694237935e-05, "loss": 1.6483, "step": 14055 }, { "epoch": 0.7834568864611783, "grad_norm": 0.5791754722595215, "learning_rate": 1.1513984515873094e-05, "loss": 1.5516, "step": 14056 }, { "epoch": 0.7835126247143415, "grad_norm": 0.6173155307769775, "learning_rate": 1.150830082850468e-05, "loss": 1.8275, "step": 14057 }, { "epoch": 0.7835683629675047, "grad_norm": 0.5452615022659302, "learning_rate": 1.1502618361868572e-05, "loss": 1.5679, "step": 14058 }, { "epoch": 0.7836241012206677, "grad_norm": 0.5976300835609436, "learning_rate": 1.149693711614494e-05, "loss": 1.6907, "step": 14059 }, { "epoch": 0.7836798394738309, "grad_norm": 0.7489990592002869, "learning_rate": 1.1491257091513974e-05, "loss": 1.4673, "step": 14060 }, { "epoch": 0.783735577726994, "grad_norm": 0.5231119990348816, "learning_rate": 1.1485578288155813e-05, "loss": 1.4492, "step": 14061 }, { "epoch": 0.7837913159801572, "grad_norm": 0.5623896718025208, "learning_rate": 1.1479900706250552e-05, "loss": 1.6429, "step": 14062 }, { "epoch": 0.7838470542333204, "grad_norm": 0.5758726000785828, "learning_rate": 1.1474224345978268e-05, "loss": 1.6883, "step": 14063 }, { "epoch": 0.7839027924864834, "grad_norm": 0.617182195186615, "learning_rate": 1.1468549207518953e-05, "loss": 1.6511, "step": 14064 }, { "epoch": 0.7839585307396466, "grad_norm": 0.5945354700088501, "learning_rate": 1.1462875291052604e-05, "loss": 1.7315, "step": 14065 }, { "epoch": 0.7840142689928098, "grad_norm": 0.5089705586433411, "learning_rate": 1.1457202596759165e-05, "loss": 1.2648, "step": 14066 }, { "epoch": 0.7840700072459729, "grad_norm": 0.5625550746917725, "learning_rate": 1.1451531124818548e-05, "loss": 1.5514, "step": 14067 }, { "epoch": 0.784125745499136, "grad_norm": 0.5699339509010315, "learning_rate": 1.1445860875410586e-05, "loss": 1.5198, "step": 14068 }, { "epoch": 0.7841814837522992, "grad_norm": 0.568102240562439, "learning_rate": 1.1440191848715143e-05, "loss": 1.6896, "step": 14069 }, { "epoch": 0.7842372220054623, "grad_norm": 0.5320776700973511, "learning_rate": 1.1434524044912009e-05, "loss": 1.4559, "step": 14070 }, { "epoch": 0.7842929602586255, "grad_norm": 0.5914183855056763, "learning_rate": 1.1428857464180908e-05, "loss": 1.6122, "step": 14071 }, { "epoch": 0.7843486985117887, "grad_norm": 0.5802083015441895, "learning_rate": 1.1423192106701563e-05, "loss": 1.7864, "step": 14072 }, { "epoch": 0.7844044367649518, "grad_norm": 0.6148181557655334, "learning_rate": 1.1417527972653647e-05, "loss": 1.6808, "step": 14073 }, { "epoch": 0.7844601750181149, "grad_norm": 0.5611074566841125, "learning_rate": 1.1411865062216792e-05, "loss": 1.7022, "step": 14074 }, { "epoch": 0.7845159132712781, "grad_norm": 0.5848512649536133, "learning_rate": 1.140620337557059e-05, "loss": 1.5988, "step": 14075 }, { "epoch": 0.7845716515244412, "grad_norm": 0.5921863913536072, "learning_rate": 1.1400542912894602e-05, "loss": 1.5858, "step": 14076 }, { "epoch": 0.7846273897776044, "grad_norm": 0.5781610012054443, "learning_rate": 1.1394883674368356e-05, "loss": 1.795, "step": 14077 }, { "epoch": 0.7846831280307676, "grad_norm": 0.5733685493469238, "learning_rate": 1.1389225660171299e-05, "loss": 1.7813, "step": 14078 }, { "epoch": 0.7847388662839306, "grad_norm": 0.5941674709320068, "learning_rate": 1.1383568870482891e-05, "loss": 1.6134, "step": 14079 }, { "epoch": 0.7847946045370938, "grad_norm": 0.5456043481826782, "learning_rate": 1.137791330548253e-05, "loss": 1.6329, "step": 14080 }, { "epoch": 0.784850342790257, "grad_norm": 0.5724867582321167, "learning_rate": 1.1372258965349575e-05, "loss": 1.6439, "step": 14081 }, { "epoch": 0.7849060810434201, "grad_norm": 0.5601940155029297, "learning_rate": 1.1366605850263368e-05, "loss": 1.5401, "step": 14082 }, { "epoch": 0.7849618192965833, "grad_norm": 0.6222748756408691, "learning_rate": 1.1360953960403142e-05, "loss": 1.7608, "step": 14083 }, { "epoch": 0.7850175575497463, "grad_norm": 0.5868781208992004, "learning_rate": 1.1355303295948205e-05, "loss": 1.7358, "step": 14084 }, { "epoch": 0.7850732958029095, "grad_norm": 0.5608435869216919, "learning_rate": 1.1349653857077718e-05, "loss": 1.7373, "step": 14085 }, { "epoch": 0.7851290340560727, "grad_norm": 0.5700390338897705, "learning_rate": 1.1344005643970863e-05, "loss": 1.6794, "step": 14086 }, { "epoch": 0.7851847723092358, "grad_norm": 0.5813573002815247, "learning_rate": 1.1338358656806769e-05, "loss": 1.7308, "step": 14087 }, { "epoch": 0.785240510562399, "grad_norm": 0.557304859161377, "learning_rate": 1.133271289576453e-05, "loss": 1.602, "step": 14088 }, { "epoch": 0.7852962488155621, "grad_norm": 0.6621891260147095, "learning_rate": 1.1327068361023202e-05, "loss": 1.803, "step": 14089 }, { "epoch": 0.7853519870687252, "grad_norm": 0.586766242980957, "learning_rate": 1.132142505276177e-05, "loss": 1.4317, "step": 14090 }, { "epoch": 0.7854077253218884, "grad_norm": 0.5434842705726624, "learning_rate": 1.1315782971159227e-05, "loss": 1.6327, "step": 14091 }, { "epoch": 0.7854634635750516, "grad_norm": 0.5822570323944092, "learning_rate": 1.1310142116394506e-05, "loss": 1.7124, "step": 14092 }, { "epoch": 0.7855192018282147, "grad_norm": 0.5885120630264282, "learning_rate": 1.1304502488646513e-05, "loss": 1.7282, "step": 14093 }, { "epoch": 0.7855749400813778, "grad_norm": 0.5187118649482727, "learning_rate": 1.1298864088094058e-05, "loss": 1.3171, "step": 14094 }, { "epoch": 0.785630678334541, "grad_norm": 0.5518853068351746, "learning_rate": 1.1293226914916006e-05, "loss": 1.52, "step": 14095 }, { "epoch": 0.7856864165877041, "grad_norm": 0.5994083285331726, "learning_rate": 1.128759096929114e-05, "loss": 1.6558, "step": 14096 }, { "epoch": 0.7857421548408673, "grad_norm": 0.5273770689964294, "learning_rate": 1.1281956251398157e-05, "loss": 1.6298, "step": 14097 }, { "epoch": 0.7857978930940305, "grad_norm": 0.5608893036842346, "learning_rate": 1.1276322761415786e-05, "loss": 1.4396, "step": 14098 }, { "epoch": 0.7858536313471935, "grad_norm": 0.6453974843025208, "learning_rate": 1.127069049952268e-05, "loss": 2.072, "step": 14099 }, { "epoch": 0.7859093696003567, "grad_norm": 0.5457696914672852, "learning_rate": 1.1265059465897465e-05, "loss": 1.6547, "step": 14100 }, { "epoch": 0.7859651078535199, "grad_norm": 0.5690119862556458, "learning_rate": 1.1259429660718723e-05, "loss": 1.6223, "step": 14101 }, { "epoch": 0.786020846106683, "grad_norm": 0.5498533248901367, "learning_rate": 1.1253801084164995e-05, "loss": 1.632, "step": 14102 }, { "epoch": 0.7860765843598462, "grad_norm": 0.6033128499984741, "learning_rate": 1.1248173736414808e-05, "loss": 1.7907, "step": 14103 }, { "epoch": 0.7861323226130094, "grad_norm": 0.5408663749694824, "learning_rate": 1.1242547617646598e-05, "loss": 1.7747, "step": 14104 }, { "epoch": 0.7861880608661724, "grad_norm": 0.556079089641571, "learning_rate": 1.1236922728038807e-05, "loss": 1.5891, "step": 14105 }, { "epoch": 0.7862437991193356, "grad_norm": 0.5124304890632629, "learning_rate": 1.1231299067769818e-05, "loss": 1.5138, "step": 14106 }, { "epoch": 0.7862995373724987, "grad_norm": 0.551034152507782, "learning_rate": 1.122567663701799e-05, "loss": 1.5725, "step": 14107 }, { "epoch": 0.7863552756256619, "grad_norm": 0.5727046728134155, "learning_rate": 1.122005543596164e-05, "loss": 1.8054, "step": 14108 }, { "epoch": 0.7864110138788251, "grad_norm": 0.5518726706504822, "learning_rate": 1.1214435464779006e-05, "loss": 1.6492, "step": 14109 }, { "epoch": 0.7864667521319881, "grad_norm": 0.530637264251709, "learning_rate": 1.1208816723648364e-05, "loss": 1.5806, "step": 14110 }, { "epoch": 0.7865224903851513, "grad_norm": 0.5481143593788147, "learning_rate": 1.1203199212747878e-05, "loss": 1.691, "step": 14111 }, { "epoch": 0.7865782286383145, "grad_norm": 0.5416361689567566, "learning_rate": 1.1197582932255712e-05, "loss": 1.7296, "step": 14112 }, { "epoch": 0.7866339668914776, "grad_norm": 0.577296793460846, "learning_rate": 1.119196788234998e-05, "loss": 1.5213, "step": 14113 }, { "epoch": 0.7866897051446408, "grad_norm": 0.5732555985450745, "learning_rate": 1.1186354063208759e-05, "loss": 1.6295, "step": 14114 }, { "epoch": 0.786745443397804, "grad_norm": 0.525271475315094, "learning_rate": 1.1180741475010104e-05, "loss": 1.5283, "step": 14115 }, { "epoch": 0.786801181650967, "grad_norm": 0.5291435718536377, "learning_rate": 1.1175130117931987e-05, "loss": 1.3781, "step": 14116 }, { "epoch": 0.7868569199041302, "grad_norm": 0.5415179133415222, "learning_rate": 1.1169519992152372e-05, "loss": 1.554, "step": 14117 }, { "epoch": 0.7869126581572934, "grad_norm": 0.567564070224762, "learning_rate": 1.1163911097849189e-05, "loss": 1.5795, "step": 14118 }, { "epoch": 0.7869683964104565, "grad_norm": 0.5729326009750366, "learning_rate": 1.1158303435200324e-05, "loss": 1.7236, "step": 14119 }, { "epoch": 0.7870241346636196, "grad_norm": 0.5714365243911743, "learning_rate": 1.115269700438359e-05, "loss": 1.7392, "step": 14120 }, { "epoch": 0.7870798729167828, "grad_norm": 0.5530888438224792, "learning_rate": 1.114709180557682e-05, "loss": 1.6407, "step": 14121 }, { "epoch": 0.7871356111699459, "grad_norm": 0.5813184380531311, "learning_rate": 1.1141487838957787e-05, "loss": 1.6539, "step": 14122 }, { "epoch": 0.7871913494231091, "grad_norm": 0.573197066783905, "learning_rate": 1.1135885104704186e-05, "loss": 1.3411, "step": 14123 }, { "epoch": 0.7872470876762723, "grad_norm": 0.5426301956176758, "learning_rate": 1.1130283602993718e-05, "loss": 1.6036, "step": 14124 }, { "epoch": 0.7873028259294353, "grad_norm": 0.5949573516845703, "learning_rate": 1.1124683334004021e-05, "loss": 1.8074, "step": 14125 }, { "epoch": 0.7873585641825985, "grad_norm": 0.587485134601593, "learning_rate": 1.1119084297912729e-05, "loss": 1.893, "step": 14126 }, { "epoch": 0.7874143024357617, "grad_norm": 0.5607683062553406, "learning_rate": 1.1113486494897363e-05, "loss": 1.5241, "step": 14127 }, { "epoch": 0.7874700406889248, "grad_norm": 0.533374547958374, "learning_rate": 1.1107889925135495e-05, "loss": 1.4693, "step": 14128 }, { "epoch": 0.787525778942088, "grad_norm": 0.6041263341903687, "learning_rate": 1.1102294588804613e-05, "loss": 1.8778, "step": 14129 }, { "epoch": 0.787581517195251, "grad_norm": 0.5453782081604004, "learning_rate": 1.1096700486082146e-05, "loss": 1.5998, "step": 14130 }, { "epoch": 0.7876372554484142, "grad_norm": 0.5560128688812256, "learning_rate": 1.1091107617145519e-05, "loss": 1.473, "step": 14131 }, { "epoch": 0.7876929937015774, "grad_norm": 0.5424692630767822, "learning_rate": 1.10855159821721e-05, "loss": 1.4516, "step": 14132 }, { "epoch": 0.7877487319547405, "grad_norm": 0.5891462564468384, "learning_rate": 1.1079925581339229e-05, "loss": 1.8915, "step": 14133 }, { "epoch": 0.7878044702079037, "grad_norm": 0.5774295330047607, "learning_rate": 1.1074336414824215e-05, "loss": 1.5509, "step": 14134 }, { "epoch": 0.7878602084610669, "grad_norm": 0.5502496957778931, "learning_rate": 1.1068748482804264e-05, "loss": 1.4576, "step": 14135 }, { "epoch": 0.7879159467142299, "grad_norm": 0.5660259127616882, "learning_rate": 1.106316178545666e-05, "loss": 1.8418, "step": 14136 }, { "epoch": 0.7879716849673931, "grad_norm": 0.5616737604141235, "learning_rate": 1.105757632295853e-05, "loss": 1.6923, "step": 14137 }, { "epoch": 0.7880274232205563, "grad_norm": 0.559689998626709, "learning_rate": 1.1051992095487029e-05, "loss": 1.6677, "step": 14138 }, { "epoch": 0.7880831614737194, "grad_norm": 0.6023997664451599, "learning_rate": 1.1046409103219251e-05, "loss": 2.0795, "step": 14139 }, { "epoch": 0.7881388997268826, "grad_norm": 0.5657042264938354, "learning_rate": 1.1040827346332272e-05, "loss": 1.572, "step": 14140 }, { "epoch": 0.7881946379800457, "grad_norm": 0.5904360413551331, "learning_rate": 1.103524682500311e-05, "loss": 1.7121, "step": 14141 }, { "epoch": 0.7882503762332088, "grad_norm": 0.6427820920944214, "learning_rate": 1.1029667539408723e-05, "loss": 1.8675, "step": 14142 }, { "epoch": 0.788306114486372, "grad_norm": 0.5739613771438599, "learning_rate": 1.102408948972607e-05, "loss": 1.7795, "step": 14143 }, { "epoch": 0.7883618527395352, "grad_norm": 0.5235968232154846, "learning_rate": 1.1018512676132054e-05, "loss": 1.3756, "step": 14144 }, { "epoch": 0.7884175909926983, "grad_norm": 0.5444537997245789, "learning_rate": 1.101293709880355e-05, "loss": 1.5751, "step": 14145 }, { "epoch": 0.7884733292458614, "grad_norm": 0.5618844628334045, "learning_rate": 1.1007362757917344e-05, "loss": 1.5829, "step": 14146 }, { "epoch": 0.7885290674990246, "grad_norm": 0.5503376722335815, "learning_rate": 1.1001789653650264e-05, "loss": 1.6555, "step": 14147 }, { "epoch": 0.7885848057521877, "grad_norm": 0.5950319170951843, "learning_rate": 1.0996217786179052e-05, "loss": 1.8102, "step": 14148 }, { "epoch": 0.7886405440053509, "grad_norm": 0.5576203465461731, "learning_rate": 1.099064715568039e-05, "loss": 1.6791, "step": 14149 }, { "epoch": 0.7886962822585141, "grad_norm": 0.569321870803833, "learning_rate": 1.0985077762330963e-05, "loss": 1.6076, "step": 14150 }, { "epoch": 0.7887520205116771, "grad_norm": 0.5832285284996033, "learning_rate": 1.0979509606307398e-05, "loss": 1.6604, "step": 14151 }, { "epoch": 0.7888077587648403, "grad_norm": 0.5698609948158264, "learning_rate": 1.0973942687786293e-05, "loss": 1.588, "step": 14152 }, { "epoch": 0.7888634970180034, "grad_norm": 0.5445098876953125, "learning_rate": 1.0968377006944158e-05, "loss": 1.4434, "step": 14153 }, { "epoch": 0.7889192352711666, "grad_norm": 0.5489819645881653, "learning_rate": 1.0962812563957552e-05, "loss": 1.5954, "step": 14154 }, { "epoch": 0.7889749735243298, "grad_norm": 0.5446029901504517, "learning_rate": 1.095724935900294e-05, "loss": 1.359, "step": 14155 }, { "epoch": 0.7890307117774928, "grad_norm": 0.5374274849891663, "learning_rate": 1.0951687392256738e-05, "loss": 1.5071, "step": 14156 }, { "epoch": 0.789086450030656, "grad_norm": 0.5869937539100647, "learning_rate": 1.0946126663895335e-05, "loss": 1.7058, "step": 14157 }, { "epoch": 0.7891421882838192, "grad_norm": 0.549609899520874, "learning_rate": 1.0940567174095101e-05, "loss": 1.6391, "step": 14158 }, { "epoch": 0.7891979265369823, "grad_norm": 0.5412814617156982, "learning_rate": 1.0935008923032336e-05, "loss": 1.3799, "step": 14159 }, { "epoch": 0.7892536647901455, "grad_norm": 0.5886615514755249, "learning_rate": 1.0929451910883343e-05, "loss": 1.5895, "step": 14160 }, { "epoch": 0.7893094030433087, "grad_norm": 0.559424638748169, "learning_rate": 1.0923896137824308e-05, "loss": 1.3587, "step": 14161 }, { "epoch": 0.7893651412964717, "grad_norm": 0.6307703256607056, "learning_rate": 1.0918341604031491e-05, "loss": 1.6722, "step": 14162 }, { "epoch": 0.7894208795496349, "grad_norm": 0.5167428255081177, "learning_rate": 1.0912788309680999e-05, "loss": 1.3863, "step": 14163 }, { "epoch": 0.7894766178027981, "grad_norm": 0.5647063851356506, "learning_rate": 1.0907236254948967e-05, "loss": 1.6009, "step": 14164 }, { "epoch": 0.7895323560559612, "grad_norm": 0.5547575354576111, "learning_rate": 1.0901685440011471e-05, "loss": 1.5707, "step": 14165 }, { "epoch": 0.7895880943091244, "grad_norm": 0.5227721333503723, "learning_rate": 1.089613586504456e-05, "loss": 1.4619, "step": 14166 }, { "epoch": 0.7896438325622875, "grad_norm": 0.5079346895217896, "learning_rate": 1.0890587530224239e-05, "loss": 1.5639, "step": 14167 }, { "epoch": 0.7896995708154506, "grad_norm": 0.5701187252998352, "learning_rate": 1.088504043572643e-05, "loss": 1.7238, "step": 14168 }, { "epoch": 0.7897553090686138, "grad_norm": 0.5453519821166992, "learning_rate": 1.0879494581727112e-05, "loss": 1.5679, "step": 14169 }, { "epoch": 0.789811047321777, "grad_norm": 0.5493216514587402, "learning_rate": 1.087394996840212e-05, "loss": 1.6117, "step": 14170 }, { "epoch": 0.7898667855749401, "grad_norm": 0.5504185557365417, "learning_rate": 1.0868406595927327e-05, "loss": 1.5702, "step": 14171 }, { "epoch": 0.7899225238281032, "grad_norm": 0.5828469395637512, "learning_rate": 1.0862864464478501e-05, "loss": 1.8501, "step": 14172 }, { "epoch": 0.7899782620812664, "grad_norm": 0.5702177882194519, "learning_rate": 1.0857323574231443e-05, "loss": 1.7125, "step": 14173 }, { "epoch": 0.7900340003344295, "grad_norm": 0.6009947061538696, "learning_rate": 1.0851783925361875e-05, "loss": 1.8166, "step": 14174 }, { "epoch": 0.7900897385875927, "grad_norm": 0.5664753317832947, "learning_rate": 1.0846245518045457e-05, "loss": 1.5777, "step": 14175 }, { "epoch": 0.7901454768407558, "grad_norm": 0.5617591738700867, "learning_rate": 1.0840708352457851e-05, "loss": 1.8449, "step": 14176 }, { "epoch": 0.7902012150939189, "grad_norm": 0.5748462080955505, "learning_rate": 1.0835172428774659e-05, "loss": 1.8072, "step": 14177 }, { "epoch": 0.7902569533470821, "grad_norm": 0.5637654066085815, "learning_rate": 1.0829637747171468e-05, "loss": 1.4591, "step": 14178 }, { "epoch": 0.7903126916002452, "grad_norm": 0.5314264297485352, "learning_rate": 1.0824104307823756e-05, "loss": 1.5205, "step": 14179 }, { "epoch": 0.7903684298534084, "grad_norm": 0.5520778894424438, "learning_rate": 1.081857211090706e-05, "loss": 1.5751, "step": 14180 }, { "epoch": 0.7904241681065716, "grad_norm": 0.5554165244102478, "learning_rate": 1.0813041156596826e-05, "loss": 1.445, "step": 14181 }, { "epoch": 0.7904799063597346, "grad_norm": 0.5123404264450073, "learning_rate": 1.080751144506844e-05, "loss": 1.3204, "step": 14182 }, { "epoch": 0.7905356446128978, "grad_norm": 0.5553086400032043, "learning_rate": 1.0801982976497283e-05, "loss": 1.6754, "step": 14183 }, { "epoch": 0.790591382866061, "grad_norm": 0.6091317534446716, "learning_rate": 1.0796455751058682e-05, "loss": 1.685, "step": 14184 }, { "epoch": 0.7906471211192241, "grad_norm": 0.5874457955360413, "learning_rate": 1.0790929768927932e-05, "loss": 1.6895, "step": 14185 }, { "epoch": 0.7907028593723873, "grad_norm": 0.5442774295806885, "learning_rate": 1.0785405030280305e-05, "loss": 1.512, "step": 14186 }, { "epoch": 0.7907585976255505, "grad_norm": 0.5660844445228577, "learning_rate": 1.077988153529096e-05, "loss": 1.6243, "step": 14187 }, { "epoch": 0.7908143358787135, "grad_norm": 0.5658431649208069, "learning_rate": 1.0774359284135133e-05, "loss": 1.5732, "step": 14188 }, { "epoch": 0.7908700741318767, "grad_norm": 0.5881638526916504, "learning_rate": 1.0768838276987914e-05, "loss": 1.7611, "step": 14189 }, { "epoch": 0.7909258123850399, "grad_norm": 0.5711503624916077, "learning_rate": 1.0763318514024412e-05, "loss": 1.5442, "step": 14190 }, { "epoch": 0.790981550638203, "grad_norm": 0.5731552243232727, "learning_rate": 1.0757799995419677e-05, "loss": 1.7336, "step": 14191 }, { "epoch": 0.7910372888913662, "grad_norm": 0.5885837078094482, "learning_rate": 1.0752282721348733e-05, "loss": 1.794, "step": 14192 }, { "epoch": 0.7910930271445293, "grad_norm": 0.5574288368225098, "learning_rate": 1.0746766691986565e-05, "loss": 1.7035, "step": 14193 }, { "epoch": 0.7911487653976924, "grad_norm": 0.5461791753768921, "learning_rate": 1.0741251907508065e-05, "loss": 1.5022, "step": 14194 }, { "epoch": 0.7912045036508556, "grad_norm": 0.5729514360427856, "learning_rate": 1.0735738368088188e-05, "loss": 1.6407, "step": 14195 }, { "epoch": 0.7912602419040188, "grad_norm": 0.5560081005096436, "learning_rate": 1.073022607390175e-05, "loss": 1.5304, "step": 14196 }, { "epoch": 0.7913159801571819, "grad_norm": 0.6013069748878479, "learning_rate": 1.0724715025123599e-05, "loss": 1.6339, "step": 14197 }, { "epoch": 0.791371718410345, "grad_norm": 0.5798637866973877, "learning_rate": 1.0719205221928464e-05, "loss": 1.7463, "step": 14198 }, { "epoch": 0.7914274566635081, "grad_norm": 0.5300682187080383, "learning_rate": 1.0713696664491134e-05, "loss": 1.5111, "step": 14199 }, { "epoch": 0.7914831949166713, "grad_norm": 0.5579528212547302, "learning_rate": 1.0708189352986304e-05, "loss": 1.5489, "step": 14200 }, { "epoch": 0.7915389331698345, "grad_norm": 0.5883510112762451, "learning_rate": 1.0702683287588606e-05, "loss": 1.7422, "step": 14201 }, { "epoch": 0.7915946714229976, "grad_norm": 0.5935243964195251, "learning_rate": 1.0697178468472674e-05, "loss": 1.6753, "step": 14202 }, { "epoch": 0.7916504096761607, "grad_norm": 0.5785866379737854, "learning_rate": 1.0691674895813092e-05, "loss": 1.5724, "step": 14203 }, { "epoch": 0.7917061479293239, "grad_norm": 0.5762687921524048, "learning_rate": 1.0686172569784415e-05, "loss": 1.6701, "step": 14204 }, { "epoch": 0.791761886182487, "grad_norm": 0.5559493899345398, "learning_rate": 1.0680671490561095e-05, "loss": 1.6701, "step": 14205 }, { "epoch": 0.7918176244356502, "grad_norm": 0.571079432964325, "learning_rate": 1.0675171658317645e-05, "loss": 1.6881, "step": 14206 }, { "epoch": 0.7918733626888134, "grad_norm": 0.5778709053993225, "learning_rate": 1.0669673073228482e-05, "loss": 1.7261, "step": 14207 }, { "epoch": 0.7919291009419764, "grad_norm": 0.6055009961128235, "learning_rate": 1.0664175735467963e-05, "loss": 1.8894, "step": 14208 }, { "epoch": 0.7919848391951396, "grad_norm": 0.541527509689331, "learning_rate": 1.0658679645210445e-05, "loss": 1.4879, "step": 14209 }, { "epoch": 0.7920405774483028, "grad_norm": 0.5720058679580688, "learning_rate": 1.065318480263024e-05, "loss": 1.7824, "step": 14210 }, { "epoch": 0.7920963157014659, "grad_norm": 0.5170486569404602, "learning_rate": 1.06476912079016e-05, "loss": 1.4463, "step": 14211 }, { "epoch": 0.7921520539546291, "grad_norm": 0.5375114679336548, "learning_rate": 1.0642198861198771e-05, "loss": 1.6579, "step": 14212 }, { "epoch": 0.7922077922077922, "grad_norm": 0.5250227451324463, "learning_rate": 1.0636707762695891e-05, "loss": 1.6839, "step": 14213 }, { "epoch": 0.7922635304609553, "grad_norm": 0.5751910209655762, "learning_rate": 1.0631217912567165e-05, "loss": 1.7319, "step": 14214 }, { "epoch": 0.7923192687141185, "grad_norm": 0.5820494890213013, "learning_rate": 1.0625729310986659e-05, "loss": 1.7223, "step": 14215 }, { "epoch": 0.7923750069672817, "grad_norm": 0.6259032487869263, "learning_rate": 1.0620241958128451e-05, "loss": 1.6946, "step": 14216 }, { "epoch": 0.7924307452204448, "grad_norm": 0.5648552775382996, "learning_rate": 1.061475585416657e-05, "loss": 1.7499, "step": 14217 }, { "epoch": 0.792486483473608, "grad_norm": 0.5858311653137207, "learning_rate": 1.0609270999275e-05, "loss": 1.6454, "step": 14218 }, { "epoch": 0.7925422217267711, "grad_norm": 0.5872727632522583, "learning_rate": 1.0603787393627701e-05, "loss": 1.7257, "step": 14219 }, { "epoch": 0.7925979599799342, "grad_norm": 0.6232999563217163, "learning_rate": 1.0598305037398543e-05, "loss": 1.5123, "step": 14220 }, { "epoch": 0.7926536982330974, "grad_norm": 0.5462108254432678, "learning_rate": 1.0592823930761454e-05, "loss": 1.5896, "step": 14221 }, { "epoch": 0.7927094364862605, "grad_norm": 0.5950632691383362, "learning_rate": 1.0587344073890209e-05, "loss": 1.6032, "step": 14222 }, { "epoch": 0.7927651747394237, "grad_norm": 0.5734551548957825, "learning_rate": 1.058186546695864e-05, "loss": 1.685, "step": 14223 }, { "epoch": 0.7928209129925868, "grad_norm": 0.5197454690933228, "learning_rate": 1.0576388110140444e-05, "loss": 1.4667, "step": 14224 }, { "epoch": 0.7928766512457499, "grad_norm": 0.5591278672218323, "learning_rate": 1.0570912003609374e-05, "loss": 1.7501, "step": 14225 }, { "epoch": 0.7929323894989131, "grad_norm": 0.602046012878418, "learning_rate": 1.0565437147539104e-05, "loss": 1.6845, "step": 14226 }, { "epoch": 0.7929881277520763, "grad_norm": 0.6184342503547668, "learning_rate": 1.055996354210323e-05, "loss": 1.6876, "step": 14227 }, { "epoch": 0.7930438660052394, "grad_norm": 0.5796352028846741, "learning_rate": 1.0554491187475363e-05, "loss": 1.6564, "step": 14228 }, { "epoch": 0.7930996042584025, "grad_norm": 0.5525890588760376, "learning_rate": 1.0549020083829053e-05, "loss": 1.5664, "step": 14229 }, { "epoch": 0.7931553425115657, "grad_norm": 0.641735315322876, "learning_rate": 1.0543550231337824e-05, "loss": 1.6195, "step": 14230 }, { "epoch": 0.7932110807647288, "grad_norm": 0.563994288444519, "learning_rate": 1.0538081630175106e-05, "loss": 1.8589, "step": 14231 }, { "epoch": 0.793266819017892, "grad_norm": 0.5552716851234436, "learning_rate": 1.0532614280514374e-05, "loss": 1.5969, "step": 14232 }, { "epoch": 0.7933225572710552, "grad_norm": 0.6517505049705505, "learning_rate": 1.0527148182529023e-05, "loss": 1.914, "step": 14233 }, { "epoch": 0.7933782955242182, "grad_norm": 0.588067352771759, "learning_rate": 1.0521683336392374e-05, "loss": 1.5224, "step": 14234 }, { "epoch": 0.7934340337773814, "grad_norm": 0.5851812958717346, "learning_rate": 1.0516219742277755e-05, "loss": 1.7211, "step": 14235 }, { "epoch": 0.7934897720305446, "grad_norm": 0.5404538512229919, "learning_rate": 1.051075740035844e-05, "loss": 1.193, "step": 14236 }, { "epoch": 0.7935455102837077, "grad_norm": 0.625626266002655, "learning_rate": 1.050529631080766e-05, "loss": 1.7133, "step": 14237 }, { "epoch": 0.7936012485368709, "grad_norm": 0.5095002055168152, "learning_rate": 1.0499836473798624e-05, "loss": 1.4363, "step": 14238 }, { "epoch": 0.793656986790034, "grad_norm": 0.5581433773040771, "learning_rate": 1.0494377889504448e-05, "loss": 1.4707, "step": 14239 }, { "epoch": 0.7937127250431971, "grad_norm": 0.5656692981719971, "learning_rate": 1.0488920558098298e-05, "loss": 1.5554, "step": 14240 }, { "epoch": 0.7937684632963603, "grad_norm": 0.5666208267211914, "learning_rate": 1.0483464479753207e-05, "loss": 1.7977, "step": 14241 }, { "epoch": 0.7938242015495235, "grad_norm": 0.525331437587738, "learning_rate": 1.0478009654642229e-05, "loss": 1.5221, "step": 14242 }, { "epoch": 0.7938799398026866, "grad_norm": 0.5352795124053955, "learning_rate": 1.047255608293835e-05, "loss": 1.4287, "step": 14243 }, { "epoch": 0.7939356780558497, "grad_norm": 0.5808674693107605, "learning_rate": 1.0467103764814534e-05, "loss": 1.5659, "step": 14244 }, { "epoch": 0.7939914163090128, "grad_norm": 0.7119161486625671, "learning_rate": 1.0461652700443708e-05, "loss": 1.8926, "step": 14245 }, { "epoch": 0.794047154562176, "grad_norm": 0.5350673198699951, "learning_rate": 1.0456202889998706e-05, "loss": 1.5393, "step": 14246 }, { "epoch": 0.7941028928153392, "grad_norm": 0.5706144571304321, "learning_rate": 1.0450754333652423e-05, "loss": 1.7159, "step": 14247 }, { "epoch": 0.7941586310685023, "grad_norm": 0.5782610774040222, "learning_rate": 1.0445307031577606e-05, "loss": 1.7552, "step": 14248 }, { "epoch": 0.7942143693216654, "grad_norm": 0.5863004326820374, "learning_rate": 1.0439860983947031e-05, "loss": 1.7994, "step": 14249 }, { "epoch": 0.7942701075748286, "grad_norm": 0.5793316960334778, "learning_rate": 1.0434416190933415e-05, "loss": 1.6273, "step": 14250 }, { "epoch": 0.7943258458279917, "grad_norm": 0.5680450797080994, "learning_rate": 1.0428972652709435e-05, "loss": 1.7099, "step": 14251 }, { "epoch": 0.7943815840811549, "grad_norm": 0.5190421342849731, "learning_rate": 1.0423530369447736e-05, "loss": 1.4654, "step": 14252 }, { "epoch": 0.7944373223343181, "grad_norm": 0.6031879186630249, "learning_rate": 1.0418089341320902e-05, "loss": 1.4411, "step": 14253 }, { "epoch": 0.7944930605874811, "grad_norm": 0.5635674595832825, "learning_rate": 1.0412649568501487e-05, "loss": 1.6224, "step": 14254 }, { "epoch": 0.7945487988406443, "grad_norm": 0.56245356798172, "learning_rate": 1.0407211051162024e-05, "loss": 1.5888, "step": 14255 }, { "epoch": 0.7946045370938075, "grad_norm": 0.5387111902236938, "learning_rate": 1.0401773789474994e-05, "loss": 1.6535, "step": 14256 }, { "epoch": 0.7946602753469706, "grad_norm": 0.5577722191810608, "learning_rate": 1.0396337783612797e-05, "loss": 1.4711, "step": 14257 }, { "epoch": 0.7947160136001338, "grad_norm": 0.6203587651252747, "learning_rate": 1.0390903033747879e-05, "loss": 1.838, "step": 14258 }, { "epoch": 0.794771751853297, "grad_norm": 0.5994099378585815, "learning_rate": 1.0385469540052589e-05, "loss": 1.7793, "step": 14259 }, { "epoch": 0.79482749010646, "grad_norm": 0.5388332009315491, "learning_rate": 1.0380037302699225e-05, "loss": 1.6085, "step": 14260 }, { "epoch": 0.7948832283596232, "grad_norm": 0.5807412266731262, "learning_rate": 1.0374606321860076e-05, "loss": 1.8404, "step": 14261 }, { "epoch": 0.7949389666127864, "grad_norm": 0.5289828777313232, "learning_rate": 1.0369176597707386e-05, "loss": 1.4937, "step": 14262 }, { "epoch": 0.7949947048659495, "grad_norm": 0.5667517781257629, "learning_rate": 1.0363748130413358e-05, "loss": 1.7009, "step": 14263 }, { "epoch": 0.7950504431191127, "grad_norm": 0.5320255160331726, "learning_rate": 1.0358320920150132e-05, "loss": 1.5766, "step": 14264 }, { "epoch": 0.7951061813722758, "grad_norm": 0.5835577845573425, "learning_rate": 1.0352894967089833e-05, "loss": 1.6995, "step": 14265 }, { "epoch": 0.7951619196254389, "grad_norm": 0.6064572334289551, "learning_rate": 1.0347470271404569e-05, "loss": 1.7647, "step": 14266 }, { "epoch": 0.7952176578786021, "grad_norm": 0.5594108700752258, "learning_rate": 1.0342046833266339e-05, "loss": 1.3858, "step": 14267 }, { "epoch": 0.7952733961317652, "grad_norm": 0.5946968793869019, "learning_rate": 1.033662465284717e-05, "loss": 1.7127, "step": 14268 }, { "epoch": 0.7953291343849284, "grad_norm": 0.5593485236167908, "learning_rate": 1.033120373031901e-05, "loss": 1.5011, "step": 14269 }, { "epoch": 0.7953848726380915, "grad_norm": 0.5262752771377563, "learning_rate": 1.0325784065853783e-05, "loss": 1.5378, "step": 14270 }, { "epoch": 0.7954406108912546, "grad_norm": 0.5922139883041382, "learning_rate": 1.0320365659623377e-05, "loss": 1.7753, "step": 14271 }, { "epoch": 0.7954963491444178, "grad_norm": 0.5745583176612854, "learning_rate": 1.0314948511799605e-05, "loss": 1.7533, "step": 14272 }, { "epoch": 0.795552087397581, "grad_norm": 0.608664870262146, "learning_rate": 1.0309532622554308e-05, "loss": 1.7726, "step": 14273 }, { "epoch": 0.7956078256507441, "grad_norm": 0.5508156418800354, "learning_rate": 1.0304117992059215e-05, "loss": 1.7032, "step": 14274 }, { "epoch": 0.7956635639039072, "grad_norm": 0.5590789318084717, "learning_rate": 1.0298704620486055e-05, "loss": 1.648, "step": 14275 }, { "epoch": 0.7957193021570704, "grad_norm": 0.6094940304756165, "learning_rate": 1.0293292508006507e-05, "loss": 1.8069, "step": 14276 }, { "epoch": 0.7957750404102335, "grad_norm": 0.5808109045028687, "learning_rate": 1.028788165479222e-05, "loss": 1.643, "step": 14277 }, { "epoch": 0.7958307786633967, "grad_norm": 0.5147292613983154, "learning_rate": 1.0282472061014797e-05, "loss": 1.4351, "step": 14278 }, { "epoch": 0.7958865169165599, "grad_norm": 0.5710453391075134, "learning_rate": 1.0277063726845781e-05, "loss": 1.6967, "step": 14279 }, { "epoch": 0.795942255169723, "grad_norm": 0.5748862624168396, "learning_rate": 1.02716566524567e-05, "loss": 1.7828, "step": 14280 }, { "epoch": 0.7959979934228861, "grad_norm": 0.5527694225311279, "learning_rate": 1.0266250838019036e-05, "loss": 1.5625, "step": 14281 }, { "epoch": 0.7960537316760493, "grad_norm": 0.558049738407135, "learning_rate": 1.026084628370425e-05, "loss": 1.607, "step": 14282 }, { "epoch": 0.7961094699292124, "grad_norm": 0.6232607364654541, "learning_rate": 1.0255442989683694e-05, "loss": 1.8911, "step": 14283 }, { "epoch": 0.7961652081823756, "grad_norm": 0.598755419254303, "learning_rate": 1.0250040956128776e-05, "loss": 1.6691, "step": 14284 }, { "epoch": 0.7962209464355388, "grad_norm": 0.5317803025245667, "learning_rate": 1.0244640183210814e-05, "loss": 1.6303, "step": 14285 }, { "epoch": 0.7962766846887018, "grad_norm": 0.5924306511878967, "learning_rate": 1.0239240671101063e-05, "loss": 1.7553, "step": 14286 }, { "epoch": 0.796332422941865, "grad_norm": 0.5458486676216125, "learning_rate": 1.0233842419970773e-05, "loss": 1.58, "step": 14287 }, { "epoch": 0.7963881611950282, "grad_norm": 0.5232000350952148, "learning_rate": 1.0228445429991151e-05, "loss": 1.5677, "step": 14288 }, { "epoch": 0.7964438994481913, "grad_norm": 0.6367863416671753, "learning_rate": 1.0223049701333371e-05, "loss": 1.8649, "step": 14289 }, { "epoch": 0.7964996377013545, "grad_norm": 0.5690382719039917, "learning_rate": 1.0217655234168522e-05, "loss": 1.7821, "step": 14290 }, { "epoch": 0.7965553759545175, "grad_norm": 0.591062605381012, "learning_rate": 1.0212262028667686e-05, "loss": 1.7406, "step": 14291 }, { "epoch": 0.7966111142076807, "grad_norm": 0.5326418876647949, "learning_rate": 1.0206870085001952e-05, "loss": 1.6126, "step": 14292 }, { "epoch": 0.7966668524608439, "grad_norm": 0.5550294518470764, "learning_rate": 1.0201479403342273e-05, "loss": 1.5392, "step": 14293 }, { "epoch": 0.796722590714007, "grad_norm": 0.5567722916603088, "learning_rate": 1.0196089983859624e-05, "loss": 1.4339, "step": 14294 }, { "epoch": 0.7967783289671702, "grad_norm": 0.5251907110214233, "learning_rate": 1.0190701826724929e-05, "loss": 1.6336, "step": 14295 }, { "epoch": 0.7968340672203333, "grad_norm": 0.5811560750007629, "learning_rate": 1.0185314932109069e-05, "loss": 1.5615, "step": 14296 }, { "epoch": 0.7968898054734964, "grad_norm": 0.5523189306259155, "learning_rate": 1.01799293001829e-05, "loss": 1.4878, "step": 14297 }, { "epoch": 0.7969455437266596, "grad_norm": 0.5699687004089355, "learning_rate": 1.0174544931117175e-05, "loss": 1.6865, "step": 14298 }, { "epoch": 0.7970012819798228, "grad_norm": 0.5722973942756653, "learning_rate": 1.0169161825082718e-05, "loss": 1.6865, "step": 14299 }, { "epoch": 0.7970570202329859, "grad_norm": 0.5504626631736755, "learning_rate": 1.0163779982250199e-05, "loss": 1.6237, "step": 14300 }, { "epoch": 0.797112758486149, "grad_norm": 0.6280822157859802, "learning_rate": 1.015839940279032e-05, "loss": 1.7827, "step": 14301 }, { "epoch": 0.7971684967393122, "grad_norm": 0.6179702281951904, "learning_rate": 1.015302008687372e-05, "loss": 1.4354, "step": 14302 }, { "epoch": 0.7972242349924753, "grad_norm": 0.5778931975364685, "learning_rate": 1.0147642034670996e-05, "loss": 1.6831, "step": 14303 }, { "epoch": 0.7972799732456385, "grad_norm": 0.5538243055343628, "learning_rate": 1.0142265246352728e-05, "loss": 1.7572, "step": 14304 }, { "epoch": 0.7973357114988017, "grad_norm": 0.5653696656227112, "learning_rate": 1.0136889722089404e-05, "loss": 1.6094, "step": 14305 }, { "epoch": 0.7973914497519647, "grad_norm": 0.6097986698150635, "learning_rate": 1.0131515462051521e-05, "loss": 1.9664, "step": 14306 }, { "epoch": 0.7974471880051279, "grad_norm": 0.573856770992279, "learning_rate": 1.0126142466409517e-05, "loss": 1.7449, "step": 14307 }, { "epoch": 0.7975029262582911, "grad_norm": 0.5199556946754456, "learning_rate": 1.0120770735333807e-05, "loss": 1.4495, "step": 14308 }, { "epoch": 0.7975586645114542, "grad_norm": 0.5416279435157776, "learning_rate": 1.0115400268994713e-05, "loss": 1.5221, "step": 14309 }, { "epoch": 0.7976144027646174, "grad_norm": 0.5629909634590149, "learning_rate": 1.0110031067562592e-05, "loss": 1.4912, "step": 14310 }, { "epoch": 0.7976701410177806, "grad_norm": 0.5700094699859619, "learning_rate": 1.010466313120772e-05, "loss": 1.7532, "step": 14311 }, { "epoch": 0.7977258792709436, "grad_norm": 0.5885013937950134, "learning_rate": 1.0099296460100322e-05, "loss": 1.7185, "step": 14312 }, { "epoch": 0.7977816175241068, "grad_norm": 0.5609301328659058, "learning_rate": 1.0093931054410594e-05, "loss": 1.5657, "step": 14313 }, { "epoch": 0.7978373557772699, "grad_norm": 0.5494312047958374, "learning_rate": 1.008856691430871e-05, "loss": 1.6364, "step": 14314 }, { "epoch": 0.7978930940304331, "grad_norm": 0.5229134559631348, "learning_rate": 1.0083204039964794e-05, "loss": 1.5217, "step": 14315 }, { "epoch": 0.7979488322835963, "grad_norm": 0.573517918586731, "learning_rate": 1.0077842431548906e-05, "loss": 1.7008, "step": 14316 }, { "epoch": 0.7980045705367593, "grad_norm": 0.5787200927734375, "learning_rate": 1.0072482089231078e-05, "loss": 1.5748, "step": 14317 }, { "epoch": 0.7980603087899225, "grad_norm": 0.5575430989265442, "learning_rate": 1.006712301318135e-05, "loss": 1.6165, "step": 14318 }, { "epoch": 0.7981160470430857, "grad_norm": 0.49361756443977356, "learning_rate": 1.0061765203569639e-05, "loss": 1.1837, "step": 14319 }, { "epoch": 0.7981717852962488, "grad_norm": 0.5558135509490967, "learning_rate": 1.0056408660565885e-05, "loss": 1.7098, "step": 14320 }, { "epoch": 0.798227523549412, "grad_norm": 0.6294339895248413, "learning_rate": 1.0051053384339959e-05, "loss": 1.8916, "step": 14321 }, { "epoch": 0.7982832618025751, "grad_norm": 0.544558048248291, "learning_rate": 1.0045699375061701e-05, "loss": 1.6068, "step": 14322 }, { "epoch": 0.7983390000557382, "grad_norm": 0.6012967228889465, "learning_rate": 1.0040346632900921e-05, "loss": 1.5894, "step": 14323 }, { "epoch": 0.7983947383089014, "grad_norm": 0.5851178765296936, "learning_rate": 1.0034995158027343e-05, "loss": 1.634, "step": 14324 }, { "epoch": 0.7984504765620646, "grad_norm": 0.5557059049606323, "learning_rate": 1.0029644950610728e-05, "loss": 1.6313, "step": 14325 }, { "epoch": 0.7985062148152277, "grad_norm": 0.5574374198913574, "learning_rate": 1.0024296010820721e-05, "loss": 1.5917, "step": 14326 }, { "epoch": 0.7985619530683908, "grad_norm": 0.5546873807907104, "learning_rate": 1.0018948338826972e-05, "loss": 1.6068, "step": 14327 }, { "epoch": 0.798617691321554, "grad_norm": 0.5635491013526917, "learning_rate": 1.0013601934799072e-05, "loss": 1.7385, "step": 14328 }, { "epoch": 0.7986734295747171, "grad_norm": 0.5756046175956726, "learning_rate": 1.0008256798906585e-05, "loss": 1.6175, "step": 14329 }, { "epoch": 0.7987291678278803, "grad_norm": 0.5956593155860901, "learning_rate": 1.0002912931319036e-05, "loss": 1.623, "step": 14330 }, { "epoch": 0.7987849060810435, "grad_norm": 0.5440784096717834, "learning_rate": 9.997570332205875e-06, "loss": 1.4983, "step": 14331 }, { "epoch": 0.7988406443342065, "grad_norm": 0.5485489964485168, "learning_rate": 9.992229001736553e-06, "loss": 1.435, "step": 14332 }, { "epoch": 0.7988963825873697, "grad_norm": 0.5302622318267822, "learning_rate": 9.986888940080468e-06, "loss": 1.4607, "step": 14333 }, { "epoch": 0.7989521208405329, "grad_norm": 0.5820941925048828, "learning_rate": 9.981550147406987e-06, "loss": 1.556, "step": 14334 }, { "epoch": 0.799007859093696, "grad_norm": 0.5871179103851318, "learning_rate": 9.976212623885384e-06, "loss": 1.668, "step": 14335 }, { "epoch": 0.7990635973468592, "grad_norm": 0.5687511563301086, "learning_rate": 9.970876369684973e-06, "loss": 1.6566, "step": 14336 }, { "epoch": 0.7991193356000222, "grad_norm": 0.5481507182121277, "learning_rate": 9.96554138497499e-06, "loss": 1.2366, "step": 14337 }, { "epoch": 0.7991750738531854, "grad_norm": 0.6357203722000122, "learning_rate": 9.960207669924603e-06, "loss": 1.916, "step": 14338 }, { "epoch": 0.7992308121063486, "grad_norm": NaN, "learning_rate": 9.960207669924603e-06, "loss": 1.5393, "step": 14339 }, { "epoch": 0.7992865503595117, "grad_norm": 0.5581541657447815, "learning_rate": 9.954875224702986e-06, "loss": 1.5587, "step": 14340 }, { "epoch": 0.7993422886126749, "grad_norm": 0.5387499332427979, "learning_rate": 9.949544049479247e-06, "loss": 1.4697, "step": 14341 }, { "epoch": 0.799398026865838, "grad_norm": 0.49244141578674316, "learning_rate": 9.94421414442247e-06, "loss": 1.3914, "step": 14342 }, { "epoch": 0.7994537651190011, "grad_norm": 0.5456081628799438, "learning_rate": 9.938885509701657e-06, "loss": 1.4728, "step": 14343 }, { "epoch": 0.7995095033721643, "grad_norm": 0.5390294790267944, "learning_rate": 9.933558145485833e-06, "loss": 1.3721, "step": 14344 }, { "epoch": 0.7995652416253275, "grad_norm": 0.5985755920410156, "learning_rate": 9.928232051943953e-06, "loss": 1.7802, "step": 14345 }, { "epoch": 0.7996209798784906, "grad_norm": 0.5658624172210693, "learning_rate": 9.922907229244904e-06, "loss": 1.4227, "step": 14346 }, { "epoch": 0.7996767181316538, "grad_norm": 0.5901734232902527, "learning_rate": 9.917583677557574e-06, "loss": 1.6922, "step": 14347 }, { "epoch": 0.7997324563848169, "grad_norm": 0.5785011649131775, "learning_rate": 9.912261397050792e-06, "loss": 1.5325, "step": 14348 }, { "epoch": 0.79978819463798, "grad_norm": 0.5889132618904114, "learning_rate": 9.906940387893354e-06, "loss": 1.7558, "step": 14349 }, { "epoch": 0.7998439328911432, "grad_norm": 0.581152617931366, "learning_rate": 9.901620650254017e-06, "loss": 1.3819, "step": 14350 }, { "epoch": 0.7998996711443064, "grad_norm": 0.548949122428894, "learning_rate": 9.896302184301465e-06, "loss": 1.7104, "step": 14351 }, { "epoch": 0.7999554093974695, "grad_norm": 0.5606316924095154, "learning_rate": 9.890984990204404e-06, "loss": 1.7376, "step": 14352 }, { "epoch": 0.8000111476506326, "grad_norm": 0.6483362317085266, "learning_rate": 9.885669068131437e-06, "loss": 2.0308, "step": 14353 }, { "epoch": 0.8000668859037958, "grad_norm": 0.5594815611839294, "learning_rate": 9.880354418251165e-06, "loss": 1.4996, "step": 14354 }, { "epoch": 0.8001226241569589, "grad_norm": 0.5642004609107971, "learning_rate": 9.875041040732136e-06, "loss": 1.6186, "step": 14355 }, { "epoch": 0.8001783624101221, "grad_norm": 0.5526056289672852, "learning_rate": 9.869728935742862e-06, "loss": 1.4683, "step": 14356 }, { "epoch": 0.8002341006632853, "grad_norm": 0.6208131313323975, "learning_rate": 9.864418103451828e-06, "loss": 1.8107, "step": 14357 }, { "epoch": 0.8002898389164483, "grad_norm": 0.5653442740440369, "learning_rate": 9.859108544027423e-06, "loss": 1.6458, "step": 14358 }, { "epoch": 0.8003455771696115, "grad_norm": 0.5809319615364075, "learning_rate": 9.853800257638063e-06, "loss": 1.6334, "step": 14359 }, { "epoch": 0.8004013154227746, "grad_norm": 0.543079137802124, "learning_rate": 9.848493244452089e-06, "loss": 1.6904, "step": 14360 }, { "epoch": 0.8004570536759378, "grad_norm": 0.5740684270858765, "learning_rate": 9.843187504637824e-06, "loss": 1.6743, "step": 14361 }, { "epoch": 0.800512791929101, "grad_norm": 0.5502151846885681, "learning_rate": 9.837883038363494e-06, "loss": 1.6923, "step": 14362 }, { "epoch": 0.800568530182264, "grad_norm": 0.6467139720916748, "learning_rate": 9.832579845797362e-06, "loss": 1.858, "step": 14363 }, { "epoch": 0.8006242684354272, "grad_norm": 0.5684570670127869, "learning_rate": 9.82727792710762e-06, "loss": 1.6314, "step": 14364 }, { "epoch": 0.8006800066885904, "grad_norm": 0.5606323480606079, "learning_rate": 9.821977282462387e-06, "loss": 1.5115, "step": 14365 }, { "epoch": 0.8007357449417535, "grad_norm": 0.5373196005821228, "learning_rate": 9.81667791202978e-06, "loss": 1.5325, "step": 14366 }, { "epoch": 0.8007914831949167, "grad_norm": 0.5519532561302185, "learning_rate": 9.811379815977866e-06, "loss": 1.4287, "step": 14367 }, { "epoch": 0.8008472214480798, "grad_norm": 0.5695307850837708, "learning_rate": 9.80608299447468e-06, "loss": 1.7301, "step": 14368 }, { "epoch": 0.8009029597012429, "grad_norm": 0.5454866290092468, "learning_rate": 9.80078744768817e-06, "loss": 1.4684, "step": 14369 }, { "epoch": 0.8009586979544061, "grad_norm": 0.5738468766212463, "learning_rate": 9.795493175786318e-06, "loss": 1.6985, "step": 14370 }, { "epoch": 0.8010144362075693, "grad_norm": 0.5349743962287903, "learning_rate": 9.790200178937026e-06, "loss": 1.5258, "step": 14371 }, { "epoch": 0.8010701744607324, "grad_norm": 0.5547036528587341, "learning_rate": 9.784908457308128e-06, "loss": 1.6424, "step": 14372 }, { "epoch": 0.8011259127138955, "grad_norm": 0.5633455514907837, "learning_rate": 9.779618011067471e-06, "loss": 1.5519, "step": 14373 }, { "epoch": 0.8011816509670587, "grad_norm": 0.5618358254432678, "learning_rate": 9.774328840382824e-06, "loss": 1.5504, "step": 14374 }, { "epoch": 0.8012373892202218, "grad_norm": 0.5330274105072021, "learning_rate": 9.769040945421948e-06, "loss": 1.5899, "step": 14375 }, { "epoch": 0.801293127473385, "grad_norm": 0.5382915735244751, "learning_rate": 9.76375432635252e-06, "loss": 1.6959, "step": 14376 }, { "epoch": 0.8013488657265482, "grad_norm": 0.541482150554657, "learning_rate": 9.758468983342194e-06, "loss": 1.6766, "step": 14377 }, { "epoch": 0.8014046039797112, "grad_norm": 0.55217045545578, "learning_rate": 9.753184916558633e-06, "loss": 1.4319, "step": 14378 }, { "epoch": 0.8014603422328744, "grad_norm": 0.5737308859825134, "learning_rate": 9.747902126169383e-06, "loss": 1.8583, "step": 14379 }, { "epoch": 0.8015160804860376, "grad_norm": 0.5124214291572571, "learning_rate": 9.74262061234199e-06, "loss": 1.4291, "step": 14380 }, { "epoch": 0.8015718187392007, "grad_norm": 0.5909250378608704, "learning_rate": 9.737340375243953e-06, "loss": 1.6524, "step": 14381 }, { "epoch": 0.8016275569923639, "grad_norm": 0.5784448981285095, "learning_rate": 9.732061415042732e-06, "loss": 1.7465, "step": 14382 }, { "epoch": 0.801683295245527, "grad_norm": 0.5897037982940674, "learning_rate": 9.726783731905759e-06, "loss": 1.6151, "step": 14383 }, { "epoch": 0.8017390334986901, "grad_norm": 0.5656660795211792, "learning_rate": 9.721507326000383e-06, "loss": 1.5089, "step": 14384 }, { "epoch": 0.8017947717518533, "grad_norm": 0.5050958395004272, "learning_rate": 9.716232197493957e-06, "loss": 1.3962, "step": 14385 }, { "epoch": 0.8018505100050164, "grad_norm": 0.5602197647094727, "learning_rate": 9.710958346553772e-06, "loss": 1.6131, "step": 14386 }, { "epoch": 0.8019062482581796, "grad_norm": 0.5962628722190857, "learning_rate": 9.705685773347101e-06, "loss": 1.5955, "step": 14387 }, { "epoch": 0.8019619865113428, "grad_norm": 0.5173510313034058, "learning_rate": 9.70041447804112e-06, "loss": 1.4416, "step": 14388 }, { "epoch": 0.8020177247645058, "grad_norm": 0.5978318452835083, "learning_rate": 9.69514446080304e-06, "loss": 1.4565, "step": 14389 }, { "epoch": 0.802073463017669, "grad_norm": 0.5639249086380005, "learning_rate": 9.689875721799995e-06, "loss": 1.6088, "step": 14390 }, { "epoch": 0.8021292012708322, "grad_norm": 0.5586134195327759, "learning_rate": 9.684608261199058e-06, "loss": 1.6619, "step": 14391 }, { "epoch": 0.8021849395239953, "grad_norm": 0.6344155669212341, "learning_rate": 9.679342079167291e-06, "loss": 1.884, "step": 14392 }, { "epoch": 0.8022406777771585, "grad_norm": 0.5842048525810242, "learning_rate": 9.674077175871709e-06, "loss": 1.6299, "step": 14393 }, { "epoch": 0.8022964160303216, "grad_norm": 0.5197391510009766, "learning_rate": 9.668813551479295e-06, "loss": 1.5954, "step": 14394 }, { "epoch": 0.8023521542834847, "grad_norm": 0.5479873418807983, "learning_rate": 9.66355120615694e-06, "loss": 1.6079, "step": 14395 }, { "epoch": 0.8024078925366479, "grad_norm": 0.544422447681427, "learning_rate": 9.65829014007158e-06, "loss": 1.5324, "step": 14396 }, { "epoch": 0.8024636307898111, "grad_norm": 0.5572550892829895, "learning_rate": 9.653030353390058e-06, "loss": 1.919, "step": 14397 }, { "epoch": 0.8025193690429742, "grad_norm": 0.5628582835197449, "learning_rate": 9.647771846279162e-06, "loss": 1.5565, "step": 14398 }, { "epoch": 0.8025751072961373, "grad_norm": 0.562232255935669, "learning_rate": 9.642514618905673e-06, "loss": 1.429, "step": 14399 }, { "epoch": 0.8026308455493005, "grad_norm": 0.588840663433075, "learning_rate": 9.637258671436317e-06, "loss": 1.6777, "step": 14400 }, { "epoch": 0.8026865838024636, "grad_norm": 0.542707085609436, "learning_rate": 9.632004004037804e-06, "loss": 1.4368, "step": 14401 }, { "epoch": 0.8027423220556268, "grad_norm": 0.5453609824180603, "learning_rate": 9.626750616876745e-06, "loss": 1.4163, "step": 14402 }, { "epoch": 0.80279806030879, "grad_norm": 0.5559371113777161, "learning_rate": 9.621498510119754e-06, "loss": 1.5634, "step": 14403 }, { "epoch": 0.802853798561953, "grad_norm": 0.5142374038696289, "learning_rate": 9.616247683933428e-06, "loss": 1.4767, "step": 14404 }, { "epoch": 0.8029095368151162, "grad_norm": 0.5711025595664978, "learning_rate": 9.610998138484262e-06, "loss": 1.753, "step": 14405 }, { "epoch": 0.8029652750682793, "grad_norm": 0.5574143528938293, "learning_rate": 9.605749873938752e-06, "loss": 1.6291, "step": 14406 }, { "epoch": 0.8030210133214425, "grad_norm": 0.5474604368209839, "learning_rate": 9.600502890463341e-06, "loss": 1.658, "step": 14407 }, { "epoch": 0.8030767515746057, "grad_norm": 0.6181548833847046, "learning_rate": 9.595257188224433e-06, "loss": 1.8136, "step": 14408 }, { "epoch": 0.8031324898277687, "grad_norm": 0.5687413215637207, "learning_rate": 9.590012767388402e-06, "loss": 1.3871, "step": 14409 }, { "epoch": 0.8031882280809319, "grad_norm": 0.5563036799430847, "learning_rate": 9.584769628121548e-06, "loss": 1.4633, "step": 14410 }, { "epoch": 0.8032439663340951, "grad_norm": 0.5676126480102539, "learning_rate": 9.579527770590163e-06, "loss": 1.7256, "step": 14411 }, { "epoch": 0.8032997045872582, "grad_norm": 0.5939561128616333, "learning_rate": 9.574287194960491e-06, "loss": 1.5641, "step": 14412 }, { "epoch": 0.8033554428404214, "grad_norm": 0.5618433356285095, "learning_rate": 9.569047901398742e-06, "loss": 1.5298, "step": 14413 }, { "epoch": 0.8034111810935846, "grad_norm": 0.5444064736366272, "learning_rate": 9.56380989007104e-06, "loss": 1.5445, "step": 14414 }, { "epoch": 0.8034669193467476, "grad_norm": 0.5717688202857971, "learning_rate": 9.558573161143542e-06, "loss": 1.7519, "step": 14415 }, { "epoch": 0.8035226575999108, "grad_norm": 0.5950873494148254, "learning_rate": 9.553337714782324e-06, "loss": 1.7039, "step": 14416 }, { "epoch": 0.803578395853074, "grad_norm": 0.5963200330734253, "learning_rate": 9.548103551153403e-06, "loss": 1.6528, "step": 14417 }, { "epoch": 0.8036341341062371, "grad_norm": 0.5930481553077698, "learning_rate": 9.542870670422787e-06, "loss": 1.6934, "step": 14418 }, { "epoch": 0.8036898723594003, "grad_norm": 0.5762895941734314, "learning_rate": 9.537639072756432e-06, "loss": 1.5557, "step": 14419 }, { "epoch": 0.8037456106125634, "grad_norm": 0.5235835909843445, "learning_rate": 9.532408758320267e-06, "loss": 1.5267, "step": 14420 }, { "epoch": 0.8038013488657265, "grad_norm": 0.5825777649879456, "learning_rate": 9.527179727280122e-06, "loss": 1.8426, "step": 14421 }, { "epoch": 0.8038570871188897, "grad_norm": 0.5870946645736694, "learning_rate": 9.52195197980188e-06, "loss": 1.8076, "step": 14422 }, { "epoch": 0.8039128253720529, "grad_norm": 0.5672115683555603, "learning_rate": 9.516725516051333e-06, "loss": 1.6287, "step": 14423 }, { "epoch": 0.803968563625216, "grad_norm": 0.5582349896430969, "learning_rate": 9.5115003361942e-06, "loss": 1.6721, "step": 14424 }, { "epoch": 0.8040243018783791, "grad_norm": 0.5517037510871887, "learning_rate": 9.506276440396223e-06, "loss": 1.6215, "step": 14425 }, { "epoch": 0.8040800401315423, "grad_norm": 0.5543230772018433, "learning_rate": 9.501053828823053e-06, "loss": 1.7268, "step": 14426 }, { "epoch": 0.8041357783847054, "grad_norm": 0.6084774732589722, "learning_rate": 9.495832501640344e-06, "loss": 1.7804, "step": 14427 }, { "epoch": 0.8041915166378686, "grad_norm": 0.5827273726463318, "learning_rate": 9.490612459013664e-06, "loss": 1.6868, "step": 14428 }, { "epoch": 0.8042472548910317, "grad_norm": 0.5763819217681885, "learning_rate": 9.485393701108552e-06, "loss": 1.4381, "step": 14429 }, { "epoch": 0.8043029931441948, "grad_norm": 0.5460847616195679, "learning_rate": 9.480176228090566e-06, "loss": 1.6462, "step": 14430 }, { "epoch": 0.804358731397358, "grad_norm": 0.5776088833808899, "learning_rate": 9.47496004012513e-06, "loss": 1.6948, "step": 14431 }, { "epoch": 0.8044144696505211, "grad_norm": 0.5703887343406677, "learning_rate": 9.469745137377678e-06, "loss": 1.5765, "step": 14432 }, { "epoch": 0.8044702079036843, "grad_norm": 0.5882792472839355, "learning_rate": 9.464531520013608e-06, "loss": 1.463, "step": 14433 }, { "epoch": 0.8045259461568475, "grad_norm": 0.5276558995246887, "learning_rate": 9.459319188198262e-06, "loss": 1.4668, "step": 14434 }, { "epoch": 0.8045816844100105, "grad_norm": 0.5833683609962463, "learning_rate": 9.454108142096951e-06, "loss": 1.7968, "step": 14435 }, { "epoch": 0.8046374226631737, "grad_norm": 0.5308690667152405, "learning_rate": 9.448898381874904e-06, "loss": 1.5935, "step": 14436 }, { "epoch": 0.8046931609163369, "grad_norm": 0.6034372448921204, "learning_rate": 9.4436899076974e-06, "loss": 1.8367, "step": 14437 }, { "epoch": 0.8047488991695, "grad_norm": 0.5395357012748718, "learning_rate": 9.438482719729579e-06, "loss": 1.726, "step": 14438 }, { "epoch": 0.8048046374226632, "grad_norm": 0.5694220662117004, "learning_rate": 9.43327681813661e-06, "loss": 1.7836, "step": 14439 }, { "epoch": 0.8048603756758264, "grad_norm": 0.5619423389434814, "learning_rate": 9.428072203083554e-06, "loss": 1.7362, "step": 14440 }, { "epoch": 0.8049161139289894, "grad_norm": 0.5950040817260742, "learning_rate": 9.422868874735507e-06, "loss": 1.8533, "step": 14441 }, { "epoch": 0.8049718521821526, "grad_norm": 0.5778230428695679, "learning_rate": 9.417666833257493e-06, "loss": 1.5445, "step": 14442 }, { "epoch": 0.8050275904353158, "grad_norm": 0.5824107527732849, "learning_rate": 9.412466078814463e-06, "loss": 1.7809, "step": 14443 }, { "epoch": 0.8050833286884789, "grad_norm": 0.5468677282333374, "learning_rate": 9.407266611571368e-06, "loss": 1.547, "step": 14444 }, { "epoch": 0.8051390669416421, "grad_norm": 0.5690337419509888, "learning_rate": 9.402068431693101e-06, "loss": 1.5924, "step": 14445 }, { "epoch": 0.8051948051948052, "grad_norm": 0.5694676637649536, "learning_rate": 9.396871539344537e-06, "loss": 1.5457, "step": 14446 }, { "epoch": 0.8052505434479683, "grad_norm": 0.5355550050735474, "learning_rate": 9.391675934690447e-06, "loss": 1.3105, "step": 14447 }, { "epoch": 0.8053062817011315, "grad_norm": 0.6325549483299255, "learning_rate": 9.386481617895648e-06, "loss": 1.9536, "step": 14448 }, { "epoch": 0.8053620199542947, "grad_norm": 0.5932197570800781, "learning_rate": 9.381288589124876e-06, "loss": 1.5554, "step": 14449 }, { "epoch": 0.8054177582074578, "grad_norm": 0.5165389180183411, "learning_rate": 9.376096848542788e-06, "loss": 1.3512, "step": 14450 }, { "epoch": 0.8054734964606209, "grad_norm": 0.5656865835189819, "learning_rate": 9.370906396314055e-06, "loss": 1.5164, "step": 14451 }, { "epoch": 0.805529234713784, "grad_norm": 0.5801335573196411, "learning_rate": 9.365717232603283e-06, "loss": 1.4119, "step": 14452 }, { "epoch": 0.8055849729669472, "grad_norm": 0.5520214438438416, "learning_rate": 9.360529357575066e-06, "loss": 1.5208, "step": 14453 }, { "epoch": 0.8056407112201104, "grad_norm": 0.5672596096992493, "learning_rate": 9.3553427713939e-06, "loss": 1.5729, "step": 14454 }, { "epoch": 0.8056964494732735, "grad_norm": 0.534829318523407, "learning_rate": 9.350157474224268e-06, "loss": 1.615, "step": 14455 }, { "epoch": 0.8057521877264366, "grad_norm": 0.5782783627510071, "learning_rate": 9.344973466230667e-06, "loss": 1.5061, "step": 14456 }, { "epoch": 0.8058079259795998, "grad_norm": 0.5374855399131775, "learning_rate": 9.339790747577453e-06, "loss": 1.2955, "step": 14457 }, { "epoch": 0.8058636642327629, "grad_norm": 0.5761247277259827, "learning_rate": 9.334609318429016e-06, "loss": 1.6353, "step": 14458 }, { "epoch": 0.8059194024859261, "grad_norm": 0.5449190735816956, "learning_rate": 9.329429178949678e-06, "loss": 1.7109, "step": 14459 }, { "epoch": 0.8059751407390893, "grad_norm": 0.5729144215583801, "learning_rate": 9.324250329303713e-06, "loss": 1.4907, "step": 14460 }, { "epoch": 0.8060308789922523, "grad_norm": 0.5700400471687317, "learning_rate": 9.31907276965539e-06, "loss": 1.6438, "step": 14461 }, { "epoch": 0.8060866172454155, "grad_norm": 0.5756001472473145, "learning_rate": 9.313896500168867e-06, "loss": 1.6177, "step": 14462 }, { "epoch": 0.8061423554985787, "grad_norm": 0.5858460664749146, "learning_rate": 9.308721521008357e-06, "loss": 1.7162, "step": 14463 }, { "epoch": 0.8061980937517418, "grad_norm": 0.5806597471237183, "learning_rate": 9.303547832337934e-06, "loss": 1.8492, "step": 14464 }, { "epoch": 0.806253832004905, "grad_norm": 0.5977433323860168, "learning_rate": 9.298375434321716e-06, "loss": 1.7473, "step": 14465 }, { "epoch": 0.8063095702580682, "grad_norm": 0.5730159282684326, "learning_rate": 9.293204327123694e-06, "loss": 1.5024, "step": 14466 }, { "epoch": 0.8063653085112312, "grad_norm": 0.5740247368812561, "learning_rate": 9.288034510907912e-06, "loss": 1.6197, "step": 14467 }, { "epoch": 0.8064210467643944, "grad_norm": 0.5691631436347961, "learning_rate": 9.282865985838313e-06, "loss": 1.7008, "step": 14468 }, { "epoch": 0.8064767850175576, "grad_norm": 0.5945144295692444, "learning_rate": 9.277698752078801e-06, "loss": 1.8471, "step": 14469 }, { "epoch": 0.8065325232707207, "grad_norm": 0.5495025515556335, "learning_rate": 9.272532809793254e-06, "loss": 1.5663, "step": 14470 }, { "epoch": 0.8065882615238839, "grad_norm": 0.5286274552345276, "learning_rate": 9.267368159145506e-06, "loss": 1.4549, "step": 14471 }, { "epoch": 0.806643999777047, "grad_norm": 0.5482826232910156, "learning_rate": 9.262204800299373e-06, "loss": 1.4818, "step": 14472 }, { "epoch": 0.8066997380302101, "grad_norm": 0.5395148992538452, "learning_rate": 9.257042733418552e-06, "loss": 1.5863, "step": 14473 }, { "epoch": 0.8067554762833733, "grad_norm": 0.5677280426025391, "learning_rate": 9.251881958666802e-06, "loss": 1.461, "step": 14474 }, { "epoch": 0.8068112145365364, "grad_norm": 0.5757277011871338, "learning_rate": 9.246722476207797e-06, "loss": 1.4981, "step": 14475 }, { "epoch": 0.8068669527896996, "grad_norm": 0.5508648157119751, "learning_rate": 9.24156428620513e-06, "loss": 1.6121, "step": 14476 }, { "epoch": 0.8069226910428627, "grad_norm": 0.5794610977172852, "learning_rate": 9.236407388822405e-06, "loss": 1.5065, "step": 14477 }, { "epoch": 0.8069784292960258, "grad_norm": 0.5588470101356506, "learning_rate": 9.23125178422317e-06, "loss": 1.47, "step": 14478 }, { "epoch": 0.807034167549189, "grad_norm": 0.5367100834846497, "learning_rate": 9.226097472570943e-06, "loss": 1.4377, "step": 14479 }, { "epoch": 0.8070899058023522, "grad_norm": 0.5730358362197876, "learning_rate": 9.220944454029162e-06, "loss": 1.6211, "step": 14480 }, { "epoch": 0.8071456440555153, "grad_norm": 0.541301429271698, "learning_rate": 9.215792728761253e-06, "loss": 1.5581, "step": 14481 }, { "epoch": 0.8072013823086784, "grad_norm": 0.5392494201660156, "learning_rate": 9.210642296930638e-06, "loss": 1.5311, "step": 14482 }, { "epoch": 0.8072571205618416, "grad_norm": 0.5902514457702637, "learning_rate": 9.205493158700618e-06, "loss": 1.6832, "step": 14483 }, { "epoch": 0.8073128588150047, "grad_norm": 0.5396768450737, "learning_rate": 9.200345314234504e-06, "loss": 1.5659, "step": 14484 }, { "epoch": 0.8073685970681679, "grad_norm": 0.5860647559165955, "learning_rate": 9.195198763695557e-06, "loss": 1.5639, "step": 14485 }, { "epoch": 0.8074243353213311, "grad_norm": 0.6074658632278442, "learning_rate": 9.190053507246999e-06, "loss": 1.7787, "step": 14486 }, { "epoch": 0.8074800735744941, "grad_norm": 0.5613250136375427, "learning_rate": 9.184909545052017e-06, "loss": 1.5598, "step": 14487 }, { "epoch": 0.8075358118276573, "grad_norm": 0.5493916273117065, "learning_rate": 9.17976687727371e-06, "loss": 1.5103, "step": 14488 }, { "epoch": 0.8075915500808205, "grad_norm": 0.578508734703064, "learning_rate": 9.174625504075225e-06, "loss": 1.7456, "step": 14489 }, { "epoch": 0.8076472883339836, "grad_norm": 0.5659584999084473, "learning_rate": 9.169485425619578e-06, "loss": 1.7104, "step": 14490 }, { "epoch": 0.8077030265871468, "grad_norm": 0.6089297533035278, "learning_rate": 9.164346642069804e-06, "loss": 1.814, "step": 14491 }, { "epoch": 0.80775876484031, "grad_norm": 0.5530262589454651, "learning_rate": 9.159209153588849e-06, "loss": 1.6125, "step": 14492 }, { "epoch": 0.807814503093473, "grad_norm": 0.5667465329170227, "learning_rate": 9.154072960339666e-06, "loss": 1.627, "step": 14493 }, { "epoch": 0.8078702413466362, "grad_norm": 0.6102772951126099, "learning_rate": 9.148938062485157e-06, "loss": 1.5063, "step": 14494 }, { "epoch": 0.8079259795997994, "grad_norm": 0.6273038983345032, "learning_rate": 9.143804460188143e-06, "loss": 1.6385, "step": 14495 }, { "epoch": 0.8079817178529625, "grad_norm": 0.554091215133667, "learning_rate": 9.138672153611439e-06, "loss": 1.6554, "step": 14496 }, { "epoch": 0.8080374561061257, "grad_norm": 0.5899942517280579, "learning_rate": 9.133541142917823e-06, "loss": 1.7165, "step": 14497 }, { "epoch": 0.8080931943592887, "grad_norm": 0.6665770411491394, "learning_rate": 9.128411428270018e-06, "loss": 2.0642, "step": 14498 }, { "epoch": 0.8081489326124519, "grad_norm": 0.5284276008605957, "learning_rate": 9.123283009830686e-06, "loss": 1.6783, "step": 14499 }, { "epoch": 0.8082046708656151, "grad_norm": 0.5691483616828918, "learning_rate": 9.118155887762496e-06, "loss": 1.6466, "step": 14500 }, { "epoch": 0.8082604091187782, "grad_norm": 0.5701718330383301, "learning_rate": 9.113030062228063e-06, "loss": 1.4418, "step": 14501 }, { "epoch": 0.8083161473719414, "grad_norm": 0.5520241856575012, "learning_rate": 9.107905533389915e-06, "loss": 1.4944, "step": 14502 }, { "epoch": 0.8083718856251045, "grad_norm": 0.5629130601882935, "learning_rate": 9.102782301410584e-06, "loss": 1.6503, "step": 14503 }, { "epoch": 0.8084276238782676, "grad_norm": 0.5741170644760132, "learning_rate": 9.097660366452548e-06, "loss": 1.7528, "step": 14504 }, { "epoch": 0.8084833621314308, "grad_norm": 0.5423370003700256, "learning_rate": 9.092539728678262e-06, "loss": 1.7108, "step": 14505 }, { "epoch": 0.808539100384594, "grad_norm": 0.5521060228347778, "learning_rate": 9.087420388250101e-06, "loss": 1.5113, "step": 14506 }, { "epoch": 0.808594838637757, "grad_norm": 0.6263614892959595, "learning_rate": 9.082302345330413e-06, "loss": 1.9957, "step": 14507 }, { "epoch": 0.8086505768909202, "grad_norm": 0.5271081328392029, "learning_rate": 9.077185600081551e-06, "loss": 1.4357, "step": 14508 }, { "epoch": 0.8087063151440834, "grad_norm": 0.5640679597854614, "learning_rate": 9.072070152665758e-06, "loss": 1.7057, "step": 14509 }, { "epoch": 0.8087620533972465, "grad_norm": 0.5805985927581787, "learning_rate": 9.066956003245264e-06, "loss": 1.737, "step": 14510 }, { "epoch": 0.8088177916504097, "grad_norm": 0.5537278056144714, "learning_rate": 9.06184315198228e-06, "loss": 1.5738, "step": 14511 }, { "epoch": 0.8088735299035729, "grad_norm": 0.5141084790229797, "learning_rate": 9.056731599038948e-06, "loss": 1.4052, "step": 14512 }, { "epoch": 0.8089292681567359, "grad_norm": 0.6007054448127747, "learning_rate": 9.051621344577371e-06, "loss": 1.9542, "step": 14513 }, { "epoch": 0.8089850064098991, "grad_norm": 0.5462144017219543, "learning_rate": 9.046512388759598e-06, "loss": 1.6902, "step": 14514 }, { "epoch": 0.8090407446630623, "grad_norm": 0.5487377047538757, "learning_rate": 9.041404731747705e-06, "loss": 1.7517, "step": 14515 }, { "epoch": 0.8090964829162254, "grad_norm": 0.6560800075531006, "learning_rate": 9.036298373703638e-06, "loss": 1.7549, "step": 14516 }, { "epoch": 0.8091522211693886, "grad_norm": 0.5306289196014404, "learning_rate": 9.03119331478935e-06, "loss": 1.596, "step": 14517 }, { "epoch": 0.8092079594225517, "grad_norm": 0.5258604884147644, "learning_rate": 9.026089555166745e-06, "loss": 1.5824, "step": 14518 }, { "epoch": 0.8092636976757148, "grad_norm": 0.5263345837593079, "learning_rate": 9.020987094997691e-06, "loss": 1.5729, "step": 14519 }, { "epoch": 0.809319435928878, "grad_norm": 0.6098785400390625, "learning_rate": 9.015885934444007e-06, "loss": 1.7344, "step": 14520 }, { "epoch": 0.8093751741820411, "grad_norm": 0.5672189593315125, "learning_rate": 9.010786073667455e-06, "loss": 1.6726, "step": 14521 }, { "epoch": 0.8094309124352043, "grad_norm": 0.5683502554893494, "learning_rate": 9.005687512829786e-06, "loss": 1.7196, "step": 14522 }, { "epoch": 0.8094866506883674, "grad_norm": 0.5518867373466492, "learning_rate": 9.0005902520927e-06, "loss": 1.6435, "step": 14523 }, { "epoch": 0.8095423889415305, "grad_norm": 0.5819790959358215, "learning_rate": 8.995494291617856e-06, "loss": 1.7616, "step": 14524 }, { "epoch": 0.8095981271946937, "grad_norm": 0.5913581252098083, "learning_rate": 8.990399631566837e-06, "loss": 1.6495, "step": 14525 }, { "epoch": 0.8096538654478569, "grad_norm": 0.5639625787734985, "learning_rate": 8.985306272101252e-06, "loss": 1.6081, "step": 14526 }, { "epoch": 0.80970960370102, "grad_norm": 0.5754690170288086, "learning_rate": 8.980214213382632e-06, "loss": 1.4755, "step": 14527 }, { "epoch": 0.8097653419541831, "grad_norm": 0.5814158916473389, "learning_rate": 8.975123455572443e-06, "loss": 1.5668, "step": 14528 }, { "epoch": 0.8098210802073463, "grad_norm": 0.4973066449165344, "learning_rate": 8.970033998832145e-06, "loss": 1.2416, "step": 14529 }, { "epoch": 0.8098768184605094, "grad_norm": 0.586467444896698, "learning_rate": 8.964945843323147e-06, "loss": 1.8587, "step": 14530 }, { "epoch": 0.8099325567136726, "grad_norm": 0.5752747058868408, "learning_rate": 8.959858989206827e-06, "loss": 1.6583, "step": 14531 }, { "epoch": 0.8099882949668358, "grad_norm": 0.5636700391769409, "learning_rate": 8.954773436644492e-06, "loss": 1.6193, "step": 14532 }, { "epoch": 0.8100440332199988, "grad_norm": 0.596447229385376, "learning_rate": 8.949689185797416e-06, "loss": 1.7473, "step": 14533 }, { "epoch": 0.810099771473162, "grad_norm": 0.5715921521186829, "learning_rate": 8.944606236826885e-06, "loss": 1.6444, "step": 14534 }, { "epoch": 0.8101555097263252, "grad_norm": 0.5635936260223389, "learning_rate": 8.939524589894067e-06, "loss": 1.7083, "step": 14535 }, { "epoch": 0.8102112479794883, "grad_norm": 0.5593386888504028, "learning_rate": 8.934444245160123e-06, "loss": 1.6985, "step": 14536 }, { "epoch": 0.8102669862326515, "grad_norm": 0.5133383274078369, "learning_rate": 8.929365202786183e-06, "loss": 1.487, "step": 14537 }, { "epoch": 0.8103227244858147, "grad_norm": 0.5615258812904358, "learning_rate": 8.924287462933328e-06, "loss": 1.4259, "step": 14538 }, { "epoch": 0.8103784627389777, "grad_norm": 0.5180845260620117, "learning_rate": 8.919211025762581e-06, "loss": 1.4425, "step": 14539 }, { "epoch": 0.8104342009921409, "grad_norm": 0.5557456612586975, "learning_rate": 8.914135891434927e-06, "loss": 1.467, "step": 14540 }, { "epoch": 0.8104899392453041, "grad_norm": 0.5708995461463928, "learning_rate": 8.909062060111357e-06, "loss": 1.6551, "step": 14541 }, { "epoch": 0.8105456774984672, "grad_norm": 0.5808879733085632, "learning_rate": 8.903989531952755e-06, "loss": 1.6874, "step": 14542 }, { "epoch": 0.8106014157516304, "grad_norm": 0.5360985398292542, "learning_rate": 8.89891830711999e-06, "loss": 1.5656, "step": 14543 }, { "epoch": 0.8106571540047934, "grad_norm": 0.556928813457489, "learning_rate": 8.893848385773911e-06, "loss": 1.6318, "step": 14544 }, { "epoch": 0.8107128922579566, "grad_norm": 0.5977469682693481, "learning_rate": 8.88877976807529e-06, "loss": 1.7456, "step": 14545 }, { "epoch": 0.8107686305111198, "grad_norm": 0.6153261661529541, "learning_rate": 8.883712454184894e-06, "loss": 1.7037, "step": 14546 }, { "epoch": 0.8108243687642829, "grad_norm": 0.5547722578048706, "learning_rate": 8.8786464442634e-06, "loss": 1.6516, "step": 14547 }, { "epoch": 0.8108801070174461, "grad_norm": 0.6243407726287842, "learning_rate": 8.873581738471486e-06, "loss": 1.8242, "step": 14548 }, { "epoch": 0.8109358452706092, "grad_norm": 0.571435809135437, "learning_rate": 8.868518336969779e-06, "loss": 1.6742, "step": 14549 }, { "epoch": 0.8109915835237723, "grad_norm": 0.5933339595794678, "learning_rate": 8.863456239918866e-06, "loss": 1.7067, "step": 14550 }, { "epoch": 0.8110473217769355, "grad_norm": 0.5834755301475525, "learning_rate": 8.858395447479257e-06, "loss": 1.5587, "step": 14551 }, { "epoch": 0.8111030600300987, "grad_norm": 0.6342363953590393, "learning_rate": 8.853335959811482e-06, "loss": 1.8265, "step": 14552 }, { "epoch": 0.8111587982832618, "grad_norm": 0.5746006965637207, "learning_rate": 8.848277777076003e-06, "loss": 1.6465, "step": 14553 }, { "epoch": 0.811214536536425, "grad_norm": 0.5476809740066528, "learning_rate": 8.843220899433207e-06, "loss": 1.6282, "step": 14554 }, { "epoch": 0.8112702747895881, "grad_norm": 0.5748486518859863, "learning_rate": 8.838165327043485e-06, "loss": 1.6087, "step": 14555 }, { "epoch": 0.8113260130427512, "grad_norm": 0.5710524320602417, "learning_rate": 8.833111060067172e-06, "loss": 1.5522, "step": 14556 }, { "epoch": 0.8113817512959144, "grad_norm": 0.5666594505310059, "learning_rate": 8.828058098664566e-06, "loss": 1.5523, "step": 14557 }, { "epoch": 0.8114374895490776, "grad_norm": 0.5782667994499207, "learning_rate": 8.823006442995895e-06, "loss": 1.6946, "step": 14558 }, { "epoch": 0.8114932278022406, "grad_norm": 0.5868912935256958, "learning_rate": 8.817956093221369e-06, "loss": 1.7758, "step": 14559 }, { "epoch": 0.8115489660554038, "grad_norm": 0.5910466313362122, "learning_rate": 8.81290704950119e-06, "loss": 1.8656, "step": 14560 }, { "epoch": 0.811604704308567, "grad_norm": 0.5408613085746765, "learning_rate": 8.807859311995454e-06, "loss": 1.4917, "step": 14561 }, { "epoch": 0.8116604425617301, "grad_norm": 0.6216923594474792, "learning_rate": 8.802812880864252e-06, "loss": 1.808, "step": 14562 }, { "epoch": 0.8117161808148933, "grad_norm": 0.5523777008056641, "learning_rate": 8.797767756267628e-06, "loss": 1.4685, "step": 14563 }, { "epoch": 0.8117719190680565, "grad_norm": 0.5775405764579773, "learning_rate": 8.792723938365599e-06, "loss": 1.6824, "step": 14564 }, { "epoch": 0.8118276573212195, "grad_norm": 0.5495176911354065, "learning_rate": 8.787681427318095e-06, "loss": 1.5778, "step": 14565 }, { "epoch": 0.8118833955743827, "grad_norm": 0.5674751400947571, "learning_rate": 8.782640223285043e-06, "loss": 1.7507, "step": 14566 }, { "epoch": 0.8119391338275458, "grad_norm": 0.5787277221679688, "learning_rate": 8.777600326426356e-06, "loss": 1.647, "step": 14567 }, { "epoch": 0.811994872080709, "grad_norm": 0.6019443273544312, "learning_rate": 8.77256173690183e-06, "loss": 1.6628, "step": 14568 }, { "epoch": 0.8120506103338722, "grad_norm": 0.5538434386253357, "learning_rate": 8.767524454871273e-06, "loss": 1.4677, "step": 14569 }, { "epoch": 0.8121063485870352, "grad_norm": 0.5707783102989197, "learning_rate": 8.762488480494435e-06, "loss": 1.6471, "step": 14570 }, { "epoch": 0.8121620868401984, "grad_norm": 0.576706051826477, "learning_rate": 8.757453813931032e-06, "loss": 1.5768, "step": 14571 }, { "epoch": 0.8122178250933616, "grad_norm": 0.5679410696029663, "learning_rate": 8.752420455340749e-06, "loss": 1.6616, "step": 14572 }, { "epoch": 0.8122735633465247, "grad_norm": 0.5615427494049072, "learning_rate": 8.747388404883183e-06, "loss": 1.659, "step": 14573 }, { "epoch": 0.8123293015996879, "grad_norm": 0.5732202529907227, "learning_rate": 8.742357662717943e-06, "loss": 1.5695, "step": 14574 }, { "epoch": 0.812385039852851, "grad_norm": 0.5816728472709656, "learning_rate": 8.737328229004565e-06, "loss": 1.6397, "step": 14575 }, { "epoch": 0.8124407781060141, "grad_norm": 0.5549823045730591, "learning_rate": 8.732300103902568e-06, "loss": 1.639, "step": 14576 }, { "epoch": 0.8124965163591773, "grad_norm": 0.6017770171165466, "learning_rate": 8.72727328757138e-06, "loss": 1.6974, "step": 14577 }, { "epoch": 0.8125522546123405, "grad_norm": 0.5807628631591797, "learning_rate": 8.722247780170461e-06, "loss": 1.5893, "step": 14578 }, { "epoch": 0.8126079928655036, "grad_norm": 0.5604943633079529, "learning_rate": 8.717223581859191e-06, "loss": 1.7204, "step": 14579 }, { "epoch": 0.8126637311186667, "grad_norm": 0.529071569442749, "learning_rate": 8.71220069279688e-06, "loss": 1.6458, "step": 14580 }, { "epoch": 0.8127194693718299, "grad_norm": 0.5336666703224182, "learning_rate": 8.707179113142839e-06, "loss": 1.3501, "step": 14581 }, { "epoch": 0.812775207624993, "grad_norm": 0.5635989308357239, "learning_rate": 8.702158843056319e-06, "loss": 1.694, "step": 14582 }, { "epoch": 0.8128309458781562, "grad_norm": 0.5581356287002563, "learning_rate": 8.697139882696548e-06, "loss": 1.5596, "step": 14583 }, { "epoch": 0.8128866841313194, "grad_norm": 0.5320961475372314, "learning_rate": 8.692122232222683e-06, "loss": 1.7084, "step": 14584 }, { "epoch": 0.8129424223844824, "grad_norm": 0.5928415060043335, "learning_rate": 8.68710589179384e-06, "loss": 1.4451, "step": 14585 }, { "epoch": 0.8129981606376456, "grad_norm": 0.5560922622680664, "learning_rate": 8.682090861569153e-06, "loss": 1.4804, "step": 14586 }, { "epoch": 0.8130538988908088, "grad_norm": 0.5927940011024475, "learning_rate": 8.677077141707635e-06, "loss": 1.6313, "step": 14587 }, { "epoch": 0.8131096371439719, "grad_norm": 0.511622965335846, "learning_rate": 8.672064732368301e-06, "loss": 1.4384, "step": 14588 }, { "epoch": 0.8131653753971351, "grad_norm": 0.6018781661987305, "learning_rate": 8.667053633710109e-06, "loss": 1.7108, "step": 14589 }, { "epoch": 0.8132211136502983, "grad_norm": 0.5497088432312012, "learning_rate": 8.662043845892004e-06, "loss": 1.4508, "step": 14590 }, { "epoch": 0.8132768519034613, "grad_norm": 0.5855251550674438, "learning_rate": 8.65703536907284e-06, "loss": 1.7348, "step": 14591 }, { "epoch": 0.8133325901566245, "grad_norm": 0.628377377986908, "learning_rate": 8.652028203411455e-06, "loss": 1.6383, "step": 14592 }, { "epoch": 0.8133883284097876, "grad_norm": 0.6113680601119995, "learning_rate": 8.647022349066686e-06, "loss": 1.8188, "step": 14593 }, { "epoch": 0.8134440666629508, "grad_norm": 0.536659300327301, "learning_rate": 8.64201780619725e-06, "loss": 1.5536, "step": 14594 }, { "epoch": 0.813499804916114, "grad_norm": 0.5933912396430969, "learning_rate": 8.637014574961872e-06, "loss": 1.68, "step": 14595 }, { "epoch": 0.813555543169277, "grad_norm": 0.5632370114326477, "learning_rate": 8.632012655519234e-06, "loss": 1.4752, "step": 14596 }, { "epoch": 0.8136112814224402, "grad_norm": 0.5764100551605225, "learning_rate": 8.62701204802796e-06, "loss": 1.6428, "step": 14597 }, { "epoch": 0.8136670196756034, "grad_norm": 0.5657497048377991, "learning_rate": 8.622012752646652e-06, "loss": 1.6523, "step": 14598 }, { "epoch": 0.8137227579287665, "grad_norm": 0.6426599621772766, "learning_rate": 8.617014769533843e-06, "loss": 1.8712, "step": 14599 }, { "epoch": 0.8137784961819297, "grad_norm": 0.5723824501037598, "learning_rate": 8.612018098848041e-06, "loss": 1.6655, "step": 14600 }, { "epoch": 0.8138342344350928, "grad_norm": 0.5591287612915039, "learning_rate": 8.607022740747716e-06, "loss": 1.7306, "step": 14601 }, { "epoch": 0.8138899726882559, "grad_norm": 0.5826481580734253, "learning_rate": 8.602028695391307e-06, "loss": 1.5182, "step": 14602 }, { "epoch": 0.8139457109414191, "grad_norm": 0.5714852809906006, "learning_rate": 8.597035962937156e-06, "loss": 1.5408, "step": 14603 }, { "epoch": 0.8140014491945823, "grad_norm": 0.5411056876182556, "learning_rate": 8.592044543543643e-06, "loss": 1.5651, "step": 14604 }, { "epoch": 0.8140571874477454, "grad_norm": 0.586496114730835, "learning_rate": 8.587054437369057e-06, "loss": 1.7391, "step": 14605 }, { "epoch": 0.8141129257009085, "grad_norm": 0.5315595865249634, "learning_rate": 8.582065644571647e-06, "loss": 1.5022, "step": 14606 }, { "epoch": 0.8141686639540717, "grad_norm": 0.5513681769371033, "learning_rate": 8.577078165309621e-06, "loss": 1.6308, "step": 14607 }, { "epoch": 0.8142244022072348, "grad_norm": 0.6040627956390381, "learning_rate": 8.572091999741172e-06, "loss": 1.825, "step": 14608 }, { "epoch": 0.814280140460398, "grad_norm": 0.5821855664253235, "learning_rate": 8.567107148024434e-06, "loss": 1.5367, "step": 14609 }, { "epoch": 0.8143358787135612, "grad_norm": 0.5883117318153381, "learning_rate": 8.562123610317457e-06, "loss": 1.7769, "step": 14610 }, { "epoch": 0.8143916169667242, "grad_norm": 0.5612747669219971, "learning_rate": 8.557141386778334e-06, "loss": 1.4503, "step": 14611 }, { "epoch": 0.8144473552198874, "grad_norm": 0.5363609194755554, "learning_rate": 8.552160477565075e-06, "loss": 1.3287, "step": 14612 }, { "epoch": 0.8145030934730506, "grad_norm": 0.5557144284248352, "learning_rate": 8.547180882835609e-06, "loss": 1.8482, "step": 14613 }, { "epoch": 0.8145588317262137, "grad_norm": 0.5327957272529602, "learning_rate": 8.542202602747884e-06, "loss": 1.4672, "step": 14614 }, { "epoch": 0.8146145699793769, "grad_norm": 0.5746212601661682, "learning_rate": 8.537225637459773e-06, "loss": 1.7671, "step": 14615 }, { "epoch": 0.8146703082325399, "grad_norm": 0.5310134887695312, "learning_rate": 8.532249987129132e-06, "loss": 1.6654, "step": 14616 }, { "epoch": 0.8147260464857031, "grad_norm": 0.574122965335846, "learning_rate": 8.527275651913735e-06, "loss": 1.7017, "step": 14617 }, { "epoch": 0.8147817847388663, "grad_norm": 0.5795066952705383, "learning_rate": 8.522302631971341e-06, "loss": 1.6954, "step": 14618 }, { "epoch": 0.8148375229920294, "grad_norm": 0.5737940669059753, "learning_rate": 8.517330927459704e-06, "loss": 1.6882, "step": 14619 }, { "epoch": 0.8148932612451926, "grad_norm": 0.5952860116958618, "learning_rate": 8.512360538536452e-06, "loss": 1.6532, "step": 14620 }, { "epoch": 0.8149489994983558, "grad_norm": 0.5542854070663452, "learning_rate": 8.507391465359238e-06, "loss": 1.6373, "step": 14621 }, { "epoch": 0.8150047377515188, "grad_norm": 0.5282446146011353, "learning_rate": 8.502423708085644e-06, "loss": 1.5834, "step": 14622 }, { "epoch": 0.815060476004682, "grad_norm": 0.576572835445404, "learning_rate": 8.497457266873233e-06, "loss": 1.5779, "step": 14623 }, { "epoch": 0.8151162142578452, "grad_norm": 0.5445130467414856, "learning_rate": 8.492492141879493e-06, "loss": 1.7272, "step": 14624 }, { "epoch": 0.8151719525110083, "grad_norm": 0.5699845552444458, "learning_rate": 8.487528333261896e-06, "loss": 1.597, "step": 14625 }, { "epoch": 0.8152276907641715, "grad_norm": 0.5669733285903931, "learning_rate": 8.482565841177864e-06, "loss": 1.7181, "step": 14626 }, { "epoch": 0.8152834290173346, "grad_norm": 0.5513604283332825, "learning_rate": 8.477604665784782e-06, "loss": 1.4208, "step": 14627 }, { "epoch": 0.8153391672704977, "grad_norm": 0.5793091654777527, "learning_rate": 8.47264480724e-06, "loss": 1.6251, "step": 14628 }, { "epoch": 0.8153949055236609, "grad_norm": 0.6061859130859375, "learning_rate": 8.467686265700775e-06, "loss": 1.5518, "step": 14629 }, { "epoch": 0.8154506437768241, "grad_norm": 0.5648293495178223, "learning_rate": 8.462729041324407e-06, "loss": 1.751, "step": 14630 }, { "epoch": 0.8155063820299872, "grad_norm": 0.5499643683433533, "learning_rate": 8.45777313426811e-06, "loss": 1.4874, "step": 14631 }, { "epoch": 0.8155621202831503, "grad_norm": 0.5458365082740784, "learning_rate": 8.452818544689023e-06, "loss": 1.5309, "step": 14632 }, { "epoch": 0.8156178585363135, "grad_norm": 0.5470486283302307, "learning_rate": 8.447865272744299e-06, "loss": 1.598, "step": 14633 }, { "epoch": 0.8156735967894766, "grad_norm": 0.5594566464424133, "learning_rate": 8.442913318591022e-06, "loss": 1.6158, "step": 14634 }, { "epoch": 0.8157293350426398, "grad_norm": 0.5831910371780396, "learning_rate": 8.437962682386252e-06, "loss": 1.6812, "step": 14635 }, { "epoch": 0.815785073295803, "grad_norm": 0.5766580104827881, "learning_rate": 8.433013364286957e-06, "loss": 1.5881, "step": 14636 }, { "epoch": 0.815840811548966, "grad_norm": 0.612402081489563, "learning_rate": 8.428065364450138e-06, "loss": 1.7401, "step": 14637 }, { "epoch": 0.8158965498021292, "grad_norm": 0.5256620645523071, "learning_rate": 8.423118683032715e-06, "loss": 1.6026, "step": 14638 }, { "epoch": 0.8159522880552923, "grad_norm": 0.5555899143218994, "learning_rate": 8.418173320191547e-06, "loss": 1.764, "step": 14639 }, { "epoch": 0.8160080263084555, "grad_norm": 0.626512348651886, "learning_rate": 8.413229276083484e-06, "loss": 1.813, "step": 14640 }, { "epoch": 0.8160637645616187, "grad_norm": 0.5965732932090759, "learning_rate": 8.408286550865318e-06, "loss": 1.7731, "step": 14641 }, { "epoch": 0.8161195028147817, "grad_norm": 0.5264768004417419, "learning_rate": 8.40334514469382e-06, "loss": 1.606, "step": 14642 }, { "epoch": 0.8161752410679449, "grad_norm": 0.5863091349601746, "learning_rate": 8.398405057725678e-06, "loss": 1.6438, "step": 14643 }, { "epoch": 0.8162309793211081, "grad_norm": 0.6372930407524109, "learning_rate": 8.393466290117557e-06, "loss": 1.5923, "step": 14644 }, { "epoch": 0.8162867175742712, "grad_norm": 0.574329674243927, "learning_rate": 8.388528842026128e-06, "loss": 1.5047, "step": 14645 }, { "epoch": 0.8163424558274344, "grad_norm": 0.5910654664039612, "learning_rate": 8.38359271360794e-06, "loss": 1.669, "step": 14646 }, { "epoch": 0.8163981940805976, "grad_norm": 0.5745288133621216, "learning_rate": 8.378657905019555e-06, "loss": 1.6667, "step": 14647 }, { "epoch": 0.8164539323337606, "grad_norm": 0.5561100244522095, "learning_rate": 8.373724416417467e-06, "loss": 1.8452, "step": 14648 }, { "epoch": 0.8165096705869238, "grad_norm": 0.634443461894989, "learning_rate": 8.368792247958157e-06, "loss": 1.755, "step": 14649 }, { "epoch": 0.816565408840087, "grad_norm": 0.5481244921684265, "learning_rate": 8.363861399798018e-06, "loss": 1.6841, "step": 14650 }, { "epoch": 0.8166211470932501, "grad_norm": 0.5906131863594055, "learning_rate": 8.358931872093439e-06, "loss": 1.6548, "step": 14651 }, { "epoch": 0.8166768853464133, "grad_norm": 0.5448386073112488, "learning_rate": 8.354003665000754e-06, "loss": 1.6098, "step": 14652 }, { "epoch": 0.8167326235995764, "grad_norm": 0.5544820427894592, "learning_rate": 8.349076778676262e-06, "loss": 1.4819, "step": 14653 }, { "epoch": 0.8167883618527395, "grad_norm": 0.5842088460922241, "learning_rate": 8.34415121327623e-06, "loss": 2.0069, "step": 14654 }, { "epoch": 0.8168441001059027, "grad_norm": 0.5592513084411621, "learning_rate": 8.33922696895682e-06, "loss": 1.6903, "step": 14655 }, { "epoch": 0.8168998383590659, "grad_norm": 0.5489197969436646, "learning_rate": 8.334304045874247e-06, "loss": 1.5395, "step": 14656 }, { "epoch": 0.816955576612229, "grad_norm": 0.5454195737838745, "learning_rate": 8.329382444184636e-06, "loss": 1.6031, "step": 14657 }, { "epoch": 0.8170113148653921, "grad_norm": 0.5950064063072205, "learning_rate": 8.32446216404404e-06, "loss": 1.6411, "step": 14658 }, { "epoch": 0.8170670531185553, "grad_norm": 0.5340271592140198, "learning_rate": 8.319543205608522e-06, "loss": 1.5835, "step": 14659 }, { "epoch": 0.8171227913717184, "grad_norm": 0.5560121536254883, "learning_rate": 8.31462556903408e-06, "loss": 1.6486, "step": 14660 }, { "epoch": 0.8171785296248816, "grad_norm": 0.5711260437965393, "learning_rate": 8.309709254476682e-06, "loss": 1.6764, "step": 14661 }, { "epoch": 0.8172342678780447, "grad_norm": 0.5732410550117493, "learning_rate": 8.304794262092208e-06, "loss": 1.6329, "step": 14662 }, { "epoch": 0.8172900061312078, "grad_norm": 0.5843484401702881, "learning_rate": 8.299880592036579e-06, "loss": 2.0122, "step": 14663 }, { "epoch": 0.817345744384371, "grad_norm": 0.5985366702079773, "learning_rate": 8.294968244465618e-06, "loss": 1.7526, "step": 14664 }, { "epoch": 0.8174014826375341, "grad_norm": 0.5528346300125122, "learning_rate": 8.290057219535097e-06, "loss": 1.7203, "step": 14665 }, { "epoch": 0.8174572208906973, "grad_norm": 0.5621989965438843, "learning_rate": 8.28514751740077e-06, "loss": 1.5719, "step": 14666 }, { "epoch": 0.8175129591438605, "grad_norm": 0.532791793346405, "learning_rate": 8.280239138218354e-06, "loss": 1.6364, "step": 14667 }, { "epoch": 0.8175686973970235, "grad_norm": 0.5551378726959229, "learning_rate": 8.275332082143522e-06, "loss": 1.4977, "step": 14668 }, { "epoch": 0.8176244356501867, "grad_norm": 0.5460422039031982, "learning_rate": 8.270426349331872e-06, "loss": 1.5391, "step": 14669 }, { "epoch": 0.8176801739033499, "grad_norm": 0.5884209871292114, "learning_rate": 8.265521939938987e-06, "loss": 1.8799, "step": 14670 }, { "epoch": 0.817735912156513, "grad_norm": 0.5831186175346375, "learning_rate": 8.260618854120439e-06, "loss": 1.637, "step": 14671 }, { "epoch": 0.8177916504096762, "grad_norm": 0.5479006767272949, "learning_rate": 8.25571709203169e-06, "loss": 1.5, "step": 14672 }, { "epoch": 0.8178473886628393, "grad_norm": 0.5728062987327576, "learning_rate": 8.250816653828208e-06, "loss": 1.5887, "step": 14673 }, { "epoch": 0.8179031269160024, "grad_norm": 0.5844617486000061, "learning_rate": 8.245917539665409e-06, "loss": 1.7618, "step": 14674 }, { "epoch": 0.8179588651691656, "grad_norm": 0.6304042935371399, "learning_rate": 8.241019749698675e-06, "loss": 1.9131, "step": 14675 }, { "epoch": 0.8180146034223288, "grad_norm": 0.560624897480011, "learning_rate": 8.236123284083314e-06, "loss": 1.4943, "step": 14676 }, { "epoch": 0.8180703416754919, "grad_norm": 0.6337041854858398, "learning_rate": 8.231228142974606e-06, "loss": 1.9758, "step": 14677 }, { "epoch": 0.818126079928655, "grad_norm": 0.58197021484375, "learning_rate": 8.226334326527834e-06, "loss": 1.6154, "step": 14678 }, { "epoch": 0.8181818181818182, "grad_norm": 0.6239030957221985, "learning_rate": 8.221441834898175e-06, "loss": 1.6926, "step": 14679 }, { "epoch": 0.8182375564349813, "grad_norm": 0.5664547085762024, "learning_rate": 8.216550668240803e-06, "loss": 1.7325, "step": 14680 }, { "epoch": 0.8182932946881445, "grad_norm": 0.5848506093025208, "learning_rate": 8.211660826710804e-06, "loss": 1.6233, "step": 14681 }, { "epoch": 0.8183490329413077, "grad_norm": 0.5485044717788696, "learning_rate": 8.206772310463295e-06, "loss": 1.5409, "step": 14682 }, { "epoch": 0.8184047711944707, "grad_norm": 0.6160194873809814, "learning_rate": 8.201885119653308e-06, "loss": 1.9611, "step": 14683 }, { "epoch": 0.8184605094476339, "grad_norm": 0.5616006851196289, "learning_rate": 8.196999254435816e-06, "loss": 1.7156, "step": 14684 }, { "epoch": 0.818516247700797, "grad_norm": 0.6323941946029663, "learning_rate": 8.192114714965776e-06, "loss": 1.7105, "step": 14685 }, { "epoch": 0.8185719859539602, "grad_norm": 0.6214513778686523, "learning_rate": 8.187231501398102e-06, "loss": 1.7029, "step": 14686 }, { "epoch": 0.8186277242071234, "grad_norm": 0.5549011826515198, "learning_rate": 8.18234961388767e-06, "loss": 1.3432, "step": 14687 }, { "epoch": 0.8186834624602864, "grad_norm": 0.5615776181221008, "learning_rate": 8.17746905258927e-06, "loss": 1.6022, "step": 14688 }, { "epoch": 0.8187392007134496, "grad_norm": 0.5588061213493347, "learning_rate": 8.172589817657721e-06, "loss": 1.4637, "step": 14689 }, { "epoch": 0.8187949389666128, "grad_norm": 0.5842549800872803, "learning_rate": 8.167711909247766e-06, "loss": 1.7603, "step": 14690 }, { "epoch": 0.8188506772197759, "grad_norm": 0.6246482729911804, "learning_rate": 8.16283532751408e-06, "loss": 1.8689, "step": 14691 }, { "epoch": 0.8189064154729391, "grad_norm": 0.5793601274490356, "learning_rate": 8.157960072611326e-06, "loss": 1.5218, "step": 14692 }, { "epoch": 0.8189621537261023, "grad_norm": 0.6303258538246155, "learning_rate": 8.153086144694122e-06, "loss": 1.676, "step": 14693 }, { "epoch": 0.8190178919792653, "grad_norm": 0.6256571412086487, "learning_rate": 8.148213543917055e-06, "loss": 1.7805, "step": 14694 }, { "epoch": 0.8190736302324285, "grad_norm": 0.6186008453369141, "learning_rate": 8.143342270434629e-06, "loss": 1.524, "step": 14695 }, { "epoch": 0.8191293684855917, "grad_norm": 0.519763171672821, "learning_rate": 8.138472324401335e-06, "loss": 1.3755, "step": 14696 }, { "epoch": 0.8191851067387548, "grad_norm": 0.5263442993164062, "learning_rate": 8.133603705971649e-06, "loss": 1.5122, "step": 14697 }, { "epoch": 0.819240844991918, "grad_norm": 0.5439835786819458, "learning_rate": 8.128736415299948e-06, "loss": 1.4538, "step": 14698 }, { "epoch": 0.8192965832450811, "grad_norm": 0.6475786566734314, "learning_rate": 8.1238704525406e-06, "loss": 1.8112, "step": 14699 }, { "epoch": 0.8193523214982442, "grad_norm": 0.6076223254203796, "learning_rate": 8.119005817847924e-06, "loss": 1.6804, "step": 14700 }, { "epoch": 0.8194080597514074, "grad_norm": 0.5878423452377319, "learning_rate": 8.114142511376215e-06, "loss": 1.7354, "step": 14701 }, { "epoch": 0.8194637980045706, "grad_norm": 0.5754333138465881, "learning_rate": 8.109280533279684e-06, "loss": 1.6038, "step": 14702 }, { "epoch": 0.8195195362577337, "grad_norm": 0.6017590165138245, "learning_rate": 8.104419883712517e-06, "loss": 1.7552, "step": 14703 }, { "epoch": 0.8195752745108968, "grad_norm": 0.5602265000343323, "learning_rate": 8.099560562828911e-06, "loss": 1.5637, "step": 14704 }, { "epoch": 0.81963101276406, "grad_norm": 0.513415515422821, "learning_rate": 8.094702570782936e-06, "loss": 1.495, "step": 14705 }, { "epoch": 0.8196867510172231, "grad_norm": 0.5594004392623901, "learning_rate": 8.089845907728682e-06, "loss": 1.7242, "step": 14706 }, { "epoch": 0.8197424892703863, "grad_norm": 0.5501028299331665, "learning_rate": 8.084990573820133e-06, "loss": 1.4354, "step": 14707 }, { "epoch": 0.8197982275235494, "grad_norm": 0.5331346392631531, "learning_rate": 8.080136569211322e-06, "loss": 1.7557, "step": 14708 }, { "epoch": 0.8198539657767125, "grad_norm": 0.5430104732513428, "learning_rate": 8.075283894056178e-06, "loss": 1.6358, "step": 14709 }, { "epoch": 0.8199097040298757, "grad_norm": 0.5166445970535278, "learning_rate": 8.070432548508578e-06, "loss": 1.572, "step": 14710 }, { "epoch": 0.8199654422830388, "grad_norm": 0.5667523741722107, "learning_rate": 8.065582532722394e-06, "loss": 1.6363, "step": 14711 }, { "epoch": 0.820021180536202, "grad_norm": 0.5528684258460999, "learning_rate": 8.060733846851432e-06, "loss": 1.4092, "step": 14712 }, { "epoch": 0.8200769187893652, "grad_norm": 0.5996767282485962, "learning_rate": 8.055886491049486e-06, "loss": 1.8248, "step": 14713 }, { "epoch": 0.8201326570425282, "grad_norm": 0.5381787419319153, "learning_rate": 8.051040465470245e-06, "loss": 1.517, "step": 14714 }, { "epoch": 0.8201883952956914, "grad_norm": 0.5631827712059021, "learning_rate": 8.046195770267428e-06, "loss": 1.7852, "step": 14715 }, { "epoch": 0.8202441335488546, "grad_norm": 0.5505089163780212, "learning_rate": 8.041352405594692e-06, "loss": 1.615, "step": 14716 }, { "epoch": 0.8202998718020177, "grad_norm": 0.5934506058692932, "learning_rate": 8.03651037160561e-06, "loss": 1.6671, "step": 14717 }, { "epoch": 0.8203556100551809, "grad_norm": 0.5772705674171448, "learning_rate": 8.031669668453752e-06, "loss": 1.682, "step": 14718 }, { "epoch": 0.8204113483083441, "grad_norm": 0.5619063973426819, "learning_rate": 8.026830296292636e-06, "loss": 1.762, "step": 14719 }, { "epoch": 0.8204670865615071, "grad_norm": 0.5829328894615173, "learning_rate": 8.021992255275763e-06, "loss": 1.73, "step": 14720 }, { "epoch": 0.8205228248146703, "grad_norm": 0.5251481533050537, "learning_rate": 8.017155545556527e-06, "loss": 1.6057, "step": 14721 }, { "epoch": 0.8205785630678335, "grad_norm": 0.5804221630096436, "learning_rate": 8.012320167288334e-06, "loss": 1.4836, "step": 14722 }, { "epoch": 0.8206343013209966, "grad_norm": 0.5928997993469238, "learning_rate": 8.007486120624559e-06, "loss": 1.8501, "step": 14723 }, { "epoch": 0.8206900395741598, "grad_norm": 0.5831340551376343, "learning_rate": 8.002653405718485e-06, "loss": 1.6392, "step": 14724 }, { "epoch": 0.8207457778273229, "grad_norm": 0.5551106929779053, "learning_rate": 7.997822022723378e-06, "loss": 1.5372, "step": 14725 }, { "epoch": 0.820801516080486, "grad_norm": 0.5780943036079407, "learning_rate": 7.992991971792469e-06, "loss": 1.6999, "step": 14726 }, { "epoch": 0.8208572543336492, "grad_norm": 0.5924753546714783, "learning_rate": 7.988163253078952e-06, "loss": 1.6596, "step": 14727 }, { "epoch": 0.8209129925868124, "grad_norm": 0.5651062726974487, "learning_rate": 7.98333586673593e-06, "loss": 1.4913, "step": 14728 }, { "epoch": 0.8209687308399755, "grad_norm": 0.5695908069610596, "learning_rate": 7.978509812916513e-06, "loss": 1.7483, "step": 14729 }, { "epoch": 0.8210244690931386, "grad_norm": 0.575892984867096, "learning_rate": 7.973685091773792e-06, "loss": 1.5914, "step": 14730 }, { "epoch": 0.8210802073463017, "grad_norm": 0.5623947381973267, "learning_rate": 7.968861703460728e-06, "loss": 1.5902, "step": 14731 }, { "epoch": 0.8211359455994649, "grad_norm": 0.5500161647796631, "learning_rate": 7.964039648130328e-06, "loss": 1.6152, "step": 14732 }, { "epoch": 0.8211916838526281, "grad_norm": 0.5211798548698425, "learning_rate": 7.95921892593548e-06, "loss": 1.3945, "step": 14733 }, { "epoch": 0.8212474221057912, "grad_norm": 0.5861966609954834, "learning_rate": 7.954399537029106e-06, "loss": 1.5801, "step": 14734 }, { "epoch": 0.8213031603589543, "grad_norm": 0.5749439001083374, "learning_rate": 7.94958148156405e-06, "loss": 1.56, "step": 14735 }, { "epoch": 0.8213588986121175, "grad_norm": 0.5986553430557251, "learning_rate": 7.94476475969308e-06, "loss": 1.6339, "step": 14736 }, { "epoch": 0.8214146368652806, "grad_norm": 0.5730953216552734, "learning_rate": 7.939949371568977e-06, "loss": 1.7142, "step": 14737 }, { "epoch": 0.8214703751184438, "grad_norm": 0.5454463362693787, "learning_rate": 7.935135317344455e-06, "loss": 1.5694, "step": 14738 }, { "epoch": 0.821526113371607, "grad_norm": 0.5606818795204163, "learning_rate": 7.930322597172191e-06, "loss": 1.5784, "step": 14739 }, { "epoch": 0.82158185162477, "grad_norm": 0.5257277488708496, "learning_rate": 7.925511211204795e-06, "loss": 1.6162, "step": 14740 }, { "epoch": 0.8216375898779332, "grad_norm": 0.5445712208747864, "learning_rate": 7.92070115959488e-06, "loss": 1.6094, "step": 14741 }, { "epoch": 0.8216933281310964, "grad_norm": 0.5786468386650085, "learning_rate": 7.915892442494994e-06, "loss": 1.6552, "step": 14742 }, { "epoch": 0.8217490663842595, "grad_norm": 0.5710452795028687, "learning_rate": 7.911085060057621e-06, "loss": 1.7177, "step": 14743 }, { "epoch": 0.8218048046374227, "grad_norm": 0.6072984933853149, "learning_rate": 7.906279012435237e-06, "loss": 1.8521, "step": 14744 }, { "epoch": 0.8218605428905859, "grad_norm": 0.5253605842590332, "learning_rate": 7.901474299780258e-06, "loss": 1.3688, "step": 14745 }, { "epoch": 0.8219162811437489, "grad_norm": 0.6044268012046814, "learning_rate": 7.89667092224508e-06, "loss": 1.7278, "step": 14746 }, { "epoch": 0.8219720193969121, "grad_norm": 0.5338749885559082, "learning_rate": 7.891868879982001e-06, "loss": 1.5499, "step": 14747 }, { "epoch": 0.8220277576500753, "grad_norm": 0.5516805648803711, "learning_rate": 7.887068173143325e-06, "loss": 1.5249, "step": 14748 }, { "epoch": 0.8220834959032384, "grad_norm": 0.5159933567047119, "learning_rate": 7.882268801881337e-06, "loss": 1.4663, "step": 14749 }, { "epoch": 0.8221392341564016, "grad_norm": 0.5163145661354065, "learning_rate": 7.877470766348206e-06, "loss": 1.3222, "step": 14750 }, { "epoch": 0.8221949724095647, "grad_norm": 0.5524355173110962, "learning_rate": 7.872674066696112e-06, "loss": 1.6223, "step": 14751 }, { "epoch": 0.8222507106627278, "grad_norm": 0.5480507016181946, "learning_rate": 7.867878703077175e-06, "loss": 1.6545, "step": 14752 }, { "epoch": 0.822306448915891, "grad_norm": 0.5572043061256409, "learning_rate": 7.86308467564349e-06, "loss": 1.5277, "step": 14753 }, { "epoch": 0.8223621871690541, "grad_norm": 0.6016210317611694, "learning_rate": 7.858291984547072e-06, "loss": 1.6744, "step": 14754 }, { "epoch": 0.8224179254222173, "grad_norm": 0.5957650542259216, "learning_rate": 7.85350062993992e-06, "loss": 1.7234, "step": 14755 }, { "epoch": 0.8224736636753804, "grad_norm": 0.5599290728569031, "learning_rate": 7.848710611974019e-06, "loss": 1.6461, "step": 14756 }, { "epoch": 0.8225294019285435, "grad_norm": 0.5692183971405029, "learning_rate": 7.843921930801245e-06, "loss": 1.427, "step": 14757 }, { "epoch": 0.8225851401817067, "grad_norm": 0.5449408888816833, "learning_rate": 7.839134586573493e-06, "loss": 1.5931, "step": 14758 }, { "epoch": 0.8226408784348699, "grad_norm": 0.5386254787445068, "learning_rate": 7.834348579442553e-06, "loss": 1.6162, "step": 14759 }, { "epoch": 0.822696616688033, "grad_norm": 0.5870130658149719, "learning_rate": 7.829563909560256e-06, "loss": 1.6442, "step": 14760 }, { "epoch": 0.8227523549411961, "grad_norm": 0.5991342663764954, "learning_rate": 7.824780577078311e-06, "loss": 1.7833, "step": 14761 }, { "epoch": 0.8228080931943593, "grad_norm": 0.5483075380325317, "learning_rate": 7.81999858214843e-06, "loss": 1.7263, "step": 14762 }, { "epoch": 0.8228638314475224, "grad_norm": 0.5456960797309875, "learning_rate": 7.815217924922264e-06, "loss": 1.6074, "step": 14763 }, { "epoch": 0.8229195697006856, "grad_norm": 0.5569692850112915, "learning_rate": 7.81043860555143e-06, "loss": 1.7478, "step": 14764 }, { "epoch": 0.8229753079538488, "grad_norm": 0.5834555625915527, "learning_rate": 7.805660624187516e-06, "loss": 1.6435, "step": 14765 }, { "epoch": 0.8230310462070118, "grad_norm": 0.5573313236236572, "learning_rate": 7.80088398098201e-06, "loss": 1.5407, "step": 14766 }, { "epoch": 0.823086784460175, "grad_norm": 0.557045578956604, "learning_rate": 7.796108676086445e-06, "loss": 1.7211, "step": 14767 }, { "epoch": 0.8231425227133382, "grad_norm": 0.5935823321342468, "learning_rate": 7.791334709652254e-06, "loss": 1.6505, "step": 14768 }, { "epoch": 0.8231982609665013, "grad_norm": 0.6052401661872864, "learning_rate": 7.786562081830817e-06, "loss": 1.6837, "step": 14769 }, { "epoch": 0.8232539992196645, "grad_norm": 0.5678144693374634, "learning_rate": 7.781790792773514e-06, "loss": 1.642, "step": 14770 }, { "epoch": 0.8233097374728277, "grad_norm": 0.5625177025794983, "learning_rate": 7.777020842631656e-06, "loss": 1.6248, "step": 14771 }, { "epoch": 0.8233654757259907, "grad_norm": 0.5575246214866638, "learning_rate": 7.772252231556531e-06, "loss": 1.7027, "step": 14772 }, { "epoch": 0.8234212139791539, "grad_norm": 0.5748698711395264, "learning_rate": 7.76748495969935e-06, "loss": 1.5712, "step": 14773 }, { "epoch": 0.8234769522323171, "grad_norm": 0.5593873262405396, "learning_rate": 7.762719027211308e-06, "loss": 1.3786, "step": 14774 }, { "epoch": 0.8235326904854802, "grad_norm": 0.5477203130722046, "learning_rate": 7.75795443424357e-06, "loss": 1.5349, "step": 14775 }, { "epoch": 0.8235884287386434, "grad_norm": 0.6124054193496704, "learning_rate": 7.753191180947223e-06, "loss": 1.7259, "step": 14776 }, { "epoch": 0.8236441669918064, "grad_norm": 0.6015963554382324, "learning_rate": 7.74842926747334e-06, "loss": 1.5049, "step": 14777 }, { "epoch": 0.8236999052449696, "grad_norm": 0.5595274567604065, "learning_rate": 7.743668693972927e-06, "loss": 1.5613, "step": 14778 }, { "epoch": 0.8237556434981328, "grad_norm": 0.575369119644165, "learning_rate": 7.738909460596994e-06, "loss": 1.744, "step": 14779 }, { "epoch": 0.8238113817512959, "grad_norm": 0.5448950529098511, "learning_rate": 7.734151567496434e-06, "loss": 1.5413, "step": 14780 }, { "epoch": 0.823867120004459, "grad_norm": 0.5308200716972351, "learning_rate": 7.729395014822149e-06, "loss": 1.6224, "step": 14781 }, { "epoch": 0.8239228582576222, "grad_norm": 0.5744593143463135, "learning_rate": 7.724639802725025e-06, "loss": 1.7152, "step": 14782 }, { "epoch": 0.8239785965107853, "grad_norm": 0.552858293056488, "learning_rate": 7.71988593135583e-06, "loss": 1.7338, "step": 14783 }, { "epoch": 0.8240343347639485, "grad_norm": 0.567445695400238, "learning_rate": 7.715133400865342e-06, "loss": 1.735, "step": 14784 }, { "epoch": 0.8240900730171117, "grad_norm": 0.5928866267204285, "learning_rate": 7.710382211404288e-06, "loss": 1.7797, "step": 14785 }, { "epoch": 0.8241458112702748, "grad_norm": 0.5809508562088013, "learning_rate": 7.705632363123355e-06, "loss": 1.7308, "step": 14786 }, { "epoch": 0.8242015495234379, "grad_norm": 0.5642045736312866, "learning_rate": 7.700883856173164e-06, "loss": 1.4567, "step": 14787 }, { "epoch": 0.8242572877766011, "grad_norm": 0.5252789855003357, "learning_rate": 7.696136690704309e-06, "loss": 1.3691, "step": 14788 }, { "epoch": 0.8243130260297642, "grad_norm": 0.6067389845848083, "learning_rate": 7.691390866867348e-06, "loss": 1.578, "step": 14789 }, { "epoch": 0.8243687642829274, "grad_norm": 0.5476150512695312, "learning_rate": 7.686646384812802e-06, "loss": 1.5294, "step": 14790 }, { "epoch": 0.8244245025360906, "grad_norm": 0.5800880193710327, "learning_rate": 7.68190324469113e-06, "loss": 1.6699, "step": 14791 }, { "epoch": 0.8244802407892536, "grad_norm": 0.6214286088943481, "learning_rate": 7.677161446652736e-06, "loss": 1.6154, "step": 14792 }, { "epoch": 0.8245359790424168, "grad_norm": 0.61076819896698, "learning_rate": 7.672420990848033e-06, "loss": 1.7302, "step": 14793 }, { "epoch": 0.82459171729558, "grad_norm": 0.5480033755302429, "learning_rate": 7.667681877427363e-06, "loss": 1.5814, "step": 14794 }, { "epoch": 0.8246474555487431, "grad_norm": 0.5674887299537659, "learning_rate": 7.662944106540998e-06, "loss": 1.6489, "step": 14795 }, { "epoch": 0.8247031938019063, "grad_norm": 0.6398147940635681, "learning_rate": 7.658207678339202e-06, "loss": 1.8268, "step": 14796 }, { "epoch": 0.8247589320550694, "grad_norm": 0.5531885623931885, "learning_rate": 7.653472592972188e-06, "loss": 1.6168, "step": 14797 }, { "epoch": 0.8248146703082325, "grad_norm": 0.5649216175079346, "learning_rate": 7.648738850590137e-06, "loss": 1.5265, "step": 14798 }, { "epoch": 0.8248704085613957, "grad_norm": 0.5621973276138306, "learning_rate": 7.644006451343156e-06, "loss": 1.774, "step": 14799 }, { "epoch": 0.8249261468145588, "grad_norm": 0.5101774334907532, "learning_rate": 7.639275395381324e-06, "loss": 1.5918, "step": 14800 }, { "epoch": 0.824981885067722, "grad_norm": 0.5356602072715759, "learning_rate": 7.63454568285472e-06, "loss": 1.5562, "step": 14801 }, { "epoch": 0.8250376233208851, "grad_norm": 0.58839350938797, "learning_rate": 7.629817313913306e-06, "loss": 1.6387, "step": 14802 }, { "epoch": 0.8250933615740482, "grad_norm": 0.5281109809875488, "learning_rate": 7.625090288707054e-06, "loss": 1.5421, "step": 14803 }, { "epoch": 0.8251490998272114, "grad_norm": 0.5732079744338989, "learning_rate": 7.620364607385877e-06, "loss": 1.6407, "step": 14804 }, { "epoch": 0.8252048380803746, "grad_norm": 0.5893906354904175, "learning_rate": 7.61564027009965e-06, "loss": 1.7231, "step": 14805 }, { "epoch": 0.8252605763335377, "grad_norm": 0.5752212405204773, "learning_rate": 7.610917276998192e-06, "loss": 1.4615, "step": 14806 }, { "epoch": 0.8253163145867008, "grad_norm": 0.5605239272117615, "learning_rate": 7.606195628231272e-06, "loss": 1.6183, "step": 14807 }, { "epoch": 0.825372052839864, "grad_norm": 0.5010392069816589, "learning_rate": 7.6014753239486815e-06, "loss": 1.4315, "step": 14808 }, { "epoch": 0.8254277910930271, "grad_norm": 0.6002615690231323, "learning_rate": 7.596756364300084e-06, "loss": 1.7067, "step": 14809 }, { "epoch": 0.8254835293461903, "grad_norm": 0.6006166934967041, "learning_rate": 7.592038749435143e-06, "loss": 1.7114, "step": 14810 }, { "epoch": 0.8255392675993535, "grad_norm": 0.5413761138916016, "learning_rate": 7.587322479503478e-06, "loss": 1.5799, "step": 14811 }, { "epoch": 0.8255950058525166, "grad_norm": 0.5678083896636963, "learning_rate": 7.582607554654669e-06, "loss": 1.6757, "step": 14812 }, { "epoch": 0.8256507441056797, "grad_norm": 0.5252918601036072, "learning_rate": 7.577893975038231e-06, "loss": 1.6212, "step": 14813 }, { "epoch": 0.8257064823588429, "grad_norm": 0.5357844233512878, "learning_rate": 7.573181740803659e-06, "loss": 1.5161, "step": 14814 }, { "epoch": 0.825762220612006, "grad_norm": 0.4912710189819336, "learning_rate": 7.568470852100396e-06, "loss": 1.2616, "step": 14815 }, { "epoch": 0.8258179588651692, "grad_norm": 0.5815874934196472, "learning_rate": 7.563761309077838e-06, "loss": 1.7431, "step": 14816 }, { "epoch": 0.8258736971183324, "grad_norm": 0.5636022090911865, "learning_rate": 7.559053111885372e-06, "loss": 1.5336, "step": 14817 }, { "epoch": 0.8259294353714954, "grad_norm": 0.5679888129234314, "learning_rate": 7.5543462606722624e-06, "loss": 1.5636, "step": 14818 }, { "epoch": 0.8259851736246586, "grad_norm": 0.5726489424705505, "learning_rate": 7.5496407555878276e-06, "loss": 1.6993, "step": 14819 }, { "epoch": 0.8260409118778218, "grad_norm": 0.5466410517692566, "learning_rate": 7.544936596781299e-06, "loss": 1.4985, "step": 14820 }, { "epoch": 0.8260966501309849, "grad_norm": 0.5589420795440674, "learning_rate": 7.54023378440184e-06, "loss": 1.4959, "step": 14821 }, { "epoch": 0.8261523883841481, "grad_norm": 0.5536938309669495, "learning_rate": 7.535532318598609e-06, "loss": 1.5709, "step": 14822 }, { "epoch": 0.8262081266373111, "grad_norm": 0.5842770338058472, "learning_rate": 7.530832199520705e-06, "loss": 1.8395, "step": 14823 }, { "epoch": 0.8262638648904743, "grad_norm": 0.6140356659889221, "learning_rate": 7.5261334273172e-06, "loss": 1.7728, "step": 14824 }, { "epoch": 0.8263196031436375, "grad_norm": 0.6124187111854553, "learning_rate": 7.5214360021371e-06, "loss": 1.6189, "step": 14825 }, { "epoch": 0.8263753413968006, "grad_norm": 0.5564613938331604, "learning_rate": 7.516739924129362e-06, "loss": 1.4719, "step": 14826 }, { "epoch": 0.8264310796499638, "grad_norm": 0.5939016938209534, "learning_rate": 7.512045193442968e-06, "loss": 1.6933, "step": 14827 }, { "epoch": 0.826486817903127, "grad_norm": 0.6369741559028625, "learning_rate": 7.507351810226765e-06, "loss": 1.9048, "step": 14828 }, { "epoch": 0.82654255615629, "grad_norm": 0.5417369604110718, "learning_rate": 7.502659774629612e-06, "loss": 1.6225, "step": 14829 }, { "epoch": 0.8265982944094532, "grad_norm": 0.5475333333015442, "learning_rate": 7.4979690868003165e-06, "loss": 1.6123, "step": 14830 }, { "epoch": 0.8266540326626164, "grad_norm": 0.6035535931587219, "learning_rate": 7.493279746887649e-06, "loss": 1.5044, "step": 14831 }, { "epoch": 0.8267097709157795, "grad_norm": 0.5344101190567017, "learning_rate": 7.488591755040303e-06, "loss": 1.4167, "step": 14832 }, { "epoch": 0.8267655091689426, "grad_norm": 0.5738053321838379, "learning_rate": 7.483905111406958e-06, "loss": 1.565, "step": 14833 }, { "epoch": 0.8268212474221058, "grad_norm": 0.5430249571800232, "learning_rate": 7.479219816136279e-06, "loss": 1.503, "step": 14834 }, { "epoch": 0.8268769856752689, "grad_norm": 0.5295128226280212, "learning_rate": 7.474535869376819e-06, "loss": 1.4863, "step": 14835 }, { "epoch": 0.8269327239284321, "grad_norm": 0.5527878403663635, "learning_rate": 7.46985327127715e-06, "loss": 1.5387, "step": 14836 }, { "epoch": 0.8269884621815953, "grad_norm": 0.5978548526763916, "learning_rate": 7.465172021985761e-06, "loss": 1.6446, "step": 14837 }, { "epoch": 0.8270442004347583, "grad_norm": 0.5778266191482544, "learning_rate": 7.46049212165113e-06, "loss": 1.3285, "step": 14838 }, { "epoch": 0.8270999386879215, "grad_norm": 0.5653694868087769, "learning_rate": 7.45581357042166e-06, "loss": 1.6796, "step": 14839 }, { "epoch": 0.8271556769410847, "grad_norm": 0.5550215244293213, "learning_rate": 7.451136368445727e-06, "loss": 1.513, "step": 14840 }, { "epoch": 0.8272114151942478, "grad_norm": 0.5472756624221802, "learning_rate": 7.446460515871678e-06, "loss": 1.4862, "step": 14841 }, { "epoch": 0.827267153447411, "grad_norm": 0.5686060786247253, "learning_rate": 7.441786012847795e-06, "loss": 1.6166, "step": 14842 }, { "epoch": 0.8273228917005742, "grad_norm": 0.5672643184661865, "learning_rate": 7.437112859522339e-06, "loss": 1.7089, "step": 14843 }, { "epoch": 0.8273786299537372, "grad_norm": 0.5945203900337219, "learning_rate": 7.4324410560434825e-06, "loss": 1.6842, "step": 14844 }, { "epoch": 0.8274343682069004, "grad_norm": 0.5317419171333313, "learning_rate": 7.42777060255942e-06, "loss": 1.7236, "step": 14845 }, { "epoch": 0.8274901064600635, "grad_norm": 0.5510501265525818, "learning_rate": 7.423101499218272e-06, "loss": 1.7382, "step": 14846 }, { "epoch": 0.8275458447132267, "grad_norm": 0.5681928396224976, "learning_rate": 7.4184337461680905e-06, "loss": 1.5124, "step": 14847 }, { "epoch": 0.8276015829663899, "grad_norm": 0.5582361817359924, "learning_rate": 7.4137673435569266e-06, "loss": 1.5824, "step": 14848 }, { "epoch": 0.8276573212195529, "grad_norm": 0.5670194029808044, "learning_rate": 7.409102291532766e-06, "loss": 1.5791, "step": 14849 }, { "epoch": 0.8277130594727161, "grad_norm": 0.5458555817604065, "learning_rate": 7.404438590243568e-06, "loss": 1.69, "step": 14850 }, { "epoch": 0.8277687977258793, "grad_norm": 0.588135838508606, "learning_rate": 7.399776239837208e-06, "loss": 1.9097, "step": 14851 }, { "epoch": 0.8278245359790424, "grad_norm": 0.5528299808502197, "learning_rate": 7.395115240461581e-06, "loss": 1.6214, "step": 14852 }, { "epoch": 0.8278802742322056, "grad_norm": 0.616841197013855, "learning_rate": 7.390455592264506e-06, "loss": 1.8655, "step": 14853 }, { "epoch": 0.8279360124853687, "grad_norm": 0.5567806959152222, "learning_rate": 7.385797295393732e-06, "loss": 1.6002, "step": 14854 }, { "epoch": 0.8279917507385318, "grad_norm": 0.5780771374702454, "learning_rate": 7.381140349997018e-06, "loss": 1.7274, "step": 14855 }, { "epoch": 0.828047488991695, "grad_norm": 0.6161444783210754, "learning_rate": 7.376484756222041e-06, "loss": 1.8171, "step": 14856 }, { "epoch": 0.8281032272448582, "grad_norm": 0.6308067440986633, "learning_rate": 7.371830514216471e-06, "loss": 1.6465, "step": 14857 }, { "epoch": 0.8281589654980213, "grad_norm": 0.5672606229782104, "learning_rate": 7.3671776241278856e-06, "loss": 1.5607, "step": 14858 }, { "epoch": 0.8282147037511844, "grad_norm": 0.5984612107276917, "learning_rate": 7.362526086103844e-06, "loss": 1.7207, "step": 14859 }, { "epoch": 0.8282704420043476, "grad_norm": 0.5337589979171753, "learning_rate": 7.357875900291905e-06, "loss": 1.5266, "step": 14860 }, { "epoch": 0.8283261802575107, "grad_norm": 0.5842846035957336, "learning_rate": 7.353227066839513e-06, "loss": 1.6808, "step": 14861 }, { "epoch": 0.8283819185106739, "grad_norm": 0.5503394603729248, "learning_rate": 7.348579585894111e-06, "loss": 1.5349, "step": 14862 }, { "epoch": 0.8284376567638371, "grad_norm": 0.5789114236831665, "learning_rate": 7.3439334576030864e-06, "loss": 1.7059, "step": 14863 }, { "epoch": 0.8284933950170001, "grad_norm": 0.5610846877098083, "learning_rate": 7.339288682113804e-06, "loss": 1.7997, "step": 14864 }, { "epoch": 0.8285491332701633, "grad_norm": 0.5403236746788025, "learning_rate": 7.334645259573541e-06, "loss": 1.4615, "step": 14865 }, { "epoch": 0.8286048715233265, "grad_norm": 0.5676838755607605, "learning_rate": 7.330003190129575e-06, "loss": 1.6691, "step": 14866 }, { "epoch": 0.8286606097764896, "grad_norm": 0.5780337452888489, "learning_rate": 7.325362473929126e-06, "loss": 1.6924, "step": 14867 }, { "epoch": 0.8287163480296528, "grad_norm": 0.5940248966217041, "learning_rate": 7.320723111119371e-06, "loss": 1.8302, "step": 14868 }, { "epoch": 0.8287720862828158, "grad_norm": 0.5656587481498718, "learning_rate": 7.316085101847453e-06, "loss": 1.5856, "step": 14869 }, { "epoch": 0.828827824535979, "grad_norm": 0.5414813160896301, "learning_rate": 7.311448446260422e-06, "loss": 1.5342, "step": 14870 }, { "epoch": 0.8288835627891422, "grad_norm": 0.5124301910400391, "learning_rate": 7.306813144505381e-06, "loss": 1.4847, "step": 14871 }, { "epoch": 0.8289393010423053, "grad_norm": 0.6682723164558411, "learning_rate": 7.3021791967292976e-06, "loss": 1.8871, "step": 14872 }, { "epoch": 0.8289950392954685, "grad_norm": 0.5085262060165405, "learning_rate": 7.297546603079147e-06, "loss": 1.5112, "step": 14873 }, { "epoch": 0.8290507775486317, "grad_norm": 0.5488193035125732, "learning_rate": 7.292915363701841e-06, "loss": 1.2755, "step": 14874 }, { "epoch": 0.8291065158017947, "grad_norm": 0.5370182991027832, "learning_rate": 7.288285478744261e-06, "loss": 1.5412, "step": 14875 }, { "epoch": 0.8291622540549579, "grad_norm": 0.5484380125999451, "learning_rate": 7.283656948353251e-06, "loss": 1.7214, "step": 14876 }, { "epoch": 0.8292179923081211, "grad_norm": 0.5984296202659607, "learning_rate": 7.2790297726755716e-06, "loss": 1.9109, "step": 14877 }, { "epoch": 0.8292737305612842, "grad_norm": 0.5639859437942505, "learning_rate": 7.274403951857994e-06, "loss": 1.5359, "step": 14878 }, { "epoch": 0.8293294688144474, "grad_norm": 0.5411209464073181, "learning_rate": 7.2697794860472235e-06, "loss": 1.6992, "step": 14879 }, { "epoch": 0.8293852070676105, "grad_norm": 0.5661779046058655, "learning_rate": 7.265156375389909e-06, "loss": 1.6467, "step": 14880 }, { "epoch": 0.8294409453207736, "grad_norm": 0.5306726098060608, "learning_rate": 7.260534620032667e-06, "loss": 1.7689, "step": 14881 }, { "epoch": 0.8294966835739368, "grad_norm": 0.5563758611679077, "learning_rate": 7.255914220122078e-06, "loss": 1.7601, "step": 14882 }, { "epoch": 0.8295524218271, "grad_norm": 0.5583815574645996, "learning_rate": 7.25129517580469e-06, "loss": 1.731, "step": 14883 }, { "epoch": 0.8296081600802631, "grad_norm": 0.5571249723434448, "learning_rate": 7.246677487226966e-06, "loss": 1.6389, "step": 14884 }, { "epoch": 0.8296638983334262, "grad_norm": 0.5696853399276733, "learning_rate": 7.242061154535346e-06, "loss": 1.6421, "step": 14885 }, { "epoch": 0.8297196365865894, "grad_norm": 0.5597503781318665, "learning_rate": 7.237446177876278e-06, "loss": 1.4844, "step": 14886 }, { "epoch": 0.8297753748397525, "grad_norm": 0.5343501567840576, "learning_rate": 7.23283255739608e-06, "loss": 1.478, "step": 14887 }, { "epoch": 0.8298311130929157, "grad_norm": 0.5455690026283264, "learning_rate": 7.228220293241084e-06, "loss": 1.7255, "step": 14888 }, { "epoch": 0.8298868513460789, "grad_norm": 0.5314241051673889, "learning_rate": 7.223609385557567e-06, "loss": 1.4374, "step": 14889 }, { "epoch": 0.8299425895992419, "grad_norm": 0.589161217212677, "learning_rate": 7.2189998344917635e-06, "loss": 1.5534, "step": 14890 }, { "epoch": 0.8299983278524051, "grad_norm": 0.5816118121147156, "learning_rate": 7.214391640189844e-06, "loss": 1.683, "step": 14891 }, { "epoch": 0.8300540661055682, "grad_norm": 0.5937685966491699, "learning_rate": 7.209784802797964e-06, "loss": 1.6317, "step": 14892 }, { "epoch": 0.8301098043587314, "grad_norm": 0.5852888822555542, "learning_rate": 7.20517932246223e-06, "loss": 1.8489, "step": 14893 }, { "epoch": 0.8301655426118946, "grad_norm": 0.5392382740974426, "learning_rate": 7.200575199328691e-06, "loss": 1.6475, "step": 14894 }, { "epoch": 0.8302212808650576, "grad_norm": 0.5379775166511536, "learning_rate": 7.195972433543386e-06, "loss": 1.5613, "step": 14895 }, { "epoch": 0.8302770191182208, "grad_norm": 0.5524624586105347, "learning_rate": 7.191371025252242e-06, "loss": 1.5859, "step": 14896 }, { "epoch": 0.830332757371384, "grad_norm": 0.5549662113189697, "learning_rate": 7.186770974601242e-06, "loss": 1.7449, "step": 14897 }, { "epoch": 0.8303884956245471, "grad_norm": 0.9866027235984802, "learning_rate": 7.182172281736244e-06, "loss": 1.6864, "step": 14898 }, { "epoch": 0.8304442338777103, "grad_norm": 0.5486053228378296, "learning_rate": 7.177574946803084e-06, "loss": 1.752, "step": 14899 }, { "epoch": 0.8304999721308735, "grad_norm": 0.5818338990211487, "learning_rate": 7.172978969947586e-06, "loss": 1.5253, "step": 14900 }, { "epoch": 0.8305557103840365, "grad_norm": 0.5546401739120483, "learning_rate": 7.168384351315488e-06, "loss": 1.7083, "step": 14901 }, { "epoch": 0.8306114486371997, "grad_norm": 0.5562866926193237, "learning_rate": 7.163791091052524e-06, "loss": 1.5918, "step": 14902 }, { "epoch": 0.8306671868903629, "grad_norm": 0.5581058859825134, "learning_rate": 7.1591991893043384e-06, "loss": 1.673, "step": 14903 }, { "epoch": 0.830722925143526, "grad_norm": 0.6358676552772522, "learning_rate": 7.1546086462165816e-06, "loss": 1.837, "step": 14904 }, { "epoch": 0.8307786633966892, "grad_norm": 0.5933240652084351, "learning_rate": 7.150019461934843e-06, "loss": 1.7407, "step": 14905 }, { "epoch": 0.8308344016498523, "grad_norm": 0.5273723006248474, "learning_rate": 7.145431636604644e-06, "loss": 1.4998, "step": 14906 }, { "epoch": 0.8308901399030154, "grad_norm": 0.5660163164138794, "learning_rate": 7.140845170371496e-06, "loss": 1.5029, "step": 14907 }, { "epoch": 0.8309458781561786, "grad_norm": 0.5751037001609802, "learning_rate": 7.136260063380851e-06, "loss": 1.5846, "step": 14908 }, { "epoch": 0.8310016164093418, "grad_norm": 0.601863443851471, "learning_rate": 7.131676315778135e-06, "loss": 1.749, "step": 14909 }, { "epoch": 0.8310573546625049, "grad_norm": 0.586399495601654, "learning_rate": 7.1270939277087e-06, "loss": 1.6114, "step": 14910 }, { "epoch": 0.831113092915668, "grad_norm": 0.5683432221412659, "learning_rate": 7.122512899317862e-06, "loss": 1.7493, "step": 14911 }, { "epoch": 0.8311688311688312, "grad_norm": 0.5420761704444885, "learning_rate": 7.11793323075095e-06, "loss": 1.5338, "step": 14912 }, { "epoch": 0.8312245694219943, "grad_norm": 0.5505176186561584, "learning_rate": 7.113354922153159e-06, "loss": 1.6078, "step": 14913 }, { "epoch": 0.8312803076751575, "grad_norm": 0.6434391140937805, "learning_rate": 7.108777973669706e-06, "loss": 1.8323, "step": 14914 }, { "epoch": 0.8313360459283206, "grad_norm": 0.5409782528877258, "learning_rate": 7.104202385445741e-06, "loss": 1.6329, "step": 14915 }, { "epoch": 0.8313917841814837, "grad_norm": 0.5738489627838135, "learning_rate": 7.099628157626392e-06, "loss": 1.5872, "step": 14916 }, { "epoch": 0.8314475224346469, "grad_norm": 0.5797942876815796, "learning_rate": 7.095055290356694e-06, "loss": 1.6175, "step": 14917 }, { "epoch": 0.83150326068781, "grad_norm": 0.5602861642837524, "learning_rate": 7.090483783781693e-06, "loss": 1.5957, "step": 14918 }, { "epoch": 0.8315589989409732, "grad_norm": 0.5227814316749573, "learning_rate": 7.085913638046366e-06, "loss": 1.3375, "step": 14919 }, { "epoch": 0.8316147371941364, "grad_norm": 0.5705005526542664, "learning_rate": 7.081344853295652e-06, "loss": 1.6729, "step": 14920 }, { "epoch": 0.8316704754472994, "grad_norm": 0.6214076280593872, "learning_rate": 7.076777429674458e-06, "loss": 1.5751, "step": 14921 }, { "epoch": 0.8317262137004626, "grad_norm": 0.5561441779136658, "learning_rate": 7.072211367327603e-06, "loss": 1.6183, "step": 14922 }, { "epoch": 0.8317819519536258, "grad_norm": 0.5700360536575317, "learning_rate": 7.0676466663999355e-06, "loss": 1.5727, "step": 14923 }, { "epoch": 0.8318376902067889, "grad_norm": 0.5978094339370728, "learning_rate": 7.063083327036191e-06, "loss": 1.8146, "step": 14924 }, { "epoch": 0.8318934284599521, "grad_norm": 0.6397820711135864, "learning_rate": 7.058521349381109e-06, "loss": 1.8472, "step": 14925 }, { "epoch": 0.8319491667131153, "grad_norm": 0.5918593406677246, "learning_rate": 7.0539607335793565e-06, "loss": 1.5908, "step": 14926 }, { "epoch": 0.8320049049662783, "grad_norm": 0.6069926619529724, "learning_rate": 7.04940147977558e-06, "loss": 1.7789, "step": 14927 }, { "epoch": 0.8320606432194415, "grad_norm": 0.5574516654014587, "learning_rate": 7.044843588114386e-06, "loss": 1.7592, "step": 14928 }, { "epoch": 0.8321163814726047, "grad_norm": 0.6075261831283569, "learning_rate": 7.04028705874028e-06, "loss": 1.8732, "step": 14929 }, { "epoch": 0.8321721197257678, "grad_norm": 0.5926170945167542, "learning_rate": 7.035731891797803e-06, "loss": 1.7596, "step": 14930 }, { "epoch": 0.832227857978931, "grad_norm": 0.5655992031097412, "learning_rate": 7.031178087431428e-06, "loss": 1.738, "step": 14931 }, { "epoch": 0.8322835962320941, "grad_norm": 0.5602476000785828, "learning_rate": 7.026625645785551e-06, "loss": 1.5169, "step": 14932 }, { "epoch": 0.8323393344852572, "grad_norm": 0.5783279538154602, "learning_rate": 7.022074567004549e-06, "loss": 1.7271, "step": 14933 }, { "epoch": 0.8323950727384204, "grad_norm": 0.5969105362892151, "learning_rate": 7.017524851232765e-06, "loss": 1.6145, "step": 14934 }, { "epoch": 0.8324508109915836, "grad_norm": 0.5629395842552185, "learning_rate": 7.012976498614498e-06, "loss": 1.6458, "step": 14935 }, { "epoch": 0.8325065492447467, "grad_norm": 0.595198929309845, "learning_rate": 7.008429509293979e-06, "loss": 1.5222, "step": 14936 }, { "epoch": 0.8325622874979098, "grad_norm": 0.6046950817108154, "learning_rate": 7.003883883415402e-06, "loss": 1.6804, "step": 14937 }, { "epoch": 0.8326180257510729, "grad_norm": 0.5445270538330078, "learning_rate": 6.9993396211229635e-06, "loss": 1.4962, "step": 14938 }, { "epoch": 0.8326737640042361, "grad_norm": 0.6198094487190247, "learning_rate": 6.994796722560754e-06, "loss": 1.9543, "step": 14939 }, { "epoch": 0.8327295022573993, "grad_norm": 0.542130708694458, "learning_rate": 6.990255187872851e-06, "loss": 1.6622, "step": 14940 }, { "epoch": 0.8327852405105624, "grad_norm": 0.5684085488319397, "learning_rate": 6.985715017203293e-06, "loss": 1.4973, "step": 14941 }, { "epoch": 0.8328409787637255, "grad_norm": 0.5942514538764954, "learning_rate": 6.981176210696077e-06, "loss": 1.7641, "step": 14942 }, { "epoch": 0.8328967170168887, "grad_norm": 0.592943549156189, "learning_rate": 6.97663876849512e-06, "loss": 1.6925, "step": 14943 }, { "epoch": 0.8329524552700518, "grad_norm": 0.5728437304496765, "learning_rate": 6.972102690744325e-06, "loss": 1.7473, "step": 14944 }, { "epoch": 0.833008193523215, "grad_norm": 0.5813961029052734, "learning_rate": 6.967567977587586e-06, "loss": 1.7243, "step": 14945 }, { "epoch": 0.8330639317763782, "grad_norm": 0.5490429401397705, "learning_rate": 6.963034629168685e-06, "loss": 1.4805, "step": 14946 }, { "epoch": 0.8331196700295412, "grad_norm": 0.4701632857322693, "learning_rate": 6.958502645631409e-06, "loss": 0.9261, "step": 14947 }, { "epoch": 0.8331754082827044, "grad_norm": 0.570267379283905, "learning_rate": 6.953972027119466e-06, "loss": 1.5988, "step": 14948 }, { "epoch": 0.8332311465358676, "grad_norm": 0.5397829413414001, "learning_rate": 6.949442773776571e-06, "loss": 1.399, "step": 14949 }, { "epoch": 0.8332868847890307, "grad_norm": 0.5675944685935974, "learning_rate": 6.944914885746334e-06, "loss": 1.6314, "step": 14950 }, { "epoch": 0.8333426230421939, "grad_norm": 0.5543584227561951, "learning_rate": 6.940388363172373e-06, "loss": 1.5388, "step": 14951 }, { "epoch": 0.833398361295357, "grad_norm": 0.5559143424034119, "learning_rate": 6.93586320619824e-06, "loss": 1.5226, "step": 14952 }, { "epoch": 0.8334540995485201, "grad_norm": 0.5080550312995911, "learning_rate": 6.931339414967441e-06, "loss": 1.4901, "step": 14953 }, { "epoch": 0.8335098378016833, "grad_norm": 0.5370514392852783, "learning_rate": 6.926816989623464e-06, "loss": 1.5107, "step": 14954 }, { "epoch": 0.8335655760548465, "grad_norm": 0.6255368590354919, "learning_rate": 6.922295930309691e-06, "loss": 1.5848, "step": 14955 }, { "epoch": 0.8336213143080096, "grad_norm": 0.5875033736228943, "learning_rate": 6.917776237169543e-06, "loss": 1.5865, "step": 14956 }, { "epoch": 0.8336770525611727, "grad_norm": 0.5647172927856445, "learning_rate": 6.91325791034636e-06, "loss": 1.6705, "step": 14957 }, { "epoch": 0.8337327908143359, "grad_norm": 0.5676584839820862, "learning_rate": 6.908740949983411e-06, "loss": 1.5818, "step": 14958 }, { "epoch": 0.833788529067499, "grad_norm": 0.5705934166908264, "learning_rate": 6.904225356223954e-06, "loss": 1.6364, "step": 14959 }, { "epoch": 0.8338442673206622, "grad_norm": 0.5556856393814087, "learning_rate": 6.899711129211206e-06, "loss": 1.5349, "step": 14960 }, { "epoch": 0.8339000055738253, "grad_norm": 0.560831606388092, "learning_rate": 6.895198269088343e-06, "loss": 1.6409, "step": 14961 }, { "epoch": 0.8339557438269884, "grad_norm": 0.5645913481712341, "learning_rate": 6.890686775998462e-06, "loss": 1.8578, "step": 14962 }, { "epoch": 0.8340114820801516, "grad_norm": 0.5440537333488464, "learning_rate": 6.8861766500846356e-06, "loss": 1.4316, "step": 14963 }, { "epoch": 0.8340672203333147, "grad_norm": 0.5634580254554749, "learning_rate": 6.88166789148994e-06, "loss": 1.6223, "step": 14964 }, { "epoch": 0.8341229585864779, "grad_norm": 0.6253748536109924, "learning_rate": 6.877160500357327e-06, "loss": 1.5812, "step": 14965 }, { "epoch": 0.8341786968396411, "grad_norm": 0.5785320997238159, "learning_rate": 6.872654476829765e-06, "loss": 1.6649, "step": 14966 }, { "epoch": 0.8342344350928041, "grad_norm": 0.5737482905387878, "learning_rate": 6.868149821050152e-06, "loss": 1.6446, "step": 14967 }, { "epoch": 0.8342901733459673, "grad_norm": 0.554928183555603, "learning_rate": 6.8636465331613555e-06, "loss": 1.702, "step": 14968 }, { "epoch": 0.8343459115991305, "grad_norm": 0.5656299591064453, "learning_rate": 6.859144613306185e-06, "loss": 1.8365, "step": 14969 }, { "epoch": 0.8344016498522936, "grad_norm": 0.6206011176109314, "learning_rate": 6.8546440616274024e-06, "loss": 1.6186, "step": 14970 }, { "epoch": 0.8344573881054568, "grad_norm": 0.6015493869781494, "learning_rate": 6.850144878267784e-06, "loss": 1.6608, "step": 14971 }, { "epoch": 0.83451312635862, "grad_norm": 0.6501861214637756, "learning_rate": 6.84564706336997e-06, "loss": 1.4615, "step": 14972 }, { "epoch": 0.834568864611783, "grad_norm": 0.5718386173248291, "learning_rate": 6.841150617076636e-06, "loss": 1.7102, "step": 14973 }, { "epoch": 0.8346246028649462, "grad_norm": 0.5772673487663269, "learning_rate": 6.836655539530351e-06, "loss": 1.5415, "step": 14974 }, { "epoch": 0.8346803411181094, "grad_norm": 0.5746707916259766, "learning_rate": 6.832161830873718e-06, "loss": 1.6088, "step": 14975 }, { "epoch": 0.8347360793712725, "grad_norm": 0.534925103187561, "learning_rate": 6.827669491249211e-06, "loss": 1.5481, "step": 14976 }, { "epoch": 0.8347918176244357, "grad_norm": 0.6033921837806702, "learning_rate": 6.823178520799317e-06, "loss": 1.6031, "step": 14977 }, { "epoch": 0.8348475558775988, "grad_norm": 0.5789549350738525, "learning_rate": 6.8186889196664605e-06, "loss": 1.647, "step": 14978 }, { "epoch": 0.8349032941307619, "grad_norm": 0.5535509586334229, "learning_rate": 6.814200687993028e-06, "loss": 1.5902, "step": 14979 }, { "epoch": 0.8349590323839251, "grad_norm": 0.5693678855895996, "learning_rate": 6.809713825921371e-06, "loss": 1.5765, "step": 14980 }, { "epoch": 0.8350147706370883, "grad_norm": 0.5999974012374878, "learning_rate": 6.80522833359375e-06, "loss": 1.6666, "step": 14981 }, { "epoch": 0.8350705088902514, "grad_norm": 0.5667532086372375, "learning_rate": 6.800744211152454e-06, "loss": 1.7652, "step": 14982 }, { "epoch": 0.8351262471434145, "grad_norm": 0.562842607498169, "learning_rate": 6.796261458739695e-06, "loss": 1.6158, "step": 14983 }, { "epoch": 0.8351819853965776, "grad_norm": 0.5929235219955444, "learning_rate": 6.791780076497617e-06, "loss": 1.6253, "step": 14984 }, { "epoch": 0.8352377236497408, "grad_norm": 0.6288977861404419, "learning_rate": 6.787300064568353e-06, "loss": 1.7899, "step": 14985 }, { "epoch": 0.835293461902904, "grad_norm": 0.5721529126167297, "learning_rate": 6.7828214230939825e-06, "loss": 1.8485, "step": 14986 }, { "epoch": 0.8353492001560671, "grad_norm": 0.5428566932678223, "learning_rate": 6.778344152216553e-06, "loss": 1.5262, "step": 14987 }, { "epoch": 0.8354049384092302, "grad_norm": 0.6128736734390259, "learning_rate": 6.7738682520780415e-06, "loss": 1.6569, "step": 14988 }, { "epoch": 0.8354606766623934, "grad_norm": 0.5812713503837585, "learning_rate": 6.7693937228203885e-06, "loss": 1.7483, "step": 14989 }, { "epoch": 0.8355164149155565, "grad_norm": 0.6197991371154785, "learning_rate": 6.764920564585536e-06, "loss": 1.708, "step": 14990 }, { "epoch": 0.8355721531687197, "grad_norm": 0.613953173160553, "learning_rate": 6.760448777515316e-06, "loss": 1.9626, "step": 14991 }, { "epoch": 0.8356278914218829, "grad_norm": 0.5072037577629089, "learning_rate": 6.755978361751553e-06, "loss": 1.2251, "step": 14992 }, { "epoch": 0.835683629675046, "grad_norm": 0.6177765130996704, "learning_rate": 6.7515093174360275e-06, "loss": 1.7765, "step": 14993 }, { "epoch": 0.8357393679282091, "grad_norm": 0.5812098979949951, "learning_rate": 6.7470416447104834e-06, "loss": 1.777, "step": 14994 }, { "epoch": 0.8357951061813723, "grad_norm": 0.5786961913108826, "learning_rate": 6.742575343716584e-06, "loss": 1.695, "step": 14995 }, { "epoch": 0.8358508444345354, "grad_norm": 0.6079100966453552, "learning_rate": 6.738110414595977e-06, "loss": 1.6019, "step": 14996 }, { "epoch": 0.8359065826876986, "grad_norm": 0.5513747334480286, "learning_rate": 6.733646857490294e-06, "loss": 1.4083, "step": 14997 }, { "epoch": 0.8359623209408618, "grad_norm": 0.5464707016944885, "learning_rate": 6.729184672541061e-06, "loss": 1.5974, "step": 14998 }, { "epoch": 0.8360180591940248, "grad_norm": 0.5555058121681213, "learning_rate": 6.7247238598898145e-06, "loss": 1.5756, "step": 14999 }, { "epoch": 0.836073797447188, "grad_norm": 0.636162519454956, "learning_rate": 6.720264419677996e-06, "loss": 1.8554, "step": 15000 } ], "logging_steps": 1, "max_steps": 17941, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 3000, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 1.119015678246912e+19, "train_batch_size": 1, "trial_name": null, "trial_params": null }