{ "best_metric": null, "best_model_checkpoint": null, "epoch": 2.0, "eval_steps": 500, "global_step": 13070, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0001530221882172915, "grad_norm": 13.516160750382397, "learning_rate": 5.0890585241730285e-08, "loss": 1.0816, "step": 1 }, { "epoch": 0.000306044376434583, "grad_norm": 13.656487119130613, "learning_rate": 1.0178117048346057e-07, "loss": 1.0528, "step": 2 }, { "epoch": 0.0004590665646518745, "grad_norm": 16.846354425859488, "learning_rate": 1.5267175572519085e-07, "loss": 1.1708, "step": 3 }, { "epoch": 0.000612088752869166, "grad_norm": 13.046572036423532, "learning_rate": 2.0356234096692114e-07, "loss": 1.0517, "step": 4 }, { "epoch": 0.0007651109410864575, "grad_norm": 12.49233347364335, "learning_rate": 2.544529262086514e-07, "loss": 1.0145, "step": 5 }, { "epoch": 0.000918133129303749, "grad_norm": 14.751771565656806, "learning_rate": 3.053435114503817e-07, "loss": 1.0758, "step": 6 }, { "epoch": 0.0010711553175210406, "grad_norm": 11.653404038618469, "learning_rate": 3.56234096692112e-07, "loss": 1.0636, "step": 7 }, { "epoch": 0.001224177505738332, "grad_norm": 13.668951288685138, "learning_rate": 4.071246819338423e-07, "loss": 1.1952, "step": 8 }, { "epoch": 0.0013771996939556236, "grad_norm": 12.046531599063504, "learning_rate": 4.5801526717557257e-07, "loss": 1.0257, "step": 9 }, { "epoch": 0.001530221882172915, "grad_norm": 13.723654296994994, "learning_rate": 5.089058524173028e-07, "loss": 1.059, "step": 10 }, { "epoch": 0.0016832440703902067, "grad_norm": 13.902265225322944, "learning_rate": 5.597964376590332e-07, "loss": 1.0858, "step": 11 }, { "epoch": 0.001836266258607498, "grad_norm": 12.528149564606679, "learning_rate": 6.106870229007634e-07, "loss": 1.0277, "step": 12 }, { "epoch": 0.0019892884468247895, "grad_norm": 13.1123093332078, "learning_rate": 6.615776081424936e-07, "loss": 0.9474, "step": 13 }, { "epoch": 0.002142310635042081, "grad_norm": 11.039557385133689, "learning_rate": 7.12468193384224e-07, "loss": 1.0024, "step": 14 }, { "epoch": 0.0022953328232593728, "grad_norm": 8.906284572299981, "learning_rate": 7.633587786259543e-07, "loss": 0.9381, "step": 15 }, { "epoch": 0.002448355011476664, "grad_norm": 10.05716027554367, "learning_rate": 8.142493638676846e-07, "loss": 0.9707, "step": 16 }, { "epoch": 0.0026013771996939556, "grad_norm": 9.006177898036631, "learning_rate": 8.651399491094148e-07, "loss": 0.9394, "step": 17 }, { "epoch": 0.0027543993879112472, "grad_norm": 21.183255574001187, "learning_rate": 9.160305343511451e-07, "loss": 1.0931, "step": 18 }, { "epoch": 0.002907421576128539, "grad_norm": 10.995986966246456, "learning_rate": 9.669211195928755e-07, "loss": 0.984, "step": 19 }, { "epoch": 0.00306044376434583, "grad_norm": 11.307646984532525, "learning_rate": 1.0178117048346056e-06, "loss": 0.9666, "step": 20 }, { "epoch": 0.0032134659525631217, "grad_norm": 14.964095843531805, "learning_rate": 1.068702290076336e-06, "loss": 1.0085, "step": 21 }, { "epoch": 0.0033664881407804133, "grad_norm": 9.3514966763225, "learning_rate": 1.1195928753180663e-06, "loss": 0.9647, "step": 22 }, { "epoch": 0.0035195103289977045, "grad_norm": 7.7748996347946, "learning_rate": 1.1704834605597967e-06, "loss": 1.0044, "step": 23 }, { "epoch": 0.003672532517214996, "grad_norm": 9.478805120179091, "learning_rate": 1.2213740458015268e-06, "loss": 0.9446, "step": 24 }, { "epoch": 0.0038255547054322878, "grad_norm": 6.758033563189933, "learning_rate": 1.2722646310432571e-06, "loss": 0.9163, "step": 25 }, { "epoch": 0.003978576893649579, "grad_norm": 5.984168999533934, "learning_rate": 1.3231552162849873e-06, "loss": 0.8452, "step": 26 }, { "epoch": 0.004131599081866871, "grad_norm": 6.141293673107684, "learning_rate": 1.3740458015267178e-06, "loss": 0.9215, "step": 27 }, { "epoch": 0.004284621270084162, "grad_norm": 8.443582458602195, "learning_rate": 1.424936386768448e-06, "loss": 0.9543, "step": 28 }, { "epoch": 0.004437643458301454, "grad_norm": 6.219529039996243, "learning_rate": 1.475826972010178e-06, "loss": 1.1055, "step": 29 }, { "epoch": 0.0045906656465187455, "grad_norm": 6.723218647037651, "learning_rate": 1.5267175572519086e-06, "loss": 0.8657, "step": 30 }, { "epoch": 0.004743687834736037, "grad_norm": 6.925216418396402, "learning_rate": 1.5776081424936388e-06, "loss": 1.0145, "step": 31 }, { "epoch": 0.004896710022953328, "grad_norm": 5.817861415648484, "learning_rate": 1.6284987277353691e-06, "loss": 0.9494, "step": 32 }, { "epoch": 0.0050497322111706195, "grad_norm": 6.517661419074156, "learning_rate": 1.6793893129770995e-06, "loss": 1.0573, "step": 33 }, { "epoch": 0.005202754399387911, "grad_norm": 7.507479750968244, "learning_rate": 1.7302798982188296e-06, "loss": 0.8793, "step": 34 }, { "epoch": 0.005355776587605203, "grad_norm": 6.961283303110031, "learning_rate": 1.78117048346056e-06, "loss": 0.9334, "step": 35 }, { "epoch": 0.0055087987758224944, "grad_norm": 7.154082162667194, "learning_rate": 1.8320610687022903e-06, "loss": 0.8061, "step": 36 }, { "epoch": 0.005661820964039786, "grad_norm": 6.110703133559988, "learning_rate": 1.8829516539440204e-06, "loss": 0.8045, "step": 37 }, { "epoch": 0.005814843152257078, "grad_norm": 6.144648736565948, "learning_rate": 1.933842239185751e-06, "loss": 0.7255, "step": 38 }, { "epoch": 0.0059678653404743685, "grad_norm": 6.104408552237334, "learning_rate": 1.984732824427481e-06, "loss": 0.8799, "step": 39 }, { "epoch": 0.00612088752869166, "grad_norm": 6.224216203164156, "learning_rate": 2.0356234096692112e-06, "loss": 0.8629, "step": 40 }, { "epoch": 0.006273909716908952, "grad_norm": 5.354540996861285, "learning_rate": 2.0865139949109416e-06, "loss": 0.9332, "step": 41 }, { "epoch": 0.006426931905126243, "grad_norm": 7.248232082302233, "learning_rate": 2.137404580152672e-06, "loss": 0.879, "step": 42 }, { "epoch": 0.006579954093343535, "grad_norm": 6.526634157834574, "learning_rate": 2.1882951653944023e-06, "loss": 0.8931, "step": 43 }, { "epoch": 0.006732976281560827, "grad_norm": 5.558985637856043, "learning_rate": 2.2391857506361326e-06, "loss": 0.9254, "step": 44 }, { "epoch": 0.006885998469778117, "grad_norm": 5.143083416175365, "learning_rate": 2.2900763358778625e-06, "loss": 0.7931, "step": 45 }, { "epoch": 0.007039020657995409, "grad_norm": 6.230004131099689, "learning_rate": 2.3409669211195933e-06, "loss": 0.8783, "step": 46 }, { "epoch": 0.007192042846212701, "grad_norm": 6.96270555642169, "learning_rate": 2.3918575063613232e-06, "loss": 0.8966, "step": 47 }, { "epoch": 0.007345065034429992, "grad_norm": 4.725287444763657, "learning_rate": 2.4427480916030536e-06, "loss": 0.792, "step": 48 }, { "epoch": 0.007498087222647284, "grad_norm": 4.516169085236729, "learning_rate": 2.493638676844784e-06, "loss": 0.8188, "step": 49 }, { "epoch": 0.0076511094108645756, "grad_norm": 6.169099136800104, "learning_rate": 2.5445292620865143e-06, "loss": 0.9132, "step": 50 }, { "epoch": 0.007804131599081867, "grad_norm": 5.030562592462142, "learning_rate": 2.595419847328244e-06, "loss": 0.9125, "step": 51 }, { "epoch": 0.007957153787299158, "grad_norm": 5.741056522914908, "learning_rate": 2.6463104325699745e-06, "loss": 0.8294, "step": 52 }, { "epoch": 0.00811017597551645, "grad_norm": 5.3502817029143515, "learning_rate": 2.6972010178117053e-06, "loss": 0.8005, "step": 53 }, { "epoch": 0.008263198163733741, "grad_norm": 5.153661369992958, "learning_rate": 2.7480916030534356e-06, "loss": 0.8175, "step": 54 }, { "epoch": 0.008416220351951033, "grad_norm": 6.046120010370828, "learning_rate": 2.7989821882951656e-06, "loss": 0.8508, "step": 55 }, { "epoch": 0.008569242540168324, "grad_norm": 6.292867560774823, "learning_rate": 2.849872773536896e-06, "loss": 0.9253, "step": 56 }, { "epoch": 0.008722264728385616, "grad_norm": 6.105008240723325, "learning_rate": 2.900763358778626e-06, "loss": 0.9678, "step": 57 }, { "epoch": 0.008875286916602908, "grad_norm": 4.459519530250151, "learning_rate": 2.951653944020356e-06, "loss": 0.857, "step": 58 }, { "epoch": 0.0090283091048202, "grad_norm": 4.966961705007526, "learning_rate": 3.002544529262087e-06, "loss": 0.8162, "step": 59 }, { "epoch": 0.009181331293037491, "grad_norm": 5.179614147649331, "learning_rate": 3.0534351145038173e-06, "loss": 0.7892, "step": 60 }, { "epoch": 0.009334353481254783, "grad_norm": 6.585863543513201, "learning_rate": 3.104325699745547e-06, "loss": 0.81, "step": 61 }, { "epoch": 0.009487375669472074, "grad_norm": 5.684014842731852, "learning_rate": 3.1552162849872775e-06, "loss": 0.9146, "step": 62 }, { "epoch": 0.009640397857689364, "grad_norm": 4.941334064996768, "learning_rate": 3.206106870229008e-06, "loss": 0.8218, "step": 63 }, { "epoch": 0.009793420045906656, "grad_norm": 5.436505901693147, "learning_rate": 3.2569974554707382e-06, "loss": 0.7455, "step": 64 }, { "epoch": 0.009946442234123947, "grad_norm": 4.827550970928569, "learning_rate": 3.3078880407124686e-06, "loss": 0.7799, "step": 65 }, { "epoch": 0.010099464422341239, "grad_norm": 5.1126560035818125, "learning_rate": 3.358778625954199e-06, "loss": 0.8829, "step": 66 }, { "epoch": 0.01025248661055853, "grad_norm": 4.734944949714936, "learning_rate": 3.409669211195929e-06, "loss": 0.8362, "step": 67 }, { "epoch": 0.010405508798775822, "grad_norm": 5.1152605564609726, "learning_rate": 3.460559796437659e-06, "loss": 0.806, "step": 68 }, { "epoch": 0.010558530986993114, "grad_norm": 5.501335018085708, "learning_rate": 3.5114503816793895e-06, "loss": 0.8836, "step": 69 }, { "epoch": 0.010711553175210406, "grad_norm": 5.6907042929238, "learning_rate": 3.56234096692112e-06, "loss": 0.7729, "step": 70 }, { "epoch": 0.010864575363427697, "grad_norm": 4.7874920288700755, "learning_rate": 3.6132315521628502e-06, "loss": 0.7401, "step": 71 }, { "epoch": 0.011017597551644989, "grad_norm": 5.34946991015153, "learning_rate": 3.6641221374045806e-06, "loss": 0.804, "step": 72 }, { "epoch": 0.01117061973986228, "grad_norm": 6.0799857986381, "learning_rate": 3.7150127226463105e-06, "loss": 0.845, "step": 73 }, { "epoch": 0.011323641928079572, "grad_norm": 6.075178425573325, "learning_rate": 3.765903307888041e-06, "loss": 0.7568, "step": 74 }, { "epoch": 0.011476664116296864, "grad_norm": 4.759854629433448, "learning_rate": 3.816793893129772e-06, "loss": 0.7735, "step": 75 }, { "epoch": 0.011629686304514155, "grad_norm": 5.486398129731232, "learning_rate": 3.867684478371502e-06, "loss": 0.788, "step": 76 }, { "epoch": 0.011782708492731445, "grad_norm": 5.097255872801085, "learning_rate": 3.918575063613232e-06, "loss": 0.7627, "step": 77 }, { "epoch": 0.011935730680948737, "grad_norm": 4.242028635114688, "learning_rate": 3.969465648854962e-06, "loss": 0.7489, "step": 78 }, { "epoch": 0.012088752869166029, "grad_norm": 5.948491901588452, "learning_rate": 4.020356234096692e-06, "loss": 0.8135, "step": 79 }, { "epoch": 0.01224177505738332, "grad_norm": 6.895223832667884, "learning_rate": 4.0712468193384225e-06, "loss": 0.84, "step": 80 }, { "epoch": 0.012394797245600612, "grad_norm": 4.672739171576903, "learning_rate": 4.122137404580153e-06, "loss": 0.7619, "step": 81 }, { "epoch": 0.012547819433817903, "grad_norm": 4.6442577318217815, "learning_rate": 4.173027989821883e-06, "loss": 0.8648, "step": 82 }, { "epoch": 0.012700841622035195, "grad_norm": 5.053558792090741, "learning_rate": 4.2239185750636135e-06, "loss": 0.8448, "step": 83 }, { "epoch": 0.012853863810252487, "grad_norm": 4.530911050956989, "learning_rate": 4.274809160305344e-06, "loss": 0.8076, "step": 84 }, { "epoch": 0.013006885998469778, "grad_norm": 6.262686478254499, "learning_rate": 4.325699745547074e-06, "loss": 0.9201, "step": 85 }, { "epoch": 0.01315990818668707, "grad_norm": 4.640895988619081, "learning_rate": 4.3765903307888045e-06, "loss": 0.7195, "step": 86 }, { "epoch": 0.013312930374904362, "grad_norm": 5.0565684924592755, "learning_rate": 4.427480916030535e-06, "loss": 0.8627, "step": 87 }, { "epoch": 0.013465952563121653, "grad_norm": 5.283088934487553, "learning_rate": 4.478371501272265e-06, "loss": 0.7922, "step": 88 }, { "epoch": 0.013618974751338945, "grad_norm": 5.026171701957064, "learning_rate": 4.5292620865139956e-06, "loss": 0.8261, "step": 89 }, { "epoch": 0.013771996939556235, "grad_norm": 5.884463834573161, "learning_rate": 4.580152671755725e-06, "loss": 0.8093, "step": 90 }, { "epoch": 0.013925019127773526, "grad_norm": 4.438561387505246, "learning_rate": 4.631043256997455e-06, "loss": 0.7371, "step": 91 }, { "epoch": 0.014078041315990818, "grad_norm": 5.305169117726312, "learning_rate": 4.681933842239187e-06, "loss": 0.8009, "step": 92 }, { "epoch": 0.01423106350420811, "grad_norm": 5.419418332275794, "learning_rate": 4.732824427480917e-06, "loss": 0.8181, "step": 93 }, { "epoch": 0.014384085692425401, "grad_norm": 5.040887136301689, "learning_rate": 4.7837150127226464e-06, "loss": 0.7963, "step": 94 }, { "epoch": 0.014537107880642693, "grad_norm": 6.45075012480531, "learning_rate": 4.834605597964377e-06, "loss": 0.6717, "step": 95 }, { "epoch": 0.014690130068859985, "grad_norm": 5.015628494483557, "learning_rate": 4.885496183206107e-06, "loss": 0.7777, "step": 96 }, { "epoch": 0.014843152257077276, "grad_norm": 4.517147075856258, "learning_rate": 4.9363867684478375e-06, "loss": 0.7682, "step": 97 }, { "epoch": 0.014996174445294568, "grad_norm": 5.015648995454229, "learning_rate": 4.987277353689568e-06, "loss": 0.793, "step": 98 }, { "epoch": 0.01514919663351186, "grad_norm": 4.91359741535642, "learning_rate": 5.038167938931297e-06, "loss": 0.7853, "step": 99 }, { "epoch": 0.015302218821729151, "grad_norm": 4.88280014065132, "learning_rate": 5.0890585241730285e-06, "loss": 0.7867, "step": 100 }, { "epoch": 0.015455241009946443, "grad_norm": 5.310203953123644, "learning_rate": 5.139949109414759e-06, "loss": 0.8291, "step": 101 }, { "epoch": 0.015608263198163734, "grad_norm": 5.063524860125988, "learning_rate": 5.190839694656488e-06, "loss": 0.8207, "step": 102 }, { "epoch": 0.015761285386381024, "grad_norm": 5.476946259775138, "learning_rate": 5.2417302798982195e-06, "loss": 0.7438, "step": 103 }, { "epoch": 0.015914307574598316, "grad_norm": 4.563268298277021, "learning_rate": 5.292620865139949e-06, "loss": 0.8014, "step": 104 }, { "epoch": 0.016067329762815608, "grad_norm": 4.766384946361356, "learning_rate": 5.34351145038168e-06, "loss": 0.7385, "step": 105 }, { "epoch": 0.0162203519510329, "grad_norm": 4.959052123725921, "learning_rate": 5.394402035623411e-06, "loss": 0.6631, "step": 106 }, { "epoch": 0.01637337413925019, "grad_norm": 4.660517289576387, "learning_rate": 5.44529262086514e-06, "loss": 0.7539, "step": 107 }, { "epoch": 0.016526396327467482, "grad_norm": 4.760332379137938, "learning_rate": 5.496183206106871e-06, "loss": 0.8044, "step": 108 }, { "epoch": 0.016679418515684774, "grad_norm": 5.213877297259624, "learning_rate": 5.547073791348601e-06, "loss": 0.8032, "step": 109 }, { "epoch": 0.016832440703902066, "grad_norm": 4.500414357173698, "learning_rate": 5.597964376590331e-06, "loss": 0.851, "step": 110 }, { "epoch": 0.016985462892119357, "grad_norm": 5.20758083198332, "learning_rate": 5.648854961832062e-06, "loss": 0.8376, "step": 111 }, { "epoch": 0.01713848508033665, "grad_norm": 4.313432158821755, "learning_rate": 5.699745547073792e-06, "loss": 0.7425, "step": 112 }, { "epoch": 0.01729150726855394, "grad_norm": 4.384745758633966, "learning_rate": 5.750636132315522e-06, "loss": 0.9117, "step": 113 }, { "epoch": 0.017444529456771232, "grad_norm": 5.289188278919928, "learning_rate": 5.801526717557252e-06, "loss": 0.7709, "step": 114 }, { "epoch": 0.017597551644988524, "grad_norm": 4.718040596929263, "learning_rate": 5.852417302798983e-06, "loss": 0.825, "step": 115 }, { "epoch": 0.017750573833205815, "grad_norm": 4.4740329916884045, "learning_rate": 5.903307888040712e-06, "loss": 0.8031, "step": 116 }, { "epoch": 0.017903596021423107, "grad_norm": 3.8294489939326657, "learning_rate": 5.9541984732824435e-06, "loss": 0.7537, "step": 117 }, { "epoch": 0.0180566182096404, "grad_norm": 5.3921477281799906, "learning_rate": 6.005089058524174e-06, "loss": 0.8343, "step": 118 }, { "epoch": 0.01820964039785769, "grad_norm": 4.585186066396906, "learning_rate": 6.055979643765903e-06, "loss": 0.8059, "step": 119 }, { "epoch": 0.018362662586074982, "grad_norm": 4.164993298962277, "learning_rate": 6.1068702290076346e-06, "loss": 0.7707, "step": 120 }, { "epoch": 0.018515684774292274, "grad_norm": 4.7470966220472715, "learning_rate": 6.157760814249364e-06, "loss": 0.7635, "step": 121 }, { "epoch": 0.018668706962509565, "grad_norm": 4.57053122827436, "learning_rate": 6.208651399491094e-06, "loss": 0.7802, "step": 122 }, { "epoch": 0.018821729150726857, "grad_norm": 4.764792340656931, "learning_rate": 6.259541984732826e-06, "loss": 0.7754, "step": 123 }, { "epoch": 0.01897475133894415, "grad_norm": 4.673906229696043, "learning_rate": 6.310432569974555e-06, "loss": 0.7227, "step": 124 }, { "epoch": 0.019127773527161437, "grad_norm": 4.254889278087384, "learning_rate": 6.3613231552162854e-06, "loss": 0.7967, "step": 125 }, { "epoch": 0.01928079571537873, "grad_norm": 3.9781988510098314, "learning_rate": 6.412213740458016e-06, "loss": 0.6657, "step": 126 }, { "epoch": 0.01943381790359602, "grad_norm": 4.294430988412905, "learning_rate": 6.463104325699746e-06, "loss": 0.8075, "step": 127 }, { "epoch": 0.01958684009181331, "grad_norm": 4.562471061393905, "learning_rate": 6.5139949109414765e-06, "loss": 0.8503, "step": 128 }, { "epoch": 0.019739862280030603, "grad_norm": 4.549436052521284, "learning_rate": 6.564885496183207e-06, "loss": 0.8009, "step": 129 }, { "epoch": 0.019892884468247895, "grad_norm": 3.835829653833588, "learning_rate": 6.615776081424937e-06, "loss": 0.7992, "step": 130 }, { "epoch": 0.020045906656465187, "grad_norm": 4.267030813968025, "learning_rate": 6.666666666666667e-06, "loss": 0.7283, "step": 131 }, { "epoch": 0.020198928844682478, "grad_norm": 3.839774979418662, "learning_rate": 6.717557251908398e-06, "loss": 0.8088, "step": 132 }, { "epoch": 0.02035195103289977, "grad_norm": 4.05482208608787, "learning_rate": 6.768447837150128e-06, "loss": 0.691, "step": 133 }, { "epoch": 0.02050497322111706, "grad_norm": 4.39540221039343, "learning_rate": 6.819338422391858e-06, "loss": 0.769, "step": 134 }, { "epoch": 0.020657995409334353, "grad_norm": 4.889279260558998, "learning_rate": 6.870229007633589e-06, "loss": 0.7327, "step": 135 }, { "epoch": 0.020811017597551645, "grad_norm": 4.256993693098611, "learning_rate": 6.921119592875318e-06, "loss": 0.8079, "step": 136 }, { "epoch": 0.020964039785768936, "grad_norm": 3.907777183976378, "learning_rate": 6.972010178117049e-06, "loss": 0.7274, "step": 137 }, { "epoch": 0.021117061973986228, "grad_norm": 5.169921684229104, "learning_rate": 7.022900763358779e-06, "loss": 0.7762, "step": 138 }, { "epoch": 0.02127008416220352, "grad_norm": 3.705954278589657, "learning_rate": 7.073791348600509e-06, "loss": 0.7769, "step": 139 }, { "epoch": 0.02142310635042081, "grad_norm": 4.415407291310754, "learning_rate": 7.12468193384224e-06, "loss": 0.7743, "step": 140 }, { "epoch": 0.021576128538638103, "grad_norm": 3.9001765524312577, "learning_rate": 7.17557251908397e-06, "loss": 0.7702, "step": 141 }, { "epoch": 0.021729150726855394, "grad_norm": 4.010966551248577, "learning_rate": 7.2264631043257004e-06, "loss": 0.794, "step": 142 }, { "epoch": 0.021882172915072686, "grad_norm": 4.547219140713333, "learning_rate": 7.27735368956743e-06, "loss": 0.8035, "step": 143 }, { "epoch": 0.022035195103289978, "grad_norm": 3.895437930156407, "learning_rate": 7.328244274809161e-06, "loss": 0.7342, "step": 144 }, { "epoch": 0.02218821729150727, "grad_norm": 3.7831765310187784, "learning_rate": 7.3791348600508915e-06, "loss": 0.7207, "step": 145 }, { "epoch": 0.02234123947972456, "grad_norm": 4.02944403589167, "learning_rate": 7.430025445292621e-06, "loss": 0.6893, "step": 146 }, { "epoch": 0.022494261667941853, "grad_norm": 3.955758005563262, "learning_rate": 7.480916030534352e-06, "loss": 0.744, "step": 147 }, { "epoch": 0.022647283856159144, "grad_norm": 4.067883709052432, "learning_rate": 7.531806615776082e-06, "loss": 0.755, "step": 148 }, { "epoch": 0.022800306044376436, "grad_norm": 4.2402099809259, "learning_rate": 7.582697201017812e-06, "loss": 0.7422, "step": 149 }, { "epoch": 0.022953328232593728, "grad_norm": 4.262612871583228, "learning_rate": 7.633587786259543e-06, "loss": 0.8447, "step": 150 }, { "epoch": 0.02310635042081102, "grad_norm": 3.7947232149122043, "learning_rate": 7.684478371501274e-06, "loss": 0.745, "step": 151 }, { "epoch": 0.02325937260902831, "grad_norm": 4.143133247827625, "learning_rate": 7.735368956743004e-06, "loss": 0.8246, "step": 152 }, { "epoch": 0.0234123947972456, "grad_norm": 3.9840463660552916, "learning_rate": 7.786259541984733e-06, "loss": 0.817, "step": 153 }, { "epoch": 0.02356541698546289, "grad_norm": 4.101939658807749, "learning_rate": 7.837150127226465e-06, "loss": 0.8084, "step": 154 }, { "epoch": 0.023718439173680182, "grad_norm": 4.061501850729827, "learning_rate": 7.888040712468195e-06, "loss": 0.8093, "step": 155 }, { "epoch": 0.023871461361897474, "grad_norm": 4.822404469329433, "learning_rate": 7.938931297709924e-06, "loss": 0.8627, "step": 156 }, { "epoch": 0.024024483550114765, "grad_norm": 4.64280647954761, "learning_rate": 7.989821882951656e-06, "loss": 0.7448, "step": 157 }, { "epoch": 0.024177505738332057, "grad_norm": 4.03108430157329, "learning_rate": 8.040712468193384e-06, "loss": 0.7671, "step": 158 }, { "epoch": 0.02433052792654935, "grad_norm": 4.4845780168203575, "learning_rate": 8.091603053435115e-06, "loss": 0.813, "step": 159 }, { "epoch": 0.02448355011476664, "grad_norm": 4.345621286114522, "learning_rate": 8.142493638676845e-06, "loss": 0.7408, "step": 160 }, { "epoch": 0.024636572302983932, "grad_norm": 4.6919797838381605, "learning_rate": 8.193384223918575e-06, "loss": 0.8106, "step": 161 }, { "epoch": 0.024789594491201224, "grad_norm": 4.333359656214333, "learning_rate": 8.244274809160306e-06, "loss": 0.8115, "step": 162 }, { "epoch": 0.024942616679418515, "grad_norm": 3.81217702704086, "learning_rate": 8.295165394402036e-06, "loss": 0.7482, "step": 163 }, { "epoch": 0.025095638867635807, "grad_norm": 4.44804919658539, "learning_rate": 8.346055979643766e-06, "loss": 0.8544, "step": 164 }, { "epoch": 0.0252486610558531, "grad_norm": 4.131700271663872, "learning_rate": 8.396946564885497e-06, "loss": 0.7262, "step": 165 }, { "epoch": 0.02540168324407039, "grad_norm": 4.951792235136878, "learning_rate": 8.447837150127227e-06, "loss": 0.853, "step": 166 }, { "epoch": 0.025554705432287682, "grad_norm": 4.326303123701613, "learning_rate": 8.498727735368957e-06, "loss": 0.7414, "step": 167 }, { "epoch": 0.025707727620504973, "grad_norm": 3.955742466785765, "learning_rate": 8.549618320610688e-06, "loss": 0.6963, "step": 168 }, { "epoch": 0.025860749808722265, "grad_norm": 3.9376734300269995, "learning_rate": 8.600508905852418e-06, "loss": 0.655, "step": 169 }, { "epoch": 0.026013771996939557, "grad_norm": 5.053922872384057, "learning_rate": 8.651399491094148e-06, "loss": 0.8598, "step": 170 }, { "epoch": 0.02616679418515685, "grad_norm": 3.940496090207759, "learning_rate": 8.702290076335879e-06, "loss": 0.7512, "step": 171 }, { "epoch": 0.02631981637337414, "grad_norm": 4.22917739285267, "learning_rate": 8.753180661577609e-06, "loss": 0.9123, "step": 172 }, { "epoch": 0.02647283856159143, "grad_norm": 4.508693336682513, "learning_rate": 8.80407124681934e-06, "loss": 0.8793, "step": 173 }, { "epoch": 0.026625860749808723, "grad_norm": 4.233775278965739, "learning_rate": 8.85496183206107e-06, "loss": 0.8819, "step": 174 }, { "epoch": 0.026778882938026015, "grad_norm": 3.7511210956873544, "learning_rate": 8.9058524173028e-06, "loss": 0.7657, "step": 175 }, { "epoch": 0.026931905126243307, "grad_norm": 4.143781255150889, "learning_rate": 8.95674300254453e-06, "loss": 0.7149, "step": 176 }, { "epoch": 0.027084927314460598, "grad_norm": 4.671966201492468, "learning_rate": 9.007633587786259e-06, "loss": 0.9099, "step": 177 }, { "epoch": 0.02723794950267789, "grad_norm": 3.8699485585110627, "learning_rate": 9.058524173027991e-06, "loss": 0.7838, "step": 178 }, { "epoch": 0.02739097169089518, "grad_norm": 3.7570884060183305, "learning_rate": 9.109414758269721e-06, "loss": 0.8138, "step": 179 }, { "epoch": 0.02754399387911247, "grad_norm": 4.294913936841194, "learning_rate": 9.16030534351145e-06, "loss": 0.8253, "step": 180 }, { "epoch": 0.02769701606732976, "grad_norm": 4.424497607816517, "learning_rate": 9.211195928753182e-06, "loss": 0.7881, "step": 181 }, { "epoch": 0.027850038255547053, "grad_norm": 3.9878850555273604, "learning_rate": 9.26208651399491e-06, "loss": 0.7683, "step": 182 }, { "epoch": 0.028003060443764344, "grad_norm": 4.214766952361433, "learning_rate": 9.312977099236641e-06, "loss": 0.811, "step": 183 }, { "epoch": 0.028156082631981636, "grad_norm": 4.299160854226878, "learning_rate": 9.363867684478373e-06, "loss": 0.8099, "step": 184 }, { "epoch": 0.028309104820198928, "grad_norm": 4.261601197119955, "learning_rate": 9.414758269720102e-06, "loss": 0.8327, "step": 185 }, { "epoch": 0.02846212700841622, "grad_norm": 4.348418153113838, "learning_rate": 9.465648854961834e-06, "loss": 0.8351, "step": 186 }, { "epoch": 0.02861514919663351, "grad_norm": 4.159462607341161, "learning_rate": 9.516539440203563e-06, "loss": 0.8109, "step": 187 }, { "epoch": 0.028768171384850803, "grad_norm": 4.139615856283388, "learning_rate": 9.567430025445293e-06, "loss": 0.808, "step": 188 }, { "epoch": 0.028921193573068094, "grad_norm": 3.966142369811451, "learning_rate": 9.618320610687025e-06, "loss": 0.8221, "step": 189 }, { "epoch": 0.029074215761285386, "grad_norm": 4.2883136258405585, "learning_rate": 9.669211195928754e-06, "loss": 0.7766, "step": 190 }, { "epoch": 0.029227237949502678, "grad_norm": 3.5038128496057706, "learning_rate": 9.720101781170484e-06, "loss": 0.802, "step": 191 }, { "epoch": 0.02938026013771997, "grad_norm": 4.018460389006565, "learning_rate": 9.770992366412214e-06, "loss": 0.8793, "step": 192 }, { "epoch": 0.02953328232593726, "grad_norm": 4.197653542836699, "learning_rate": 9.821882951653945e-06, "loss": 0.7636, "step": 193 }, { "epoch": 0.029686304514154552, "grad_norm": 4.3491719420032275, "learning_rate": 9.872773536895675e-06, "loss": 0.8217, "step": 194 }, { "epoch": 0.029839326702371844, "grad_norm": 3.896319127881109, "learning_rate": 9.923664122137405e-06, "loss": 0.6789, "step": 195 }, { "epoch": 0.029992348890589136, "grad_norm": 4.135458574765948, "learning_rate": 9.974554707379136e-06, "loss": 0.7865, "step": 196 }, { "epoch": 0.030145371078806427, "grad_norm": 4.497645124133756, "learning_rate": 1.0025445292620866e-05, "loss": 0.8283, "step": 197 }, { "epoch": 0.03029839326702372, "grad_norm": 3.5421499415575246, "learning_rate": 1.0076335877862595e-05, "loss": 0.9806, "step": 198 }, { "epoch": 0.03045141545524101, "grad_norm": 3.5582689056601446, "learning_rate": 1.0127226463104327e-05, "loss": 0.7229, "step": 199 }, { "epoch": 0.030604437643458302, "grad_norm": 4.505265157757878, "learning_rate": 1.0178117048346057e-05, "loss": 0.8239, "step": 200 }, { "epoch": 0.030757459831675594, "grad_norm": 3.785002672226262, "learning_rate": 1.0229007633587786e-05, "loss": 0.8285, "step": 201 }, { "epoch": 0.030910482019892885, "grad_norm": 4.056563880467215, "learning_rate": 1.0279898218829518e-05, "loss": 0.8093, "step": 202 }, { "epoch": 0.031063504208110177, "grad_norm": 3.8406342164711114, "learning_rate": 1.0330788804071248e-05, "loss": 0.8487, "step": 203 }, { "epoch": 0.03121652639632747, "grad_norm": 4.52064054660571, "learning_rate": 1.0381679389312977e-05, "loss": 0.7581, "step": 204 }, { "epoch": 0.03136954858454476, "grad_norm": 3.6506482696933977, "learning_rate": 1.0432569974554709e-05, "loss": 0.7612, "step": 205 }, { "epoch": 0.03152257077276205, "grad_norm": 4.383431897980942, "learning_rate": 1.0483460559796439e-05, "loss": 0.8776, "step": 206 }, { "epoch": 0.03167559296097934, "grad_norm": 3.741616408368712, "learning_rate": 1.0534351145038168e-05, "loss": 0.7634, "step": 207 }, { "epoch": 0.03182861514919663, "grad_norm": 4.448598087029719, "learning_rate": 1.0585241730279898e-05, "loss": 0.8928, "step": 208 }, { "epoch": 0.03198163733741392, "grad_norm": 3.7260572298800136, "learning_rate": 1.063613231552163e-05, "loss": 0.7409, "step": 209 }, { "epoch": 0.032134659525631215, "grad_norm": 4.082446929366836, "learning_rate": 1.068702290076336e-05, "loss": 0.7702, "step": 210 }, { "epoch": 0.03228768171384851, "grad_norm": 4.276111120838432, "learning_rate": 1.0737913486005089e-05, "loss": 0.8451, "step": 211 }, { "epoch": 0.0324407039020658, "grad_norm": 3.5982082879998303, "learning_rate": 1.0788804071246821e-05, "loss": 0.7412, "step": 212 }, { "epoch": 0.03259372609028309, "grad_norm": 3.7182399171777427, "learning_rate": 1.0839694656488552e-05, "loss": 0.7734, "step": 213 }, { "epoch": 0.03274674827850038, "grad_norm": 4.45090671467595, "learning_rate": 1.089058524173028e-05, "loss": 0.7803, "step": 214 }, { "epoch": 0.03289977046671767, "grad_norm": 4.10877525262828, "learning_rate": 1.094147582697201e-05, "loss": 0.8259, "step": 215 }, { "epoch": 0.033052792654934965, "grad_norm": 3.6500837791600693, "learning_rate": 1.0992366412213743e-05, "loss": 0.789, "step": 216 }, { "epoch": 0.033205814843152257, "grad_norm": 3.5491002374247267, "learning_rate": 1.1043256997455471e-05, "loss": 0.7059, "step": 217 }, { "epoch": 0.03335883703136955, "grad_norm": 3.4521743275023624, "learning_rate": 1.1094147582697202e-05, "loss": 0.7247, "step": 218 }, { "epoch": 0.03351185921958684, "grad_norm": 3.8737746868152994, "learning_rate": 1.1145038167938934e-05, "loss": 0.7646, "step": 219 }, { "epoch": 0.03366488140780413, "grad_norm": 3.8562641801906192, "learning_rate": 1.1195928753180662e-05, "loss": 0.8318, "step": 220 }, { "epoch": 0.03381790359602142, "grad_norm": 3.912581720585126, "learning_rate": 1.1246819338422393e-05, "loss": 0.8093, "step": 221 }, { "epoch": 0.033970925784238715, "grad_norm": 4.065770061675511, "learning_rate": 1.1297709923664125e-05, "loss": 0.7937, "step": 222 }, { "epoch": 0.034123947972456006, "grad_norm": 3.6237024117778986, "learning_rate": 1.1348600508905853e-05, "loss": 0.7211, "step": 223 }, { "epoch": 0.0342769701606733, "grad_norm": 3.199937206085619, "learning_rate": 1.1399491094147584e-05, "loss": 0.6572, "step": 224 }, { "epoch": 0.03442999234889059, "grad_norm": 3.9340874687404392, "learning_rate": 1.1450381679389312e-05, "loss": 0.7167, "step": 225 }, { "epoch": 0.03458301453710788, "grad_norm": 4.280709448985111, "learning_rate": 1.1501272264631044e-05, "loss": 0.8466, "step": 226 }, { "epoch": 0.03473603672532517, "grad_norm": 4.079222952395199, "learning_rate": 1.1552162849872775e-05, "loss": 0.7506, "step": 227 }, { "epoch": 0.034889058913542464, "grad_norm": 3.5818500329635308, "learning_rate": 1.1603053435114503e-05, "loss": 0.8351, "step": 228 }, { "epoch": 0.035042081101759756, "grad_norm": 3.683692326325265, "learning_rate": 1.1653944020356235e-05, "loss": 0.8308, "step": 229 }, { "epoch": 0.03519510328997705, "grad_norm": 4.314842659628774, "learning_rate": 1.1704834605597966e-05, "loss": 0.863, "step": 230 }, { "epoch": 0.03534812547819434, "grad_norm": 3.8577935082344057, "learning_rate": 1.1755725190839696e-05, "loss": 0.7947, "step": 231 }, { "epoch": 0.03550114766641163, "grad_norm": 4.001739000653889, "learning_rate": 1.1806615776081425e-05, "loss": 0.8155, "step": 232 }, { "epoch": 0.03565416985462892, "grad_norm": 4.413074080050747, "learning_rate": 1.1857506361323157e-05, "loss": 0.8612, "step": 233 }, { "epoch": 0.035807192042846214, "grad_norm": 3.7562800982675433, "learning_rate": 1.1908396946564887e-05, "loss": 0.7496, "step": 234 }, { "epoch": 0.035960214231063506, "grad_norm": 4.676164618927764, "learning_rate": 1.1959287531806616e-05, "loss": 0.7961, "step": 235 }, { "epoch": 0.0361132364192808, "grad_norm": 4.1865326091807376, "learning_rate": 1.2010178117048348e-05, "loss": 0.7285, "step": 236 }, { "epoch": 0.03626625860749809, "grad_norm": 4.330640395701582, "learning_rate": 1.2061068702290078e-05, "loss": 0.86, "step": 237 }, { "epoch": 0.03641928079571538, "grad_norm": 4.221924703378774, "learning_rate": 1.2111959287531807e-05, "loss": 0.8088, "step": 238 }, { "epoch": 0.03657230298393267, "grad_norm": 4.08774124838906, "learning_rate": 1.2162849872773539e-05, "loss": 0.7768, "step": 239 }, { "epoch": 0.036725325172149964, "grad_norm": 3.6681049073966387, "learning_rate": 1.2213740458015269e-05, "loss": 0.8089, "step": 240 }, { "epoch": 0.036878347360367256, "grad_norm": 4.339040545843134, "learning_rate": 1.2264631043256998e-05, "loss": 0.9708, "step": 241 }, { "epoch": 0.03703136954858455, "grad_norm": 4.090377047572124, "learning_rate": 1.2315521628498728e-05, "loss": 0.7973, "step": 242 }, { "epoch": 0.03718439173680184, "grad_norm": 3.8573977126801324, "learning_rate": 1.236641221374046e-05, "loss": 0.8493, "step": 243 }, { "epoch": 0.03733741392501913, "grad_norm": 3.8417101718993036, "learning_rate": 1.2417302798982189e-05, "loss": 0.7898, "step": 244 }, { "epoch": 0.03749043611323642, "grad_norm": 4.167929367567143, "learning_rate": 1.2468193384223919e-05, "loss": 0.8347, "step": 245 }, { "epoch": 0.037643458301453714, "grad_norm": 4.304125499094029, "learning_rate": 1.2519083969465651e-05, "loss": 0.9077, "step": 246 }, { "epoch": 0.037796480489671005, "grad_norm": 3.7732211137563523, "learning_rate": 1.256997455470738e-05, "loss": 0.7202, "step": 247 }, { "epoch": 0.0379495026778883, "grad_norm": 4.028839146378367, "learning_rate": 1.262086513994911e-05, "loss": 0.9032, "step": 248 }, { "epoch": 0.03810252486610559, "grad_norm": 3.571061110080041, "learning_rate": 1.2671755725190839e-05, "loss": 0.8451, "step": 249 }, { "epoch": 0.03825554705432287, "grad_norm": 4.231210681726749, "learning_rate": 1.2722646310432571e-05, "loss": 0.8744, "step": 250 }, { "epoch": 0.038408569242540165, "grad_norm": 3.8963900051843092, "learning_rate": 1.2773536895674301e-05, "loss": 0.8427, "step": 251 }, { "epoch": 0.03856159143075746, "grad_norm": 3.983079418182341, "learning_rate": 1.2824427480916032e-05, "loss": 0.7799, "step": 252 }, { "epoch": 0.03871461361897475, "grad_norm": 3.3692269874643967, "learning_rate": 1.2875318066157762e-05, "loss": 0.7349, "step": 253 }, { "epoch": 0.03886763580719204, "grad_norm": 4.081444797385999, "learning_rate": 1.2926208651399492e-05, "loss": 0.8248, "step": 254 }, { "epoch": 0.03902065799540933, "grad_norm": 3.5071517749127143, "learning_rate": 1.2977099236641223e-05, "loss": 0.7729, "step": 255 }, { "epoch": 0.03917368018362662, "grad_norm": 3.983554583470299, "learning_rate": 1.3027989821882953e-05, "loss": 0.7358, "step": 256 }, { "epoch": 0.039326702371843915, "grad_norm": 4.156162140200395, "learning_rate": 1.3078880407124683e-05, "loss": 0.7827, "step": 257 }, { "epoch": 0.039479724560061207, "grad_norm": 3.7356062298023005, "learning_rate": 1.3129770992366414e-05, "loss": 0.8187, "step": 258 }, { "epoch": 0.0396327467482785, "grad_norm": 4.41491523535067, "learning_rate": 1.3180661577608142e-05, "loss": 0.9266, "step": 259 }, { "epoch": 0.03978576893649579, "grad_norm": 3.823408041477082, "learning_rate": 1.3231552162849874e-05, "loss": 0.7646, "step": 260 }, { "epoch": 0.03993879112471308, "grad_norm": 4.187964678105524, "learning_rate": 1.3282442748091605e-05, "loss": 0.7775, "step": 261 }, { "epoch": 0.04009181331293037, "grad_norm": 3.677752698776951, "learning_rate": 1.3333333333333333e-05, "loss": 0.7735, "step": 262 }, { "epoch": 0.040244835501147665, "grad_norm": 4.163618150951032, "learning_rate": 1.3384223918575065e-05, "loss": 0.8633, "step": 263 }, { "epoch": 0.040397857689364956, "grad_norm": 4.340339901550398, "learning_rate": 1.3435114503816796e-05, "loss": 0.8143, "step": 264 }, { "epoch": 0.04055087987758225, "grad_norm": 3.779345498739458, "learning_rate": 1.3486005089058524e-05, "loss": 0.7737, "step": 265 }, { "epoch": 0.04070390206579954, "grad_norm": 3.4660216974591944, "learning_rate": 1.3536895674300256e-05, "loss": 0.7818, "step": 266 }, { "epoch": 0.04085692425401683, "grad_norm": 3.460945209038202, "learning_rate": 1.3587786259541987e-05, "loss": 0.8819, "step": 267 }, { "epoch": 0.04100994644223412, "grad_norm": 3.174432254917918, "learning_rate": 1.3638676844783715e-05, "loss": 0.8147, "step": 268 }, { "epoch": 0.041162968630451414, "grad_norm": 3.6606958430717436, "learning_rate": 1.3689567430025446e-05, "loss": 0.7882, "step": 269 }, { "epoch": 0.041315990818668706, "grad_norm": 3.4598901409929566, "learning_rate": 1.3740458015267178e-05, "loss": 0.7396, "step": 270 }, { "epoch": 0.041469013006886, "grad_norm": 3.837680859754261, "learning_rate": 1.3791348600508906e-05, "loss": 0.9517, "step": 271 }, { "epoch": 0.04162203519510329, "grad_norm": 4.205620824295019, "learning_rate": 1.3842239185750637e-05, "loss": 0.8136, "step": 272 }, { "epoch": 0.04177505738332058, "grad_norm": 4.593353976664115, "learning_rate": 1.3893129770992369e-05, "loss": 0.8764, "step": 273 }, { "epoch": 0.04192807957153787, "grad_norm": 3.674973105300058, "learning_rate": 1.3944020356234097e-05, "loss": 0.7451, "step": 274 }, { "epoch": 0.042081101759755164, "grad_norm": 4.080257867421216, "learning_rate": 1.3994910941475828e-05, "loss": 0.938, "step": 275 }, { "epoch": 0.042234123947972456, "grad_norm": 3.6825330496591513, "learning_rate": 1.4045801526717558e-05, "loss": 0.9145, "step": 276 }, { "epoch": 0.04238714613618975, "grad_norm": 3.6676110960298263, "learning_rate": 1.4096692111959288e-05, "loss": 0.8281, "step": 277 }, { "epoch": 0.04254016832440704, "grad_norm": 3.9692318784638876, "learning_rate": 1.4147582697201019e-05, "loss": 0.8391, "step": 278 }, { "epoch": 0.04269319051262433, "grad_norm": 4.003302405983541, "learning_rate": 1.4198473282442749e-05, "loss": 0.8807, "step": 279 }, { "epoch": 0.04284621270084162, "grad_norm": 3.6051890207501125, "learning_rate": 1.424936386768448e-05, "loss": 0.8002, "step": 280 }, { "epoch": 0.042999234889058914, "grad_norm": 3.741199905910224, "learning_rate": 1.430025445292621e-05, "loss": 0.9184, "step": 281 }, { "epoch": 0.043152257077276206, "grad_norm": 3.989340278328752, "learning_rate": 1.435114503816794e-05, "loss": 0.8947, "step": 282 }, { "epoch": 0.0433052792654935, "grad_norm": 3.64277436441139, "learning_rate": 1.4402035623409672e-05, "loss": 0.8931, "step": 283 }, { "epoch": 0.04345830145371079, "grad_norm": 4.1014237450789635, "learning_rate": 1.4452926208651401e-05, "loss": 0.8706, "step": 284 }, { "epoch": 0.04361132364192808, "grad_norm": 3.4366912250691297, "learning_rate": 1.4503816793893131e-05, "loss": 0.8406, "step": 285 }, { "epoch": 0.04376434583014537, "grad_norm": 3.580030889617023, "learning_rate": 1.455470737913486e-05, "loss": 0.8221, "step": 286 }, { "epoch": 0.043917368018362664, "grad_norm": 3.298085781868641, "learning_rate": 1.4605597964376592e-05, "loss": 0.7627, "step": 287 }, { "epoch": 0.044070390206579955, "grad_norm": 3.6026238179183316, "learning_rate": 1.4656488549618322e-05, "loss": 0.8868, "step": 288 }, { "epoch": 0.04422341239479725, "grad_norm": 3.5628106762524663, "learning_rate": 1.4707379134860051e-05, "loss": 0.8383, "step": 289 }, { "epoch": 0.04437643458301454, "grad_norm": 4.098960888954263, "learning_rate": 1.4758269720101783e-05, "loss": 0.8245, "step": 290 }, { "epoch": 0.04452945677123183, "grad_norm": 4.019679742440305, "learning_rate": 1.4809160305343513e-05, "loss": 0.9617, "step": 291 }, { "epoch": 0.04468247895944912, "grad_norm": 3.870786617215219, "learning_rate": 1.4860050890585242e-05, "loss": 0.7536, "step": 292 }, { "epoch": 0.044835501147666414, "grad_norm": 3.624983481704036, "learning_rate": 1.4910941475826972e-05, "loss": 0.7665, "step": 293 }, { "epoch": 0.044988523335883705, "grad_norm": 3.4579342237989863, "learning_rate": 1.4961832061068704e-05, "loss": 0.7475, "step": 294 }, { "epoch": 0.045141545524101, "grad_norm": 3.825987018762197, "learning_rate": 1.5012722646310433e-05, "loss": 0.7701, "step": 295 }, { "epoch": 0.04529456771231829, "grad_norm": 3.925285067712268, "learning_rate": 1.5063613231552163e-05, "loss": 0.8424, "step": 296 }, { "epoch": 0.04544758990053558, "grad_norm": 4.536531873258212, "learning_rate": 1.5114503816793895e-05, "loss": 0.8194, "step": 297 }, { "epoch": 0.04560061208875287, "grad_norm": 3.849198877547217, "learning_rate": 1.5165394402035624e-05, "loss": 0.8097, "step": 298 }, { "epoch": 0.04575363427697016, "grad_norm": 3.4268881311529, "learning_rate": 1.5216284987277354e-05, "loss": 0.7637, "step": 299 }, { "epoch": 0.045906656465187455, "grad_norm": 3.844837610723275, "learning_rate": 1.5267175572519086e-05, "loss": 0.9146, "step": 300 }, { "epoch": 0.04605967865340475, "grad_norm": 4.200860333219284, "learning_rate": 1.5318066157760817e-05, "loss": 0.8694, "step": 301 }, { "epoch": 0.04621270084162204, "grad_norm": 3.708632369987497, "learning_rate": 1.5368956743002547e-05, "loss": 0.7955, "step": 302 }, { "epoch": 0.04636572302983933, "grad_norm": 4.560895491616818, "learning_rate": 1.5419847328244274e-05, "loss": 0.8735, "step": 303 }, { "epoch": 0.04651874521805662, "grad_norm": 4.1460005936560576, "learning_rate": 1.5470737913486008e-05, "loss": 0.9344, "step": 304 }, { "epoch": 0.046671767406273906, "grad_norm": 3.5819446783097875, "learning_rate": 1.5521628498727738e-05, "loss": 0.8728, "step": 305 }, { "epoch": 0.0468247895944912, "grad_norm": 3.5673030396101755, "learning_rate": 1.5572519083969465e-05, "loss": 0.8558, "step": 306 }, { "epoch": 0.04697781178270849, "grad_norm": 4.465799436044937, "learning_rate": 1.56234096692112e-05, "loss": 0.9466, "step": 307 }, { "epoch": 0.04713083397092578, "grad_norm": 3.8186144985338335, "learning_rate": 1.567430025445293e-05, "loss": 0.8226, "step": 308 }, { "epoch": 0.04728385615914307, "grad_norm": 3.0526556169711796, "learning_rate": 1.5725190839694656e-05, "loss": 0.8292, "step": 309 }, { "epoch": 0.047436878347360364, "grad_norm": 3.8395154224833963, "learning_rate": 1.577608142493639e-05, "loss": 0.9308, "step": 310 }, { "epoch": 0.047589900535577656, "grad_norm": 3.3739437105583048, "learning_rate": 1.582697201017812e-05, "loss": 0.8354, "step": 311 }, { "epoch": 0.04774292272379495, "grad_norm": 3.428552157235765, "learning_rate": 1.5877862595419847e-05, "loss": 0.7738, "step": 312 }, { "epoch": 0.04789594491201224, "grad_norm": 3.8083642028444857, "learning_rate": 1.5928753180661577e-05, "loss": 0.8379, "step": 313 }, { "epoch": 0.04804896710022953, "grad_norm": 3.7472390334350854, "learning_rate": 1.597964376590331e-05, "loss": 0.7635, "step": 314 }, { "epoch": 0.04820198928844682, "grad_norm": 3.5905420969046977, "learning_rate": 1.6030534351145038e-05, "loss": 0.84, "step": 315 }, { "epoch": 0.048355011476664114, "grad_norm": 3.371134238680956, "learning_rate": 1.608142493638677e-05, "loss": 0.822, "step": 316 }, { "epoch": 0.048508033664881406, "grad_norm": 3.251211187350607, "learning_rate": 1.6132315521628502e-05, "loss": 0.8663, "step": 317 }, { "epoch": 0.0486610558530987, "grad_norm": 3.710129923001892, "learning_rate": 1.618320610687023e-05, "loss": 0.885, "step": 318 }, { "epoch": 0.04881407804131599, "grad_norm": 3.4804407042961243, "learning_rate": 1.623409669211196e-05, "loss": 0.8388, "step": 319 }, { "epoch": 0.04896710022953328, "grad_norm": 3.3398691087172763, "learning_rate": 1.628498727735369e-05, "loss": 0.8033, "step": 320 }, { "epoch": 0.04912012241775057, "grad_norm": 3.4336060940387156, "learning_rate": 1.633587786259542e-05, "loss": 0.8656, "step": 321 }, { "epoch": 0.049273144605967864, "grad_norm": 3.6774669512700693, "learning_rate": 1.638676844783715e-05, "loss": 0.9224, "step": 322 }, { "epoch": 0.049426166794185156, "grad_norm": 3.707606788471563, "learning_rate": 1.643765903307888e-05, "loss": 0.8309, "step": 323 }, { "epoch": 0.04957918898240245, "grad_norm": 3.235739424905745, "learning_rate": 1.648854961832061e-05, "loss": 0.7052, "step": 324 }, { "epoch": 0.04973221117061974, "grad_norm": 3.7080138914780796, "learning_rate": 1.653944020356234e-05, "loss": 0.8157, "step": 325 }, { "epoch": 0.04988523335883703, "grad_norm": 3.166803594516287, "learning_rate": 1.6590330788804072e-05, "loss": 0.8085, "step": 326 }, { "epoch": 0.05003825554705432, "grad_norm": 3.7042887553890647, "learning_rate": 1.6641221374045802e-05, "loss": 0.8331, "step": 327 }, { "epoch": 0.050191277735271614, "grad_norm": 4.6732971094593125, "learning_rate": 1.6692111959287533e-05, "loss": 0.8703, "step": 328 }, { "epoch": 0.050344299923488905, "grad_norm": 3.5191964993901137, "learning_rate": 1.6743002544529263e-05, "loss": 0.7757, "step": 329 }, { "epoch": 0.0504973221117062, "grad_norm": 3.62621311940619, "learning_rate": 1.6793893129770993e-05, "loss": 0.9091, "step": 330 }, { "epoch": 0.05065034429992349, "grad_norm": 3.4566006756009857, "learning_rate": 1.6844783715012724e-05, "loss": 0.8907, "step": 331 }, { "epoch": 0.05080336648814078, "grad_norm": 4.226884860942899, "learning_rate": 1.6895674300254454e-05, "loss": 0.9568, "step": 332 }, { "epoch": 0.05095638867635807, "grad_norm": 3.8334891651194445, "learning_rate": 1.6946564885496184e-05, "loss": 0.802, "step": 333 }, { "epoch": 0.051109410864575364, "grad_norm": 3.643779521200893, "learning_rate": 1.6997455470737915e-05, "loss": 0.7904, "step": 334 }, { "epoch": 0.051262433052792655, "grad_norm": 3.6109019840921177, "learning_rate": 1.7048346055979645e-05, "loss": 0.9173, "step": 335 }, { "epoch": 0.05141545524100995, "grad_norm": 3.556033056937106, "learning_rate": 1.7099236641221375e-05, "loss": 0.6736, "step": 336 }, { "epoch": 0.05156847742922724, "grad_norm": 3.4590858648740457, "learning_rate": 1.7150127226463106e-05, "loss": 0.7459, "step": 337 }, { "epoch": 0.05172149961744453, "grad_norm": 3.883487644337928, "learning_rate": 1.7201017811704836e-05, "loss": 0.8293, "step": 338 }, { "epoch": 0.05187452180566182, "grad_norm": 3.6817700556181308, "learning_rate": 1.7251908396946566e-05, "loss": 0.8543, "step": 339 }, { "epoch": 0.05202754399387911, "grad_norm": 3.6092941197838107, "learning_rate": 1.7302798982188297e-05, "loss": 0.7518, "step": 340 }, { "epoch": 0.052180566182096405, "grad_norm": 3.521936034926192, "learning_rate": 1.7353689567430027e-05, "loss": 0.7999, "step": 341 }, { "epoch": 0.0523335883703137, "grad_norm": 3.2423067286052283, "learning_rate": 1.7404580152671757e-05, "loss": 0.8241, "step": 342 }, { "epoch": 0.05248661055853099, "grad_norm": 3.5515781332359797, "learning_rate": 1.7455470737913488e-05, "loss": 0.8267, "step": 343 }, { "epoch": 0.05263963274674828, "grad_norm": 3.361362826153268, "learning_rate": 1.7506361323155218e-05, "loss": 0.8923, "step": 344 }, { "epoch": 0.05279265493496557, "grad_norm": 4.352751368237682, "learning_rate": 1.755725190839695e-05, "loss": 0.8396, "step": 345 }, { "epoch": 0.05294567712318286, "grad_norm": 3.405295125419368, "learning_rate": 1.760814249363868e-05, "loss": 0.8015, "step": 346 }, { "epoch": 0.053098699311400155, "grad_norm": 4.0707162407530255, "learning_rate": 1.765903307888041e-05, "loss": 0.9149, "step": 347 }, { "epoch": 0.053251721499617446, "grad_norm": 3.199089427504116, "learning_rate": 1.770992366412214e-05, "loss": 0.859, "step": 348 }, { "epoch": 0.05340474368783474, "grad_norm": 3.3830728370139505, "learning_rate": 1.776081424936387e-05, "loss": 0.9282, "step": 349 }, { "epoch": 0.05355776587605203, "grad_norm": 3.3892952078432086, "learning_rate": 1.78117048346056e-05, "loss": 0.8592, "step": 350 }, { "epoch": 0.05371078806426932, "grad_norm": 3.3837045648684434, "learning_rate": 1.786259541984733e-05, "loss": 0.8109, "step": 351 }, { "epoch": 0.05386381025248661, "grad_norm": 4.01981710203223, "learning_rate": 1.791348600508906e-05, "loss": 0.8086, "step": 352 }, { "epoch": 0.054016832440703905, "grad_norm": 3.268564561162783, "learning_rate": 1.796437659033079e-05, "loss": 0.8009, "step": 353 }, { "epoch": 0.054169854628921196, "grad_norm": 3.207034469484103, "learning_rate": 1.8015267175572518e-05, "loss": 0.7459, "step": 354 }, { "epoch": 0.05432287681713849, "grad_norm": 3.7905236555543698, "learning_rate": 1.8066157760814252e-05, "loss": 0.9694, "step": 355 }, { "epoch": 0.05447589900535578, "grad_norm": 3.899335977334721, "learning_rate": 1.8117048346055982e-05, "loss": 0.8516, "step": 356 }, { "epoch": 0.05462892119357307, "grad_norm": 3.6683342704748183, "learning_rate": 1.816793893129771e-05, "loss": 0.8954, "step": 357 }, { "epoch": 0.05478194338179036, "grad_norm": 3.2948530407738317, "learning_rate": 1.8218829516539443e-05, "loss": 0.8569, "step": 358 }, { "epoch": 0.054934965570007654, "grad_norm": 3.107869632993156, "learning_rate": 1.8269720101781173e-05, "loss": 0.7997, "step": 359 }, { "epoch": 0.05508798775822494, "grad_norm": 3.740911956061003, "learning_rate": 1.83206106870229e-05, "loss": 0.7949, "step": 360 }, { "epoch": 0.05524100994644223, "grad_norm": 3.5260469337243086, "learning_rate": 1.8371501272264634e-05, "loss": 0.8832, "step": 361 }, { "epoch": 0.05539403213465952, "grad_norm": 4.024532626989753, "learning_rate": 1.8422391857506364e-05, "loss": 0.8864, "step": 362 }, { "epoch": 0.055547054322876814, "grad_norm": 3.7363845688636372, "learning_rate": 1.847328244274809e-05, "loss": 0.7486, "step": 363 }, { "epoch": 0.055700076511094106, "grad_norm": 2.989844465377914, "learning_rate": 1.852417302798982e-05, "loss": 0.7627, "step": 364 }, { "epoch": 0.0558530986993114, "grad_norm": 3.4845633849092814, "learning_rate": 1.8575063613231555e-05, "loss": 0.8626, "step": 365 }, { "epoch": 0.05600612088752869, "grad_norm": 3.482589493869205, "learning_rate": 1.8625954198473282e-05, "loss": 0.7346, "step": 366 }, { "epoch": 0.05615914307574598, "grad_norm": 3.2941484050459255, "learning_rate": 1.8676844783715013e-05, "loss": 0.8794, "step": 367 }, { "epoch": 0.05631216526396327, "grad_norm": 3.7695913912244796, "learning_rate": 1.8727735368956746e-05, "loss": 0.8083, "step": 368 }, { "epoch": 0.056465187452180564, "grad_norm": 3.247852909314503, "learning_rate": 1.8778625954198473e-05, "loss": 0.7414, "step": 369 }, { "epoch": 0.056618209640397855, "grad_norm": 3.3192936130223103, "learning_rate": 1.8829516539440204e-05, "loss": 0.9026, "step": 370 }, { "epoch": 0.05677123182861515, "grad_norm": 3.41682048630867, "learning_rate": 1.8880407124681937e-05, "loss": 0.7388, "step": 371 }, { "epoch": 0.05692425401683244, "grad_norm": 3.623094017822295, "learning_rate": 1.8931297709923668e-05, "loss": 0.9649, "step": 372 }, { "epoch": 0.05707727620504973, "grad_norm": 3.7002949571305903, "learning_rate": 1.8982188295165395e-05, "loss": 0.8871, "step": 373 }, { "epoch": 0.05723029839326702, "grad_norm": 3.325309840511273, "learning_rate": 1.9033078880407125e-05, "loss": 0.9166, "step": 374 }, { "epoch": 0.057383320581484314, "grad_norm": 3.1858695475335383, "learning_rate": 1.908396946564886e-05, "loss": 0.8516, "step": 375 }, { "epoch": 0.057536342769701605, "grad_norm": 3.507805903591568, "learning_rate": 1.9134860050890586e-05, "loss": 0.9728, "step": 376 }, { "epoch": 0.0576893649579189, "grad_norm": 2.953880567303911, "learning_rate": 1.9185750636132316e-05, "loss": 0.9371, "step": 377 }, { "epoch": 0.05784238714613619, "grad_norm": 3.2343974617308273, "learning_rate": 1.923664122137405e-05, "loss": 0.8903, "step": 378 }, { "epoch": 0.05799540933435348, "grad_norm": 3.267836515525953, "learning_rate": 1.9287531806615777e-05, "loss": 0.8397, "step": 379 }, { "epoch": 0.05814843152257077, "grad_norm": 3.594078860448196, "learning_rate": 1.9338422391857507e-05, "loss": 0.9597, "step": 380 }, { "epoch": 0.05830145371078806, "grad_norm": 3.2708415507993034, "learning_rate": 1.9389312977099238e-05, "loss": 0.8571, "step": 381 }, { "epoch": 0.058454475899005355, "grad_norm": 3.3418553549010057, "learning_rate": 1.9440203562340968e-05, "loss": 0.8239, "step": 382 }, { "epoch": 0.05860749808722265, "grad_norm": 3.218696674016876, "learning_rate": 1.9491094147582698e-05, "loss": 0.8126, "step": 383 }, { "epoch": 0.05876052027543994, "grad_norm": 3.171066381208782, "learning_rate": 1.954198473282443e-05, "loss": 0.757, "step": 384 }, { "epoch": 0.05891354246365723, "grad_norm": 3.8453943663575982, "learning_rate": 1.959287531806616e-05, "loss": 0.9481, "step": 385 }, { "epoch": 0.05906656465187452, "grad_norm": 3.262482788883625, "learning_rate": 1.964376590330789e-05, "loss": 0.8465, "step": 386 }, { "epoch": 0.05921958684009181, "grad_norm": 3.303889728308021, "learning_rate": 1.969465648854962e-05, "loss": 0.7714, "step": 387 }, { "epoch": 0.059372609028309105, "grad_norm": 3.519976681128204, "learning_rate": 1.974554707379135e-05, "loss": 0.832, "step": 388 }, { "epoch": 0.059525631216526396, "grad_norm": 3.369259096460689, "learning_rate": 1.979643765903308e-05, "loss": 0.8669, "step": 389 }, { "epoch": 0.05967865340474369, "grad_norm": 3.7927752076141963, "learning_rate": 1.984732824427481e-05, "loss": 0.846, "step": 390 }, { "epoch": 0.05983167559296098, "grad_norm": 3.7018297119428523, "learning_rate": 1.989821882951654e-05, "loss": 0.819, "step": 391 }, { "epoch": 0.05998469778117827, "grad_norm": 3.394158558872775, "learning_rate": 1.994910941475827e-05, "loss": 0.7892, "step": 392 }, { "epoch": 0.06013771996939556, "grad_norm": 3.276133265082445, "learning_rate": 2e-05, "loss": 0.8333, "step": 393 }, { "epoch": 0.060290742157612855, "grad_norm": 3.2855946364421804, "learning_rate": 1.999999969293044e-05, "loss": 0.815, "step": 394 }, { "epoch": 0.060443764345830146, "grad_norm": 3.1185758818586318, "learning_rate": 1.999999877172178e-05, "loss": 0.779, "step": 395 }, { "epoch": 0.06059678653404744, "grad_norm": 3.25149254683972, "learning_rate": 1.9999997236374075e-05, "loss": 0.83, "step": 396 }, { "epoch": 0.06074980872226473, "grad_norm": 2.951296265260351, "learning_rate": 1.9999995086887418e-05, "loss": 0.8079, "step": 397 }, { "epoch": 0.06090283091048202, "grad_norm": 3.0992783869324283, "learning_rate": 1.9999992323261942e-05, "loss": 0.8959, "step": 398 }, { "epoch": 0.06105585309869931, "grad_norm": 3.3456301521826664, "learning_rate": 1.9999988945497816e-05, "loss": 0.9921, "step": 399 }, { "epoch": 0.061208875286916604, "grad_norm": 3.569679380617617, "learning_rate": 1.9999984953595253e-05, "loss": 0.9131, "step": 400 }, { "epoch": 0.061361897475133896, "grad_norm": 3.972081462044736, "learning_rate": 1.999998034755449e-05, "loss": 0.8658, "step": 401 }, { "epoch": 0.06151491966335119, "grad_norm": 3.1594994773921017, "learning_rate": 1.9999975127375815e-05, "loss": 0.7164, "step": 402 }, { "epoch": 0.06166794185156848, "grad_norm": 3.003388020711569, "learning_rate": 1.999996929305955e-05, "loss": 0.8885, "step": 403 }, { "epoch": 0.06182096403978577, "grad_norm": 3.9041072645619974, "learning_rate": 1.9999962844606046e-05, "loss": 0.9423, "step": 404 }, { "epoch": 0.06197398622800306, "grad_norm": 3.604645472820965, "learning_rate": 1.9999955782015706e-05, "loss": 0.8915, "step": 405 }, { "epoch": 0.062127008416220354, "grad_norm": 3.0395146768409615, "learning_rate": 1.9999948105288963e-05, "loss": 0.8001, "step": 406 }, { "epoch": 0.062280030604437646, "grad_norm": 3.3622363017187085, "learning_rate": 1.9999939814426283e-05, "loss": 0.9596, "step": 407 }, { "epoch": 0.06243305279265494, "grad_norm": 3.290114424412209, "learning_rate": 1.9999930909428183e-05, "loss": 0.8567, "step": 408 }, { "epoch": 0.06258607498087222, "grad_norm": 3.4438483746832245, "learning_rate": 1.9999921390295203e-05, "loss": 0.9297, "step": 409 }, { "epoch": 0.06273909716908951, "grad_norm": 3.407358852073633, "learning_rate": 1.999991125702793e-05, "loss": 0.9102, "step": 410 }, { "epoch": 0.0628921193573068, "grad_norm": 3.9536948948238693, "learning_rate": 1.999990050962699e-05, "loss": 0.7925, "step": 411 }, { "epoch": 0.0630451415455241, "grad_norm": 3.1915535537530517, "learning_rate": 1.9999889148093036e-05, "loss": 0.7997, "step": 412 }, { "epoch": 0.06319816373374139, "grad_norm": 3.3935055650270645, "learning_rate": 1.9999877172426775e-05, "loss": 0.7743, "step": 413 }, { "epoch": 0.06335118592195868, "grad_norm": 3.4416667252947377, "learning_rate": 1.9999864582628932e-05, "loss": 0.8931, "step": 414 }, { "epoch": 0.06350420811017597, "grad_norm": 3.791402724599172, "learning_rate": 1.9999851378700286e-05, "loss": 0.979, "step": 415 }, { "epoch": 0.06365723029839326, "grad_norm": 3.1724547503182703, "learning_rate": 1.999983756064165e-05, "loss": 0.7635, "step": 416 }, { "epoch": 0.06381025248661056, "grad_norm": 3.2774243411313253, "learning_rate": 1.999982312845387e-05, "loss": 0.841, "step": 417 }, { "epoch": 0.06396327467482785, "grad_norm": 3.1958537216114604, "learning_rate": 1.9999808082137828e-05, "loss": 0.9372, "step": 418 }, { "epoch": 0.06411629686304514, "grad_norm": 3.5937707452395555, "learning_rate": 1.9999792421694454e-05, "loss": 1.0499, "step": 419 }, { "epoch": 0.06426931905126243, "grad_norm": 3.0929075141412032, "learning_rate": 1.999977614712471e-05, "loss": 0.9361, "step": 420 }, { "epoch": 0.06442234123947972, "grad_norm": 3.6154685592166524, "learning_rate": 1.9999759258429592e-05, "loss": 0.8855, "step": 421 }, { "epoch": 0.06457536342769701, "grad_norm": 3.317458456160721, "learning_rate": 1.9999741755610137e-05, "loss": 0.762, "step": 422 }, { "epoch": 0.0647283856159143, "grad_norm": 3.552919568915116, "learning_rate": 1.9999723638667424e-05, "loss": 0.9853, "step": 423 }, { "epoch": 0.0648814078041316, "grad_norm": 3.614389994148897, "learning_rate": 1.9999704907602564e-05, "loss": 0.8873, "step": 424 }, { "epoch": 0.06503442999234889, "grad_norm": 3.5998150028550064, "learning_rate": 1.9999685562416704e-05, "loss": 0.9252, "step": 425 }, { "epoch": 0.06518745218056618, "grad_norm": 3.049004959216067, "learning_rate": 1.9999665603111035e-05, "loss": 0.8648, "step": 426 }, { "epoch": 0.06534047436878347, "grad_norm": 3.1745175809636237, "learning_rate": 1.9999645029686784e-05, "loss": 0.8266, "step": 427 }, { "epoch": 0.06549349655700076, "grad_norm": 3.4882134719187805, "learning_rate": 1.9999623842145212e-05, "loss": 0.7766, "step": 428 }, { "epoch": 0.06564651874521805, "grad_norm": 3.506680615344111, "learning_rate": 1.999960204048762e-05, "loss": 0.8902, "step": 429 }, { "epoch": 0.06579954093343535, "grad_norm": 3.343900859921755, "learning_rate": 1.9999579624715347e-05, "loss": 0.8235, "step": 430 }, { "epoch": 0.06595256312165264, "grad_norm": 3.506632382776108, "learning_rate": 1.9999556594829775e-05, "loss": 0.8132, "step": 431 }, { "epoch": 0.06610558530986993, "grad_norm": 3.058113063265004, "learning_rate": 1.9999532950832313e-05, "loss": 0.8041, "step": 432 }, { "epoch": 0.06625860749808722, "grad_norm": 3.3485252385202715, "learning_rate": 1.999950869272441e-05, "loss": 0.8817, "step": 433 }, { "epoch": 0.06641162968630451, "grad_norm": 3.5781509996663234, "learning_rate": 1.9999483820507562e-05, "loss": 0.9097, "step": 434 }, { "epoch": 0.0665646518745218, "grad_norm": 3.0972342262671324, "learning_rate": 1.9999458334183296e-05, "loss": 0.7467, "step": 435 }, { "epoch": 0.0667176740627391, "grad_norm": 3.106512537323738, "learning_rate": 1.9999432233753176e-05, "loss": 0.8818, "step": 436 }, { "epoch": 0.06687069625095639, "grad_norm": 3.2596638513194662, "learning_rate": 1.9999405519218804e-05, "loss": 0.9509, "step": 437 }, { "epoch": 0.06702371843917368, "grad_norm": 3.477762465985897, "learning_rate": 1.999937819058182e-05, "loss": 0.8831, "step": 438 }, { "epoch": 0.06717674062739097, "grad_norm": 3.3767003024179054, "learning_rate": 1.9999350247843904e-05, "loss": 0.9931, "step": 439 }, { "epoch": 0.06732976281560826, "grad_norm": 3.0165691988373786, "learning_rate": 1.999932169100677e-05, "loss": 0.8623, "step": 440 }, { "epoch": 0.06748278500382555, "grad_norm": 3.0751006001505594, "learning_rate": 1.9999292520072177e-05, "loss": 0.6962, "step": 441 }, { "epoch": 0.06763580719204285, "grad_norm": 3.6467793080534756, "learning_rate": 1.999926273504191e-05, "loss": 0.9321, "step": 442 }, { "epoch": 0.06778882938026014, "grad_norm": 3.0687590959557633, "learning_rate": 1.99992323359178e-05, "loss": 0.8116, "step": 443 }, { "epoch": 0.06794185156847743, "grad_norm": 3.43496703843989, "learning_rate": 1.9999201322701717e-05, "loss": 0.8043, "step": 444 }, { "epoch": 0.06809487375669472, "grad_norm": 3.544779183403212, "learning_rate": 1.9999169695395566e-05, "loss": 0.8923, "step": 445 }, { "epoch": 0.06824789594491201, "grad_norm": 3.4034242125677707, "learning_rate": 1.9999137454001282e-05, "loss": 0.8836, "step": 446 }, { "epoch": 0.0684009181331293, "grad_norm": 2.9508936139483257, "learning_rate": 1.9999104598520854e-05, "loss": 0.8638, "step": 447 }, { "epoch": 0.0685539403213466, "grad_norm": 3.109658780002175, "learning_rate": 1.9999071128956294e-05, "loss": 0.7898, "step": 448 }, { "epoch": 0.06870696250956389, "grad_norm": 3.3036335036928475, "learning_rate": 1.9999037045309663e-05, "loss": 0.9348, "step": 449 }, { "epoch": 0.06885998469778118, "grad_norm": 2.931573589022011, "learning_rate": 1.9999002347583048e-05, "loss": 0.8075, "step": 450 }, { "epoch": 0.06901300688599847, "grad_norm": 3.1926862679514265, "learning_rate": 1.999896703577858e-05, "loss": 0.8226, "step": 451 }, { "epoch": 0.06916602907421576, "grad_norm": 3.455774568574597, "learning_rate": 1.9998931109898433e-05, "loss": 0.872, "step": 452 }, { "epoch": 0.06931905126243305, "grad_norm": 3.1693621632015807, "learning_rate": 1.999889456994481e-05, "loss": 0.7752, "step": 453 }, { "epoch": 0.06947207345065035, "grad_norm": 2.9868239567077977, "learning_rate": 1.9998857415919955e-05, "loss": 0.9065, "step": 454 }, { "epoch": 0.06962509563886764, "grad_norm": 3.0397111537097006, "learning_rate": 1.999881964782615e-05, "loss": 0.7929, "step": 455 }, { "epoch": 0.06977811782708493, "grad_norm": 3.549361128360661, "learning_rate": 1.9998781265665715e-05, "loss": 0.8541, "step": 456 }, { "epoch": 0.06993114001530222, "grad_norm": 3.044784684574379, "learning_rate": 1.9998742269441008e-05, "loss": 0.8231, "step": 457 }, { "epoch": 0.07008416220351951, "grad_norm": 3.1048626907375954, "learning_rate": 1.9998702659154423e-05, "loss": 0.7968, "step": 458 }, { "epoch": 0.0702371843917368, "grad_norm": 3.2945028895738107, "learning_rate": 1.999866243480839e-05, "loss": 0.8379, "step": 459 }, { "epoch": 0.0703902065799541, "grad_norm": 3.178438471947671, "learning_rate": 1.9998621596405384e-05, "loss": 0.9277, "step": 460 }, { "epoch": 0.07054322876817139, "grad_norm": 3.42484137351198, "learning_rate": 1.9998580143947914e-05, "loss": 0.8155, "step": 461 }, { "epoch": 0.07069625095638868, "grad_norm": 3.0491149098944135, "learning_rate": 1.999853807743852e-05, "loss": 0.7763, "step": 462 }, { "epoch": 0.07084927314460597, "grad_norm": 3.376002345401871, "learning_rate": 1.9998495396879786e-05, "loss": 0.9263, "step": 463 }, { "epoch": 0.07100229533282326, "grad_norm": 3.2633106035230766, "learning_rate": 1.9998452102274336e-05, "loss": 0.9468, "step": 464 }, { "epoch": 0.07115531752104055, "grad_norm": 3.1933206048414293, "learning_rate": 1.9998408193624834e-05, "loss": 0.8901, "step": 465 }, { "epoch": 0.07130833970925785, "grad_norm": 3.4082992385767206, "learning_rate": 1.999836367093396e-05, "loss": 0.9357, "step": 466 }, { "epoch": 0.07146136189747514, "grad_norm": 3.4102970874735745, "learning_rate": 1.999831853420447e-05, "loss": 0.9104, "step": 467 }, { "epoch": 0.07161438408569243, "grad_norm": 3.014808972109428, "learning_rate": 1.9998272783439118e-05, "loss": 0.8472, "step": 468 }, { "epoch": 0.07176740627390972, "grad_norm": 3.0016946749987983, "learning_rate": 1.9998226418640724e-05, "loss": 0.8353, "step": 469 }, { "epoch": 0.07192042846212701, "grad_norm": 3.4092515518117836, "learning_rate": 1.999817943981213e-05, "loss": 0.7995, "step": 470 }, { "epoch": 0.0720734506503443, "grad_norm": 2.9282228456092887, "learning_rate": 1.9998131846956225e-05, "loss": 0.899, "step": 471 }, { "epoch": 0.0722264728385616, "grad_norm": 3.4435930622433037, "learning_rate": 1.9998083640075935e-05, "loss": 0.9694, "step": 472 }, { "epoch": 0.07237949502677889, "grad_norm": 3.284470938610737, "learning_rate": 1.9998034819174207e-05, "loss": 0.878, "step": 473 }, { "epoch": 0.07253251721499618, "grad_norm": 3.2453744318117943, "learning_rate": 1.9997985384254057e-05, "loss": 0.7952, "step": 474 }, { "epoch": 0.07268553940321347, "grad_norm": 3.577377408891758, "learning_rate": 1.9997935335318507e-05, "loss": 0.8137, "step": 475 }, { "epoch": 0.07283856159143076, "grad_norm": 3.2705975222934556, "learning_rate": 1.9997884672370637e-05, "loss": 0.918, "step": 476 }, { "epoch": 0.07299158377964805, "grad_norm": 3.4645999740507385, "learning_rate": 1.9997833395413554e-05, "loss": 0.7966, "step": 477 }, { "epoch": 0.07314460596786534, "grad_norm": 3.241023758964187, "learning_rate": 1.9997781504450416e-05, "loss": 0.7054, "step": 478 }, { "epoch": 0.07329762815608264, "grad_norm": 3.1306221791660174, "learning_rate": 1.99977289994844e-05, "loss": 0.8397, "step": 479 }, { "epoch": 0.07345065034429993, "grad_norm": 3.5542684374846676, "learning_rate": 1.999767588051874e-05, "loss": 0.9649, "step": 480 }, { "epoch": 0.07360367253251722, "grad_norm": 3.1099423042693375, "learning_rate": 1.9997622147556687e-05, "loss": 0.8699, "step": 481 }, { "epoch": 0.07375669472073451, "grad_norm": 3.092152194487772, "learning_rate": 1.9997567800601548e-05, "loss": 0.9186, "step": 482 }, { "epoch": 0.0739097169089518, "grad_norm": 3.402202777509611, "learning_rate": 1.9997512839656665e-05, "loss": 0.8209, "step": 483 }, { "epoch": 0.0740627390971691, "grad_norm": 2.9907255336981184, "learning_rate": 1.9997457264725404e-05, "loss": 0.9086, "step": 484 }, { "epoch": 0.07421576128538639, "grad_norm": 3.131981310207802, "learning_rate": 1.9997401075811182e-05, "loss": 0.8904, "step": 485 }, { "epoch": 0.07436878347360368, "grad_norm": 3.209634258627602, "learning_rate": 1.999734427291745e-05, "loss": 0.7896, "step": 486 }, { "epoch": 0.07452180566182097, "grad_norm": 3.340921344451802, "learning_rate": 1.9997286856047696e-05, "loss": 0.8265, "step": 487 }, { "epoch": 0.07467482785003826, "grad_norm": 3.13565494738022, "learning_rate": 1.999722882520545e-05, "loss": 0.8693, "step": 488 }, { "epoch": 0.07482785003825555, "grad_norm": 3.02235992945377, "learning_rate": 1.999717018039427e-05, "loss": 0.8724, "step": 489 }, { "epoch": 0.07498087222647284, "grad_norm": 3.4023434406169795, "learning_rate": 1.999711092161776e-05, "loss": 0.9148, "step": 490 }, { "epoch": 0.07513389441469014, "grad_norm": 3.343264311302944, "learning_rate": 1.999705104887956e-05, "loss": 0.8204, "step": 491 }, { "epoch": 0.07528691660290743, "grad_norm": 3.01072439243528, "learning_rate": 1.9996990562183344e-05, "loss": 0.8356, "step": 492 }, { "epoch": 0.07543993879112472, "grad_norm": 3.0677819421014543, "learning_rate": 1.999692946153283e-05, "loss": 0.9457, "step": 493 }, { "epoch": 0.07559296097934201, "grad_norm": 3.049481965152659, "learning_rate": 1.999686774693177e-05, "loss": 0.8406, "step": 494 }, { "epoch": 0.0757459831675593, "grad_norm": 3.1273680772157997, "learning_rate": 1.9996805418383957e-05, "loss": 0.8543, "step": 495 }, { "epoch": 0.0758990053557766, "grad_norm": 3.2658135510698543, "learning_rate": 1.999674247589321e-05, "loss": 0.801, "step": 496 }, { "epoch": 0.07605202754399389, "grad_norm": 3.2022841129021966, "learning_rate": 1.9996678919463403e-05, "loss": 0.9407, "step": 497 }, { "epoch": 0.07620504973221118, "grad_norm": 3.2241072818356846, "learning_rate": 1.9996614749098438e-05, "loss": 0.9081, "step": 498 }, { "epoch": 0.07635807192042846, "grad_norm": 3.141672970165098, "learning_rate": 1.999654996480225e-05, "loss": 0.8296, "step": 499 }, { "epoch": 0.07651109410864575, "grad_norm": 2.8655818640002884, "learning_rate": 1.9996484566578824e-05, "loss": 0.7494, "step": 500 }, { "epoch": 0.07666411629686304, "grad_norm": 3.005360495550959, "learning_rate": 1.9996418554432175e-05, "loss": 0.7814, "step": 501 }, { "epoch": 0.07681713848508033, "grad_norm": 3.3007083879137364, "learning_rate": 1.999635192836636e-05, "loss": 0.8228, "step": 502 }, { "epoch": 0.07697016067329762, "grad_norm": 3.239789821398647, "learning_rate": 1.9996284688385458e-05, "loss": 0.8911, "step": 503 }, { "epoch": 0.07712318286151491, "grad_norm": 3.16801784653872, "learning_rate": 1.9996216834493614e-05, "loss": 0.8827, "step": 504 }, { "epoch": 0.0772762050497322, "grad_norm": 3.2341267297113223, "learning_rate": 1.9996148366694987e-05, "loss": 0.8773, "step": 505 }, { "epoch": 0.0774292272379495, "grad_norm": 3.0993619034994526, "learning_rate": 1.9996079284993785e-05, "loss": 0.898, "step": 506 }, { "epoch": 0.07758224942616679, "grad_norm": 2.9243224608176637, "learning_rate": 1.999600958939425e-05, "loss": 0.8461, "step": 507 }, { "epoch": 0.07773527161438408, "grad_norm": 3.307290573644802, "learning_rate": 1.9995939279900658e-05, "loss": 0.91, "step": 508 }, { "epoch": 0.07788829380260137, "grad_norm": 2.9365920881459986, "learning_rate": 1.999586835651733e-05, "loss": 0.9606, "step": 509 }, { "epoch": 0.07804131599081866, "grad_norm": 2.724422019837075, "learning_rate": 1.999579681924863e-05, "loss": 0.8498, "step": 510 }, { "epoch": 0.07819433817903595, "grad_norm": 2.9876304383699326, "learning_rate": 1.9995724668098936e-05, "loss": 0.7418, "step": 511 }, { "epoch": 0.07834736036725325, "grad_norm": 2.864681589216531, "learning_rate": 1.999565190307269e-05, "loss": 0.8075, "step": 512 }, { "epoch": 0.07850038255547054, "grad_norm": 2.8644979232121517, "learning_rate": 1.9995578524174354e-05, "loss": 0.7353, "step": 513 }, { "epoch": 0.07865340474368783, "grad_norm": 3.2377935340367707, "learning_rate": 1.999550453140844e-05, "loss": 0.8998, "step": 514 }, { "epoch": 0.07880642693190512, "grad_norm": 3.0349644799969457, "learning_rate": 1.999542992477949e-05, "loss": 0.8999, "step": 515 }, { "epoch": 0.07895944912012241, "grad_norm": 3.187108910079354, "learning_rate": 1.9995354704292086e-05, "loss": 0.9367, "step": 516 }, { "epoch": 0.0791124713083397, "grad_norm": 3.6556676597386955, "learning_rate": 1.9995278869950848e-05, "loss": 0.8974, "step": 517 }, { "epoch": 0.079265493496557, "grad_norm": 3.149907651192199, "learning_rate": 1.9995202421760432e-05, "loss": 0.8972, "step": 518 }, { "epoch": 0.07941851568477429, "grad_norm": 3.1073516404536368, "learning_rate": 1.9995125359725534e-05, "loss": 0.841, "step": 519 }, { "epoch": 0.07957153787299158, "grad_norm": 3.3022754605273668, "learning_rate": 1.9995047683850887e-05, "loss": 0.9602, "step": 520 }, { "epoch": 0.07972456006120887, "grad_norm": 3.0277051105867216, "learning_rate": 1.999496939414126e-05, "loss": 0.8321, "step": 521 }, { "epoch": 0.07987758224942616, "grad_norm": 3.2127384679862416, "learning_rate": 1.9994890490601463e-05, "loss": 0.7626, "step": 522 }, { "epoch": 0.08003060443764345, "grad_norm": 3.1231645870718814, "learning_rate": 1.999481097323634e-05, "loss": 0.8598, "step": 523 }, { "epoch": 0.08018362662586075, "grad_norm": 2.919715281820122, "learning_rate": 1.9994730842050776e-05, "loss": 0.8244, "step": 524 }, { "epoch": 0.08033664881407804, "grad_norm": 2.822749796794928, "learning_rate": 1.999465009704969e-05, "loss": 0.8562, "step": 525 }, { "epoch": 0.08048967100229533, "grad_norm": 2.91444240792536, "learning_rate": 1.9994568738238046e-05, "loss": 1.0529, "step": 526 }, { "epoch": 0.08064269319051262, "grad_norm": 3.210762650851107, "learning_rate": 1.9994486765620834e-05, "loss": 0.9525, "step": 527 }, { "epoch": 0.08079571537872991, "grad_norm": 3.1679560833629528, "learning_rate": 1.9994404179203092e-05, "loss": 0.8337, "step": 528 }, { "epoch": 0.0809487375669472, "grad_norm": 2.833453068833028, "learning_rate": 1.9994320978989887e-05, "loss": 0.9588, "step": 529 }, { "epoch": 0.0811017597551645, "grad_norm": 3.2638311444981998, "learning_rate": 1.9994237164986338e-05, "loss": 0.9316, "step": 530 }, { "epoch": 0.08125478194338179, "grad_norm": 3.4811522175868608, "learning_rate": 1.9994152737197586e-05, "loss": 0.9002, "step": 531 }, { "epoch": 0.08140780413159908, "grad_norm": 3.0731278665818036, "learning_rate": 1.9994067695628815e-05, "loss": 0.8697, "step": 532 }, { "epoch": 0.08156082631981637, "grad_norm": 2.8689492083403647, "learning_rate": 1.999398204028525e-05, "loss": 0.8766, "step": 533 }, { "epoch": 0.08171384850803366, "grad_norm": 2.972650733628086, "learning_rate": 1.9993895771172153e-05, "loss": 0.7181, "step": 534 }, { "epoch": 0.08186687069625095, "grad_norm": 2.880097853744865, "learning_rate": 1.9993808888294816e-05, "loss": 0.8629, "step": 535 }, { "epoch": 0.08201989288446825, "grad_norm": 2.8699706044768116, "learning_rate": 1.9993721391658584e-05, "loss": 0.8897, "step": 536 }, { "epoch": 0.08217291507268554, "grad_norm": 3.2307318749796448, "learning_rate": 1.9993633281268825e-05, "loss": 0.79, "step": 537 }, { "epoch": 0.08232593726090283, "grad_norm": 3.4903511369450237, "learning_rate": 1.999354455713095e-05, "loss": 0.968, "step": 538 }, { "epoch": 0.08247895944912012, "grad_norm": 2.487957846673477, "learning_rate": 1.9993455219250407e-05, "loss": 0.8133, "step": 539 }, { "epoch": 0.08263198163733741, "grad_norm": 3.022484553326492, "learning_rate": 1.999336526763269e-05, "loss": 0.8825, "step": 540 }, { "epoch": 0.0827850038255547, "grad_norm": 2.8453697669895015, "learning_rate": 1.9993274702283313e-05, "loss": 0.8821, "step": 541 }, { "epoch": 0.082938026013772, "grad_norm": 3.3972904425220944, "learning_rate": 1.999318352320784e-05, "loss": 0.9181, "step": 542 }, { "epoch": 0.08309104820198929, "grad_norm": 3.389004916540709, "learning_rate": 1.999309173041188e-05, "loss": 0.9292, "step": 543 }, { "epoch": 0.08324407039020658, "grad_norm": 3.0472330893944806, "learning_rate": 1.999299932390106e-05, "loss": 0.7503, "step": 544 }, { "epoch": 0.08339709257842387, "grad_norm": 3.062998869061395, "learning_rate": 1.9992906303681057e-05, "loss": 0.9149, "step": 545 }, { "epoch": 0.08355011476664116, "grad_norm": 3.662910791698355, "learning_rate": 1.999281266975759e-05, "loss": 0.8746, "step": 546 }, { "epoch": 0.08370313695485845, "grad_norm": 2.9445135400870637, "learning_rate": 1.99927184221364e-05, "loss": 0.8416, "step": 547 }, { "epoch": 0.08385615914307575, "grad_norm": 2.9881136587386026, "learning_rate": 1.9992623560823284e-05, "loss": 0.86, "step": 548 }, { "epoch": 0.08400918133129304, "grad_norm": 2.8957772667517085, "learning_rate": 1.999252808582406e-05, "loss": 0.8825, "step": 549 }, { "epoch": 0.08416220351951033, "grad_norm": 2.7973789830398883, "learning_rate": 1.99924319971446e-05, "loss": 0.8052, "step": 550 }, { "epoch": 0.08431522570772762, "grad_norm": 2.778130072053958, "learning_rate": 1.9992335294790797e-05, "loss": 0.7703, "step": 551 }, { "epoch": 0.08446824789594491, "grad_norm": 3.1327192932994405, "learning_rate": 1.9992237978768593e-05, "loss": 0.9049, "step": 552 }, { "epoch": 0.0846212700841622, "grad_norm": 3.1565650717986977, "learning_rate": 1.9992140049083968e-05, "loss": 0.8519, "step": 553 }, { "epoch": 0.0847742922723795, "grad_norm": 3.1756761607270194, "learning_rate": 1.999204150574293e-05, "loss": 1.0047, "step": 554 }, { "epoch": 0.08492731446059679, "grad_norm": 2.6052032947874864, "learning_rate": 1.9991942348751534e-05, "loss": 0.8446, "step": 555 }, { "epoch": 0.08508033664881408, "grad_norm": 3.3747384938061846, "learning_rate": 1.9991842578115872e-05, "loss": 0.9783, "step": 556 }, { "epoch": 0.08523335883703137, "grad_norm": 3.1466121416772532, "learning_rate": 1.999174219384207e-05, "loss": 0.9396, "step": 557 }, { "epoch": 0.08538638102524866, "grad_norm": 3.203806072282312, "learning_rate": 1.999164119593629e-05, "loss": 0.8774, "step": 558 }, { "epoch": 0.08553940321346595, "grad_norm": 2.8295832347751544, "learning_rate": 1.9991539584404734e-05, "loss": 0.7787, "step": 559 }, { "epoch": 0.08569242540168324, "grad_norm": 2.9986481486485315, "learning_rate": 1.9991437359253647e-05, "loss": 0.8303, "step": 560 }, { "epoch": 0.08584544758990054, "grad_norm": 3.1163102632937525, "learning_rate": 1.9991334520489304e-05, "loss": 0.8202, "step": 561 }, { "epoch": 0.08599846977811783, "grad_norm": 3.3672338761393616, "learning_rate": 1.9991231068118024e-05, "loss": 0.8453, "step": 562 }, { "epoch": 0.08615149196633512, "grad_norm": 2.9326058066627123, "learning_rate": 1.9991127002146157e-05, "loss": 0.816, "step": 563 }, { "epoch": 0.08630451415455241, "grad_norm": 2.9286950810713277, "learning_rate": 1.9991022322580096e-05, "loss": 0.893, "step": 564 }, { "epoch": 0.0864575363427697, "grad_norm": 3.390923338954129, "learning_rate": 1.999091702942627e-05, "loss": 0.942, "step": 565 }, { "epoch": 0.086610558530987, "grad_norm": 3.100849028861991, "learning_rate": 1.9990811122691142e-05, "loss": 0.8572, "step": 566 }, { "epoch": 0.08676358071920429, "grad_norm": 3.024939074035441, "learning_rate": 1.9990704602381222e-05, "loss": 0.9249, "step": 567 }, { "epoch": 0.08691660290742158, "grad_norm": 3.086292608040693, "learning_rate": 1.9990597468503044e-05, "loss": 0.7923, "step": 568 }, { "epoch": 0.08706962509563887, "grad_norm": 2.8094297793401397, "learning_rate": 1.9990489721063194e-05, "loss": 0.7916, "step": 569 }, { "epoch": 0.08722264728385616, "grad_norm": 2.906165406278737, "learning_rate": 1.999038136006829e-05, "loss": 0.8567, "step": 570 }, { "epoch": 0.08737566947207345, "grad_norm": 3.027894811082444, "learning_rate": 1.9990272385524983e-05, "loss": 0.8776, "step": 571 }, { "epoch": 0.08752869166029074, "grad_norm": 2.951885113296094, "learning_rate": 1.9990162797439964e-05, "loss": 0.7175, "step": 572 }, { "epoch": 0.08768171384850804, "grad_norm": 2.8448885126107464, "learning_rate": 1.9990052595819966e-05, "loss": 0.7836, "step": 573 }, { "epoch": 0.08783473603672533, "grad_norm": 3.0008723156763635, "learning_rate": 1.998994178067176e-05, "loss": 0.9061, "step": 574 }, { "epoch": 0.08798775822494262, "grad_norm": 2.766808323475977, "learning_rate": 1.9989830352002144e-05, "loss": 0.7398, "step": 575 }, { "epoch": 0.08814078041315991, "grad_norm": 3.089772730908918, "learning_rate": 1.998971830981797e-05, "loss": 0.9764, "step": 576 }, { "epoch": 0.0882938026013772, "grad_norm": 3.2210205902228903, "learning_rate": 1.9989605654126113e-05, "loss": 0.88, "step": 577 }, { "epoch": 0.0884468247895945, "grad_norm": 3.3361093624428526, "learning_rate": 1.9989492384933493e-05, "loss": 0.9849, "step": 578 }, { "epoch": 0.08859984697781179, "grad_norm": 2.9746314886323773, "learning_rate": 1.9989378502247067e-05, "loss": 0.8476, "step": 579 }, { "epoch": 0.08875286916602908, "grad_norm": 2.8992176182889104, "learning_rate": 1.9989264006073826e-05, "loss": 0.7202, "step": 580 }, { "epoch": 0.08890589135424637, "grad_norm": 2.7055032415422726, "learning_rate": 1.998914889642081e-05, "loss": 0.7931, "step": 581 }, { "epoch": 0.08905891354246366, "grad_norm": 2.97229656950559, "learning_rate": 1.9989033173295075e-05, "loss": 0.7473, "step": 582 }, { "epoch": 0.08921193573068095, "grad_norm": 3.013939775038768, "learning_rate": 1.9988916836703738e-05, "loss": 0.8688, "step": 583 }, { "epoch": 0.08936495791889824, "grad_norm": 3.2961620765077106, "learning_rate": 1.998879988665394e-05, "loss": 0.9014, "step": 584 }, { "epoch": 0.08951798010711554, "grad_norm": 3.136110348623102, "learning_rate": 1.998868232315287e-05, "loss": 0.8517, "step": 585 }, { "epoch": 0.08967100229533283, "grad_norm": 2.873765409966888, "learning_rate": 1.998856414620774e-05, "loss": 0.8392, "step": 586 }, { "epoch": 0.08982402448355012, "grad_norm": 3.0620369003779997, "learning_rate": 1.9988445355825808e-05, "loss": 0.9311, "step": 587 }, { "epoch": 0.08997704667176741, "grad_norm": 2.9488999826259583, "learning_rate": 1.9988325952014375e-05, "loss": 0.7894, "step": 588 }, { "epoch": 0.0901300688599847, "grad_norm": 3.364620998306335, "learning_rate": 1.998820593478077e-05, "loss": 0.9131, "step": 589 }, { "epoch": 0.090283091048202, "grad_norm": 3.486214934509759, "learning_rate": 1.9988085304132362e-05, "loss": 0.9463, "step": 590 }, { "epoch": 0.09043611323641929, "grad_norm": 3.103071840216333, "learning_rate": 1.9987964060076565e-05, "loss": 0.9319, "step": 591 }, { "epoch": 0.09058913542463658, "grad_norm": 2.7855301279351736, "learning_rate": 1.998784220262082e-05, "loss": 0.8917, "step": 592 }, { "epoch": 0.09074215761285387, "grad_norm": 2.628571920591738, "learning_rate": 1.998771973177261e-05, "loss": 0.8585, "step": 593 }, { "epoch": 0.09089517980107116, "grad_norm": 3.0308283014483135, "learning_rate": 1.9987596647539464e-05, "loss": 0.8611, "step": 594 }, { "epoch": 0.09104820198928845, "grad_norm": 3.395921773906306, "learning_rate": 1.9987472949928936e-05, "loss": 0.8705, "step": 595 }, { "epoch": 0.09120122417750574, "grad_norm": 3.185931373181077, "learning_rate": 1.9987348638948623e-05, "loss": 0.8794, "step": 596 }, { "epoch": 0.09135424636572304, "grad_norm": 3.2153230873365857, "learning_rate": 1.9987223714606156e-05, "loss": 0.9029, "step": 597 }, { "epoch": 0.09150726855394033, "grad_norm": 2.615904068828914, "learning_rate": 1.9987098176909213e-05, "loss": 0.8384, "step": 598 }, { "epoch": 0.09166029074215762, "grad_norm": 3.1448963558441023, "learning_rate": 1.99869720258655e-05, "loss": 0.8621, "step": 599 }, { "epoch": 0.09181331293037491, "grad_norm": 2.848509682376616, "learning_rate": 1.9986845261482767e-05, "loss": 0.8979, "step": 600 }, { "epoch": 0.0919663351185922, "grad_norm": 2.8631042534970117, "learning_rate": 1.9986717883768796e-05, "loss": 0.7573, "step": 601 }, { "epoch": 0.0921193573068095, "grad_norm": 2.877288020891183, "learning_rate": 1.998658989273141e-05, "loss": 0.8908, "step": 602 }, { "epoch": 0.09227237949502679, "grad_norm": 2.9466061917507855, "learning_rate": 1.9986461288378475e-05, "loss": 0.8146, "step": 603 }, { "epoch": 0.09242540168324408, "grad_norm": 3.2326794041122344, "learning_rate": 1.9986332070717882e-05, "loss": 0.866, "step": 604 }, { "epoch": 0.09257842387146137, "grad_norm": 3.2136492245233987, "learning_rate": 1.9986202239757572e-05, "loss": 0.9044, "step": 605 }, { "epoch": 0.09273144605967866, "grad_norm": 2.9185939116175796, "learning_rate": 1.9986071795505516e-05, "loss": 0.799, "step": 606 }, { "epoch": 0.09288446824789595, "grad_norm": 2.5518184921354545, "learning_rate": 1.9985940737969724e-05, "loss": 0.7654, "step": 607 }, { "epoch": 0.09303749043611324, "grad_norm": 3.4896794353885157, "learning_rate": 1.9985809067158246e-05, "loss": 0.8924, "step": 608 }, { "epoch": 0.09319051262433053, "grad_norm": 2.9217667565577408, "learning_rate": 1.998567678307917e-05, "loss": 0.7938, "step": 609 }, { "epoch": 0.09334353481254781, "grad_norm": 2.9217531851694076, "learning_rate": 1.9985543885740616e-05, "loss": 0.8824, "step": 610 }, { "epoch": 0.0934965570007651, "grad_norm": 3.466534744413211, "learning_rate": 1.998541037515075e-05, "loss": 0.8839, "step": 611 }, { "epoch": 0.0936495791889824, "grad_norm": 2.8647068519856473, "learning_rate": 1.998527625131777e-05, "loss": 0.7641, "step": 612 }, { "epoch": 0.09380260137719969, "grad_norm": 2.990874053782021, "learning_rate": 1.9985141514249913e-05, "loss": 0.8285, "step": 613 }, { "epoch": 0.09395562356541698, "grad_norm": 3.213841555154716, "learning_rate": 1.9985006163955454e-05, "loss": 0.8348, "step": 614 }, { "epoch": 0.09410864575363427, "grad_norm": 2.897833932939109, "learning_rate": 1.9984870200442704e-05, "loss": 0.8974, "step": 615 }, { "epoch": 0.09426166794185156, "grad_norm": 3.255093140463334, "learning_rate": 1.998473362372001e-05, "loss": 0.77, "step": 616 }, { "epoch": 0.09441469013006885, "grad_norm": 3.5541022603639654, "learning_rate": 1.9984596433795768e-05, "loss": 0.9675, "step": 617 }, { "epoch": 0.09456771231828615, "grad_norm": 3.156911376432198, "learning_rate": 1.99844586306784e-05, "loss": 0.7643, "step": 618 }, { "epoch": 0.09472073450650344, "grad_norm": 3.0327127020510014, "learning_rate": 1.9984320214376367e-05, "loss": 0.9454, "step": 619 }, { "epoch": 0.09487375669472073, "grad_norm": 2.7145164111445763, "learning_rate": 1.998418118489817e-05, "loss": 0.78, "step": 620 }, { "epoch": 0.09502677888293802, "grad_norm": 3.042572988904212, "learning_rate": 1.998404154225235e-05, "loss": 0.8324, "step": 621 }, { "epoch": 0.09517980107115531, "grad_norm": 2.590002583866103, "learning_rate": 1.9983901286447477e-05, "loss": 0.7565, "step": 622 }, { "epoch": 0.0953328232593726, "grad_norm": 3.2287495750309696, "learning_rate": 1.9983760417492173e-05, "loss": 0.7237, "step": 623 }, { "epoch": 0.0954858454475899, "grad_norm": 2.9615171389313732, "learning_rate": 1.9983618935395084e-05, "loss": 0.8673, "step": 624 }, { "epoch": 0.09563886763580719, "grad_norm": 2.975886701578284, "learning_rate": 1.9983476840164896e-05, "loss": 0.8856, "step": 625 }, { "epoch": 0.09579188982402448, "grad_norm": 3.3574876890478054, "learning_rate": 1.9983334131810346e-05, "loss": 0.9716, "step": 626 }, { "epoch": 0.09594491201224177, "grad_norm": 2.7507257274616648, "learning_rate": 1.9983190810340186e-05, "loss": 0.9263, "step": 627 }, { "epoch": 0.09609793420045906, "grad_norm": 3.191008180869532, "learning_rate": 1.9983046875763228e-05, "loss": 0.931, "step": 628 }, { "epoch": 0.09625095638867635, "grad_norm": 2.853979783408991, "learning_rate": 1.998290232808831e-05, "loss": 0.786, "step": 629 }, { "epoch": 0.09640397857689365, "grad_norm": 3.0623527098455945, "learning_rate": 1.99827571673243e-05, "loss": 0.9594, "step": 630 }, { "epoch": 0.09655700076511094, "grad_norm": 3.0612621394357387, "learning_rate": 1.9982611393480124e-05, "loss": 0.8467, "step": 631 }, { "epoch": 0.09671002295332823, "grad_norm": 3.145922754185688, "learning_rate": 1.9982465006564727e-05, "loss": 0.837, "step": 632 }, { "epoch": 0.09686304514154552, "grad_norm": 3.0267411008622602, "learning_rate": 1.9982318006587107e-05, "loss": 0.9817, "step": 633 }, { "epoch": 0.09701606732976281, "grad_norm": 2.9130437517324594, "learning_rate": 1.9982170393556282e-05, "loss": 0.7791, "step": 634 }, { "epoch": 0.0971690895179801, "grad_norm": 2.8442538652688922, "learning_rate": 1.9982022167481324e-05, "loss": 0.9116, "step": 635 }, { "epoch": 0.0973221117061974, "grad_norm": 2.912158188776152, "learning_rate": 1.9981873328371338e-05, "loss": 0.8294, "step": 636 }, { "epoch": 0.09747513389441469, "grad_norm": 3.099465213513769, "learning_rate": 1.9981723876235457e-05, "loss": 0.8317, "step": 637 }, { "epoch": 0.09762815608263198, "grad_norm": 2.9188799056701296, "learning_rate": 1.9981573811082868e-05, "loss": 0.8401, "step": 638 }, { "epoch": 0.09778117827084927, "grad_norm": 2.819614664399572, "learning_rate": 1.998142313292278e-05, "loss": 0.8368, "step": 639 }, { "epoch": 0.09793420045906656, "grad_norm": 2.6738959579696577, "learning_rate": 1.9981271841764452e-05, "loss": 0.9057, "step": 640 }, { "epoch": 0.09808722264728385, "grad_norm": 2.9601321974365735, "learning_rate": 1.9981119937617174e-05, "loss": 0.8107, "step": 641 }, { "epoch": 0.09824024483550114, "grad_norm": 3.0233216111788472, "learning_rate": 1.9980967420490273e-05, "loss": 0.9779, "step": 642 }, { "epoch": 0.09839326702371844, "grad_norm": 2.7557734535440166, "learning_rate": 1.9980814290393115e-05, "loss": 0.838, "step": 643 }, { "epoch": 0.09854628921193573, "grad_norm": 2.704043797297548, "learning_rate": 1.998066054733511e-05, "loss": 0.8751, "step": 644 }, { "epoch": 0.09869931140015302, "grad_norm": 3.1041651898470657, "learning_rate": 1.9980506191325694e-05, "loss": 0.8846, "step": 645 }, { "epoch": 0.09885233358837031, "grad_norm": 2.6868333111971476, "learning_rate": 1.9980351222374347e-05, "loss": 0.8375, "step": 646 }, { "epoch": 0.0990053557765876, "grad_norm": 2.777208715705951, "learning_rate": 1.9980195640490592e-05, "loss": 0.8029, "step": 647 }, { "epoch": 0.0991583779648049, "grad_norm": 3.4013194486190548, "learning_rate": 1.9980039445683978e-05, "loss": 0.8727, "step": 648 }, { "epoch": 0.09931140015302219, "grad_norm": 3.0668546286557543, "learning_rate": 1.99798826379641e-05, "loss": 0.9092, "step": 649 }, { "epoch": 0.09946442234123948, "grad_norm": 3.0453985288017904, "learning_rate": 1.9979725217340587e-05, "loss": 0.8486, "step": 650 }, { "epoch": 0.09961744452945677, "grad_norm": 2.7942211304315143, "learning_rate": 1.9979567183823108e-05, "loss": 0.8158, "step": 651 }, { "epoch": 0.09977046671767406, "grad_norm": 3.3292124901062206, "learning_rate": 1.9979408537421367e-05, "loss": 0.9559, "step": 652 }, { "epoch": 0.09992348890589135, "grad_norm": 3.134527582640561, "learning_rate": 1.997924927814511e-05, "loss": 0.8503, "step": 653 }, { "epoch": 0.10007651109410864, "grad_norm": 2.9257436635589857, "learning_rate": 1.9979089406004115e-05, "loss": 0.7602, "step": 654 }, { "epoch": 0.10022953328232594, "grad_norm": 2.9114862208984373, "learning_rate": 1.99789289210082e-05, "loss": 0.8381, "step": 655 }, { "epoch": 0.10038255547054323, "grad_norm": 3.281387510746206, "learning_rate": 1.9978767823167224e-05, "loss": 0.8544, "step": 656 }, { "epoch": 0.10053557765876052, "grad_norm": 2.5934944798509774, "learning_rate": 1.9978606112491076e-05, "loss": 0.8238, "step": 657 }, { "epoch": 0.10068859984697781, "grad_norm": 2.803648838613119, "learning_rate": 1.9978443788989695e-05, "loss": 0.8357, "step": 658 }, { "epoch": 0.1008416220351951, "grad_norm": 3.2256668016008705, "learning_rate": 1.9978280852673038e-05, "loss": 0.9185, "step": 659 }, { "epoch": 0.1009946442234124, "grad_norm": 2.9180166328828108, "learning_rate": 1.9978117303551127e-05, "loss": 0.9, "step": 660 }, { "epoch": 0.10114766641162969, "grad_norm": 2.759551164175027, "learning_rate": 1.997795314163399e-05, "loss": 0.8243, "step": 661 }, { "epoch": 0.10130068859984698, "grad_norm": 2.8452679100467564, "learning_rate": 1.997778836693172e-05, "loss": 0.8834, "step": 662 }, { "epoch": 0.10145371078806427, "grad_norm": 2.85925562143535, "learning_rate": 1.9977622979454433e-05, "loss": 0.8022, "step": 663 }, { "epoch": 0.10160673297628156, "grad_norm": 2.925375455401089, "learning_rate": 1.9977456979212286e-05, "loss": 0.8822, "step": 664 }, { "epoch": 0.10175975516449885, "grad_norm": 2.725147333625341, "learning_rate": 1.9977290366215473e-05, "loss": 0.8138, "step": 665 }, { "epoch": 0.10191277735271614, "grad_norm": 3.1953282021420013, "learning_rate": 1.997712314047423e-05, "loss": 0.857, "step": 666 }, { "epoch": 0.10206579954093344, "grad_norm": 3.0746423357238943, "learning_rate": 1.9976955301998822e-05, "loss": 0.8515, "step": 667 }, { "epoch": 0.10221882172915073, "grad_norm": 2.900938526635504, "learning_rate": 1.997678685079956e-05, "loss": 0.8601, "step": 668 }, { "epoch": 0.10237184391736802, "grad_norm": 2.689075444923604, "learning_rate": 1.997661778688679e-05, "loss": 0.8373, "step": 669 }, { "epoch": 0.10252486610558531, "grad_norm": 2.859953745768656, "learning_rate": 1.9976448110270888e-05, "loss": 0.7555, "step": 670 }, { "epoch": 0.1026778882938026, "grad_norm": 3.145897880802856, "learning_rate": 1.997627782096228e-05, "loss": 0.9535, "step": 671 }, { "epoch": 0.1028309104820199, "grad_norm": 2.969611797938469, "learning_rate": 1.9976106918971428e-05, "loss": 0.8239, "step": 672 }, { "epoch": 0.10298393267023719, "grad_norm": 2.9387550683170014, "learning_rate": 1.9975935404308818e-05, "loss": 0.8714, "step": 673 }, { "epoch": 0.10313695485845448, "grad_norm": 2.9648398626987444, "learning_rate": 1.9975763276984993e-05, "loss": 0.8172, "step": 674 }, { "epoch": 0.10328997704667177, "grad_norm": 3.295868102675818, "learning_rate": 1.9975590537010515e-05, "loss": 0.9202, "step": 675 }, { "epoch": 0.10344299923488906, "grad_norm": 3.22903200685603, "learning_rate": 1.9975417184396005e-05, "loss": 0.9028, "step": 676 }, { "epoch": 0.10359602142310635, "grad_norm": 2.825966414769792, "learning_rate": 1.9975243219152095e-05, "loss": 0.9263, "step": 677 }, { "epoch": 0.10374904361132364, "grad_norm": 3.030014157036485, "learning_rate": 1.9975068641289478e-05, "loss": 0.718, "step": 678 }, { "epoch": 0.10390206579954094, "grad_norm": 3.475541487458047, "learning_rate": 1.9974893450818875e-05, "loss": 0.9376, "step": 679 }, { "epoch": 0.10405508798775823, "grad_norm": 3.2549605814152356, "learning_rate": 1.997471764775104e-05, "loss": 0.7524, "step": 680 }, { "epoch": 0.10420811017597552, "grad_norm": 3.126998944683233, "learning_rate": 1.9974541232096774e-05, "loss": 0.9323, "step": 681 }, { "epoch": 0.10436113236419281, "grad_norm": 3.0013467824393025, "learning_rate": 1.997436420386691e-05, "loss": 0.806, "step": 682 }, { "epoch": 0.1045141545524101, "grad_norm": 3.0259375619997253, "learning_rate": 1.997418656307232e-05, "loss": 0.8783, "step": 683 }, { "epoch": 0.1046671767406274, "grad_norm": 3.0450368516418447, "learning_rate": 1.9974008309723917e-05, "loss": 0.9218, "step": 684 }, { "epoch": 0.10482019892884469, "grad_norm": 2.825921818621975, "learning_rate": 1.997382944383264e-05, "loss": 0.8584, "step": 685 }, { "epoch": 0.10497322111706198, "grad_norm": 2.912770169006268, "learning_rate": 1.9973649965409483e-05, "loss": 0.8629, "step": 686 }, { "epoch": 0.10512624330527927, "grad_norm": 2.5954835007580477, "learning_rate": 1.9973469874465464e-05, "loss": 0.8466, "step": 687 }, { "epoch": 0.10527926549349656, "grad_norm": 2.917363261917072, "learning_rate": 1.997328917101164e-05, "loss": 0.8829, "step": 688 }, { "epoch": 0.10543228768171385, "grad_norm": 2.812270404547237, "learning_rate": 1.9973107855059116e-05, "loss": 0.8442, "step": 689 }, { "epoch": 0.10558530986993114, "grad_norm": 3.0483780333514257, "learning_rate": 1.9972925926619023e-05, "loss": 0.8695, "step": 690 }, { "epoch": 0.10573833205814843, "grad_norm": 2.9664567475153074, "learning_rate": 1.9972743385702535e-05, "loss": 0.9386, "step": 691 }, { "epoch": 0.10589135424636573, "grad_norm": 2.7137242073331804, "learning_rate": 1.9972560232320863e-05, "loss": 0.7839, "step": 692 }, { "epoch": 0.10604437643458302, "grad_norm": 3.784136448397792, "learning_rate": 1.9972376466485252e-05, "loss": 0.9644, "step": 693 }, { "epoch": 0.10619739862280031, "grad_norm": 2.6044337361202183, "learning_rate": 1.997219208820699e-05, "loss": 0.8399, "step": 694 }, { "epoch": 0.1063504208110176, "grad_norm": 3.106258109412711, "learning_rate": 1.99720070974974e-05, "loss": 0.8544, "step": 695 }, { "epoch": 0.10650344299923489, "grad_norm": 3.1386122334768376, "learning_rate": 1.9971821494367844e-05, "loss": 0.9707, "step": 696 }, { "epoch": 0.10665646518745218, "grad_norm": 2.912783268372566, "learning_rate": 1.997163527882972e-05, "loss": 0.8637, "step": 697 }, { "epoch": 0.10680948737566948, "grad_norm": 2.794401741879464, "learning_rate": 1.9971448450894467e-05, "loss": 0.7666, "step": 698 }, { "epoch": 0.10696250956388677, "grad_norm": 2.787277032267778, "learning_rate": 1.9971261010573553e-05, "loss": 0.9049, "step": 699 }, { "epoch": 0.10711553175210406, "grad_norm": 3.1363693151187535, "learning_rate": 1.9971072957878494e-05, "loss": 0.8289, "step": 700 }, { "epoch": 0.10726855394032135, "grad_norm": 3.016667481448861, "learning_rate": 1.9970884292820837e-05, "loss": 0.7704, "step": 701 }, { "epoch": 0.10742157612853864, "grad_norm": 3.2062104544910355, "learning_rate": 1.997069501541217e-05, "loss": 0.8958, "step": 702 }, { "epoch": 0.10757459831675593, "grad_norm": 2.680574521626054, "learning_rate": 1.9970505125664116e-05, "loss": 0.8637, "step": 703 }, { "epoch": 0.10772762050497323, "grad_norm": 2.674813823925464, "learning_rate": 1.9970314623588335e-05, "loss": 0.7913, "step": 704 }, { "epoch": 0.10788064269319052, "grad_norm": 2.9406076887807706, "learning_rate": 1.9970123509196533e-05, "loss": 0.9162, "step": 705 }, { "epoch": 0.10803366488140781, "grad_norm": 3.261303730361806, "learning_rate": 1.996993178250044e-05, "loss": 0.8425, "step": 706 }, { "epoch": 0.1081866870696251, "grad_norm": 2.8193883845987737, "learning_rate": 1.9969739443511835e-05, "loss": 0.748, "step": 707 }, { "epoch": 0.10833970925784239, "grad_norm": 2.7504729360532973, "learning_rate": 1.996954649224253e-05, "loss": 0.8859, "step": 708 }, { "epoch": 0.10849273144605968, "grad_norm": 2.7955831501250716, "learning_rate": 1.996935292870437e-05, "loss": 0.8175, "step": 709 }, { "epoch": 0.10864575363427698, "grad_norm": 2.9447725337658444, "learning_rate": 1.9969158752909247e-05, "loss": 0.7753, "step": 710 }, { "epoch": 0.10879877582249427, "grad_norm": 2.78759682667099, "learning_rate": 1.9968963964869088e-05, "loss": 0.958, "step": 711 }, { "epoch": 0.10895179801071156, "grad_norm": 3.074769801484582, "learning_rate": 1.9968768564595856e-05, "loss": 0.9086, "step": 712 }, { "epoch": 0.10910482019892885, "grad_norm": 2.9070592094240797, "learning_rate": 1.9968572552101544e-05, "loss": 0.9174, "step": 713 }, { "epoch": 0.10925784238714614, "grad_norm": 3.233405582020585, "learning_rate": 1.9968375927398195e-05, "loss": 0.9356, "step": 714 }, { "epoch": 0.10941086457536343, "grad_norm": 3.022272822063928, "learning_rate": 1.9968178690497884e-05, "loss": 0.851, "step": 715 }, { "epoch": 0.10956388676358073, "grad_norm": 2.8195304743616285, "learning_rate": 1.9967980841412722e-05, "loss": 0.7116, "step": 716 }, { "epoch": 0.10971690895179802, "grad_norm": 2.9245399474491602, "learning_rate": 1.9967782380154864e-05, "loss": 0.8679, "step": 717 }, { "epoch": 0.10986993114001531, "grad_norm": 2.988694346304424, "learning_rate": 1.9967583306736494e-05, "loss": 0.7599, "step": 718 }, { "epoch": 0.1100229533282326, "grad_norm": 3.1688493992216618, "learning_rate": 1.996738362116984e-05, "loss": 0.9484, "step": 719 }, { "epoch": 0.11017597551644988, "grad_norm": 2.9048182310387776, "learning_rate": 1.996718332346717e-05, "loss": 0.7532, "step": 720 }, { "epoch": 0.11032899770466717, "grad_norm": 2.637213696698448, "learning_rate": 1.9966982413640772e-05, "loss": 0.8024, "step": 721 }, { "epoch": 0.11048201989288446, "grad_norm": 2.89859467937182, "learning_rate": 1.9966780891703e-05, "loss": 0.9697, "step": 722 }, { "epoch": 0.11063504208110175, "grad_norm": 2.836160191245258, "learning_rate": 1.996657875766622e-05, "loss": 0.8161, "step": 723 }, { "epoch": 0.11078806426931904, "grad_norm": 3.0972974916050564, "learning_rate": 1.996637601154285e-05, "loss": 0.905, "step": 724 }, { "epoch": 0.11094108645753634, "grad_norm": 2.746119102608403, "learning_rate": 1.9966172653345337e-05, "loss": 0.7602, "step": 725 }, { "epoch": 0.11109410864575363, "grad_norm": 2.6662164718564765, "learning_rate": 1.9965968683086177e-05, "loss": 0.8078, "step": 726 }, { "epoch": 0.11124713083397092, "grad_norm": 3.135727407852158, "learning_rate": 1.9965764100777892e-05, "loss": 0.8687, "step": 727 }, { "epoch": 0.11140015302218821, "grad_norm": 2.902712664303539, "learning_rate": 1.996555890643305e-05, "loss": 0.8878, "step": 728 }, { "epoch": 0.1115531752104055, "grad_norm": 3.2232593590832535, "learning_rate": 1.9965353100064246e-05, "loss": 0.8875, "step": 729 }, { "epoch": 0.1117061973986228, "grad_norm": 3.098086025014026, "learning_rate": 1.9965146681684126e-05, "loss": 0.8271, "step": 730 }, { "epoch": 0.11185921958684009, "grad_norm": 2.852123909711368, "learning_rate": 1.996493965130536e-05, "loss": 0.7657, "step": 731 }, { "epoch": 0.11201224177505738, "grad_norm": 3.08617436112628, "learning_rate": 1.9964732008940673e-05, "loss": 0.9049, "step": 732 }, { "epoch": 0.11216526396327467, "grad_norm": 3.0408480858160787, "learning_rate": 1.996452375460281e-05, "loss": 0.9891, "step": 733 }, { "epoch": 0.11231828615149196, "grad_norm": 2.5643968058230313, "learning_rate": 1.9964314888304563e-05, "loss": 0.8169, "step": 734 }, { "epoch": 0.11247130833970925, "grad_norm": 3.2776028562449184, "learning_rate": 1.9964105410058754e-05, "loss": 0.8984, "step": 735 }, { "epoch": 0.11262433052792654, "grad_norm": 3.1486146940763375, "learning_rate": 1.9963895319878252e-05, "loss": 0.8662, "step": 736 }, { "epoch": 0.11277735271614384, "grad_norm": 3.0577728216797855, "learning_rate": 1.996368461777596e-05, "loss": 1.0304, "step": 737 }, { "epoch": 0.11293037490436113, "grad_norm": 2.558019479134406, "learning_rate": 1.996347330376482e-05, "loss": 0.7169, "step": 738 }, { "epoch": 0.11308339709257842, "grad_norm": 2.7971191724016577, "learning_rate": 1.9963261377857805e-05, "loss": 0.8585, "step": 739 }, { "epoch": 0.11323641928079571, "grad_norm": 2.7918459247533183, "learning_rate": 1.996304884006793e-05, "loss": 0.8193, "step": 740 }, { "epoch": 0.113389441469013, "grad_norm": 3.0171317643440276, "learning_rate": 1.9962835690408255e-05, "loss": 0.9036, "step": 741 }, { "epoch": 0.1135424636572303, "grad_norm": 2.825490934258436, "learning_rate": 1.9962621928891863e-05, "loss": 0.6977, "step": 742 }, { "epoch": 0.11369548584544759, "grad_norm": 2.946410812576731, "learning_rate": 1.9962407555531884e-05, "loss": 0.9062, "step": 743 }, { "epoch": 0.11384850803366488, "grad_norm": 2.63568342518963, "learning_rate": 1.9962192570341485e-05, "loss": 0.8382, "step": 744 }, { "epoch": 0.11400153022188217, "grad_norm": 2.6540164369542145, "learning_rate": 1.9961976973333868e-05, "loss": 0.8843, "step": 745 }, { "epoch": 0.11415455241009946, "grad_norm": 3.1360366019870254, "learning_rate": 1.996176076452227e-05, "loss": 0.9828, "step": 746 }, { "epoch": 0.11430757459831675, "grad_norm": 2.4591066537053603, "learning_rate": 1.996154394391998e-05, "loss": 0.8476, "step": 747 }, { "epoch": 0.11446059678653404, "grad_norm": 2.9487269261454645, "learning_rate": 1.9961326511540303e-05, "loss": 0.907, "step": 748 }, { "epoch": 0.11461361897475134, "grad_norm": 2.9947147155525338, "learning_rate": 1.996110846739659e-05, "loss": 0.8579, "step": 749 }, { "epoch": 0.11476664116296863, "grad_norm": 3.037066986180684, "learning_rate": 1.9960889811502247e-05, "loss": 0.8599, "step": 750 }, { "epoch": 0.11491966335118592, "grad_norm": 3.051458512563892, "learning_rate": 1.9960670543870692e-05, "loss": 0.8731, "step": 751 }, { "epoch": 0.11507268553940321, "grad_norm": 2.8398841037795317, "learning_rate": 1.996045066451539e-05, "loss": 0.8207, "step": 752 }, { "epoch": 0.1152257077276205, "grad_norm": 2.7469614353497955, "learning_rate": 1.9960230173449845e-05, "loss": 0.9335, "step": 753 }, { "epoch": 0.1153787299158378, "grad_norm": 2.935660071595326, "learning_rate": 1.9960009070687603e-05, "loss": 0.8881, "step": 754 }, { "epoch": 0.11553175210405509, "grad_norm": 2.5830086234835994, "learning_rate": 1.9959787356242243e-05, "loss": 0.8157, "step": 755 }, { "epoch": 0.11568477429227238, "grad_norm": 2.902450900848275, "learning_rate": 1.9959565030127375e-05, "loss": 0.8613, "step": 756 }, { "epoch": 0.11583779648048967, "grad_norm": 3.1217190149525544, "learning_rate": 1.9959342092356656e-05, "loss": 0.8571, "step": 757 }, { "epoch": 0.11599081866870696, "grad_norm": 3.0119235092110737, "learning_rate": 1.995911854294378e-05, "loss": 0.7914, "step": 758 }, { "epoch": 0.11614384085692425, "grad_norm": 3.270646329337315, "learning_rate": 1.9958894381902473e-05, "loss": 0.9937, "step": 759 }, { "epoch": 0.11629686304514154, "grad_norm": 2.6432702250244597, "learning_rate": 1.99586696092465e-05, "loss": 0.9079, "step": 760 }, { "epoch": 0.11644988523335884, "grad_norm": 2.86168615726239, "learning_rate": 1.9958444224989673e-05, "loss": 0.7816, "step": 761 }, { "epoch": 0.11660290742157613, "grad_norm": 3.1978664221309447, "learning_rate": 1.9958218229145828e-05, "loss": 0.8444, "step": 762 }, { "epoch": 0.11675592960979342, "grad_norm": 2.932323475678271, "learning_rate": 1.995799162172884e-05, "loss": 0.9456, "step": 763 }, { "epoch": 0.11690895179801071, "grad_norm": 3.038083319492739, "learning_rate": 1.9957764402752632e-05, "loss": 0.9396, "step": 764 }, { "epoch": 0.117061973986228, "grad_norm": 2.848333848765862, "learning_rate": 1.995753657223116e-05, "loss": 0.9366, "step": 765 }, { "epoch": 0.1172149961744453, "grad_norm": 3.0057853013541935, "learning_rate": 1.995730813017841e-05, "loss": 0.9175, "step": 766 }, { "epoch": 0.11736801836266259, "grad_norm": 2.872981414780168, "learning_rate": 1.9957079076608416e-05, "loss": 0.904, "step": 767 }, { "epoch": 0.11752104055087988, "grad_norm": 2.77278331612368, "learning_rate": 1.9956849411535243e-05, "loss": 0.8077, "step": 768 }, { "epoch": 0.11767406273909717, "grad_norm": 2.809951235769412, "learning_rate": 1.9956619134973e-05, "loss": 0.8337, "step": 769 }, { "epoch": 0.11782708492731446, "grad_norm": 2.5883083215594413, "learning_rate": 1.995638824693582e-05, "loss": 0.8807, "step": 770 }, { "epoch": 0.11798010711553175, "grad_norm": 2.8036993992336203, "learning_rate": 1.9956156747437892e-05, "loss": 0.8703, "step": 771 }, { "epoch": 0.11813312930374904, "grad_norm": 3.3243628707668234, "learning_rate": 1.9955924636493427e-05, "loss": 0.7931, "step": 772 }, { "epoch": 0.11828615149196633, "grad_norm": 2.873881310062135, "learning_rate": 1.995569191411668e-05, "loss": 0.9049, "step": 773 }, { "epoch": 0.11843917368018363, "grad_norm": 2.7069595333520895, "learning_rate": 1.995545858032195e-05, "loss": 0.8754, "step": 774 }, { "epoch": 0.11859219586840092, "grad_norm": 2.9096351619045002, "learning_rate": 1.9955224635123563e-05, "loss": 0.7806, "step": 775 }, { "epoch": 0.11874521805661821, "grad_norm": 2.8827356138086566, "learning_rate": 1.9954990078535882e-05, "loss": 0.8809, "step": 776 }, { "epoch": 0.1188982402448355, "grad_norm": 2.826962196189732, "learning_rate": 1.9954754910573322e-05, "loss": 0.87, "step": 777 }, { "epoch": 0.11905126243305279, "grad_norm": 2.8069784613392637, "learning_rate": 1.9954519131250315e-05, "loss": 0.8496, "step": 778 }, { "epoch": 0.11920428462127008, "grad_norm": 2.971601835723486, "learning_rate": 1.9954282740581347e-05, "loss": 0.8614, "step": 779 }, { "epoch": 0.11935730680948738, "grad_norm": 2.9075497971975746, "learning_rate": 1.9954045738580935e-05, "loss": 0.8203, "step": 780 }, { "epoch": 0.11951032899770467, "grad_norm": 2.9402914645046336, "learning_rate": 1.9953808125263634e-05, "loss": 0.9068, "step": 781 }, { "epoch": 0.11966335118592196, "grad_norm": 2.7248354648371245, "learning_rate": 1.9953569900644038e-05, "loss": 0.8354, "step": 782 }, { "epoch": 0.11981637337413925, "grad_norm": 2.738997062960359, "learning_rate": 1.9953331064736772e-05, "loss": 0.8416, "step": 783 }, { "epoch": 0.11996939556235654, "grad_norm": 2.408678484778986, "learning_rate": 1.9953091617556508e-05, "loss": 0.9647, "step": 784 }, { "epoch": 0.12012241775057383, "grad_norm": 3.397543310223297, "learning_rate": 1.995285155911795e-05, "loss": 0.9912, "step": 785 }, { "epoch": 0.12027543993879113, "grad_norm": 2.492146839514386, "learning_rate": 1.9952610889435847e-05, "loss": 0.7809, "step": 786 }, { "epoch": 0.12042846212700842, "grad_norm": 3.186689185906449, "learning_rate": 1.995236960852497e-05, "loss": 0.8792, "step": 787 }, { "epoch": 0.12058148431522571, "grad_norm": 2.6501731876923884, "learning_rate": 1.9952127716400147e-05, "loss": 0.9151, "step": 788 }, { "epoch": 0.120734506503443, "grad_norm": 2.5758608454690717, "learning_rate": 1.9951885213076224e-05, "loss": 0.772, "step": 789 }, { "epoch": 0.12088752869166029, "grad_norm": 2.8652593890546627, "learning_rate": 1.9951642098568098e-05, "loss": 0.8747, "step": 790 }, { "epoch": 0.12104055087987758, "grad_norm": 3.0338057992308727, "learning_rate": 1.9951398372890698e-05, "loss": 0.8661, "step": 791 }, { "epoch": 0.12119357306809488, "grad_norm": 2.921221055918673, "learning_rate": 1.9951154036058996e-05, "loss": 0.9477, "step": 792 }, { "epoch": 0.12134659525631217, "grad_norm": 2.9305423440900045, "learning_rate": 1.9950909088087998e-05, "loss": 0.8084, "step": 793 }, { "epoch": 0.12149961744452946, "grad_norm": 2.8420265459295773, "learning_rate": 1.995066352899274e-05, "loss": 0.7906, "step": 794 }, { "epoch": 0.12165263963274675, "grad_norm": 2.6560288005388255, "learning_rate": 1.995041735878831e-05, "loss": 0.9672, "step": 795 }, { "epoch": 0.12180566182096404, "grad_norm": 2.899871208782027, "learning_rate": 1.9950170577489823e-05, "loss": 0.8082, "step": 796 }, { "epoch": 0.12195868400918133, "grad_norm": 2.426459161276832, "learning_rate": 1.9949923185112437e-05, "loss": 0.7931, "step": 797 }, { "epoch": 0.12211170619739863, "grad_norm": 2.840994852998752, "learning_rate": 1.9949675181671343e-05, "loss": 0.7934, "step": 798 }, { "epoch": 0.12226472838561592, "grad_norm": 2.808195404634968, "learning_rate": 1.9949426567181773e-05, "loss": 0.8969, "step": 799 }, { "epoch": 0.12241775057383321, "grad_norm": 2.534032946860274, "learning_rate": 1.9949177341658995e-05, "loss": 0.8507, "step": 800 }, { "epoch": 0.1225707727620505, "grad_norm": 2.9255867633682167, "learning_rate": 1.9948927505118312e-05, "loss": 0.9297, "step": 801 }, { "epoch": 0.12272379495026779, "grad_norm": 2.644371265898017, "learning_rate": 1.9948677057575074e-05, "loss": 0.7543, "step": 802 }, { "epoch": 0.12287681713848508, "grad_norm": 3.1021955628731375, "learning_rate": 1.9948425999044657e-05, "loss": 0.8462, "step": 803 }, { "epoch": 0.12302983932670238, "grad_norm": 2.8070466213969336, "learning_rate": 1.9948174329542483e-05, "loss": 0.8161, "step": 804 }, { "epoch": 0.12318286151491967, "grad_norm": 2.5722394249819294, "learning_rate": 1.9947922049084007e-05, "loss": 0.8785, "step": 805 }, { "epoch": 0.12333588370313696, "grad_norm": 2.872258390308318, "learning_rate": 1.9947669157684718e-05, "loss": 0.9878, "step": 806 }, { "epoch": 0.12348890589135425, "grad_norm": 2.8229511128297933, "learning_rate": 1.9947415655360148e-05, "loss": 0.8757, "step": 807 }, { "epoch": 0.12364192807957154, "grad_norm": 2.974193887323895, "learning_rate": 1.994716154212587e-05, "loss": 0.8089, "step": 808 }, { "epoch": 0.12379495026778883, "grad_norm": 2.452950207328061, "learning_rate": 1.9946906817997495e-05, "loss": 0.8796, "step": 809 }, { "epoch": 0.12394797245600613, "grad_norm": 2.6646128965830385, "learning_rate": 1.9946651482990654e-05, "loss": 0.8494, "step": 810 }, { "epoch": 0.12410099464422342, "grad_norm": 2.974247053047141, "learning_rate": 1.9946395537121033e-05, "loss": 0.8141, "step": 811 }, { "epoch": 0.12425401683244071, "grad_norm": 2.77324872264311, "learning_rate": 1.9946138980404352e-05, "loss": 0.8281, "step": 812 }, { "epoch": 0.124407039020658, "grad_norm": 2.8947035176206204, "learning_rate": 1.994588181285637e-05, "loss": 0.865, "step": 813 }, { "epoch": 0.12456006120887529, "grad_norm": 2.481415993737972, "learning_rate": 1.9945624034492876e-05, "loss": 0.8163, "step": 814 }, { "epoch": 0.12471308339709258, "grad_norm": 2.812818009991962, "learning_rate": 1.99453656453297e-05, "loss": 0.8919, "step": 815 }, { "epoch": 0.12486610558530988, "grad_norm": 2.606521546663759, "learning_rate": 1.9945106645382713e-05, "loss": 0.8239, "step": 816 }, { "epoch": 0.12501912777352717, "grad_norm": 2.7952461518148692, "learning_rate": 1.9944847034667826e-05, "loss": 0.8532, "step": 817 }, { "epoch": 0.12517214996174444, "grad_norm": 2.7255334999000977, "learning_rate": 1.9944586813200975e-05, "loss": 0.8189, "step": 818 }, { "epoch": 0.12532517214996175, "grad_norm": 2.3712871371410578, "learning_rate": 1.9944325980998143e-05, "loss": 0.7809, "step": 819 }, { "epoch": 0.12547819433817903, "grad_norm": 2.7615076995086567, "learning_rate": 1.9944064538075355e-05, "loss": 0.8229, "step": 820 }, { "epoch": 0.12563121652639633, "grad_norm": 2.932395122434818, "learning_rate": 1.994380248444866e-05, "loss": 0.862, "step": 821 }, { "epoch": 0.1257842387146136, "grad_norm": 2.6788048670584175, "learning_rate": 1.994353982013415e-05, "loss": 0.8015, "step": 822 }, { "epoch": 0.12593726090283092, "grad_norm": 3.0278059097501893, "learning_rate": 1.9943276545147966e-05, "loss": 0.9526, "step": 823 }, { "epoch": 0.1260902830910482, "grad_norm": 2.9252691667903608, "learning_rate": 1.9943012659506268e-05, "loss": 0.8529, "step": 824 }, { "epoch": 0.1262433052792655, "grad_norm": 2.8583585269123994, "learning_rate": 1.9942748163225264e-05, "loss": 0.8131, "step": 825 }, { "epoch": 0.12639632746748278, "grad_norm": 3.2704363944279664, "learning_rate": 1.9942483056321204e-05, "loss": 0.8934, "step": 826 }, { "epoch": 0.12654934965570008, "grad_norm": 2.9576396568597705, "learning_rate": 1.994221733881036e-05, "loss": 0.9039, "step": 827 }, { "epoch": 0.12670237184391736, "grad_norm": 2.5940180643661406, "learning_rate": 1.9941951010709054e-05, "loss": 0.891, "step": 828 }, { "epoch": 0.12685539403213467, "grad_norm": 2.7471602663867722, "learning_rate": 1.9941684072033646e-05, "loss": 0.8782, "step": 829 }, { "epoch": 0.12700841622035194, "grad_norm": 3.0403029906730894, "learning_rate": 1.9941416522800528e-05, "loss": 0.8783, "step": 830 }, { "epoch": 0.12716143840856925, "grad_norm": 2.967016562837652, "learning_rate": 1.994114836302613e-05, "loss": 0.9094, "step": 831 }, { "epoch": 0.12731446059678653, "grad_norm": 2.697380123869265, "learning_rate": 1.994087959272692e-05, "loss": 0.8034, "step": 832 }, { "epoch": 0.12746748278500383, "grad_norm": 2.6505703341369, "learning_rate": 1.9940610211919404e-05, "loss": 0.762, "step": 833 }, { "epoch": 0.1276205049732211, "grad_norm": 2.9265591451269377, "learning_rate": 1.9940340220620128e-05, "loss": 0.9365, "step": 834 }, { "epoch": 0.12777352716143842, "grad_norm": 2.6800900011379114, "learning_rate": 1.9940069618845674e-05, "loss": 0.8783, "step": 835 }, { "epoch": 0.1279265493496557, "grad_norm": 2.7506250769096963, "learning_rate": 1.9939798406612657e-05, "loss": 0.8799, "step": 836 }, { "epoch": 0.128079571537873, "grad_norm": 2.77472886449925, "learning_rate": 1.9939526583937736e-05, "loss": 0.7991, "step": 837 }, { "epoch": 0.12823259372609028, "grad_norm": 2.8680315970983177, "learning_rate": 1.9939254150837603e-05, "loss": 0.7995, "step": 838 }, { "epoch": 0.12838561591430758, "grad_norm": 2.6373678887412253, "learning_rate": 1.993898110732899e-05, "loss": 0.8318, "step": 839 }, { "epoch": 0.12853863810252486, "grad_norm": 2.912194374140499, "learning_rate": 1.9938707453428665e-05, "loss": 0.8977, "step": 840 }, { "epoch": 0.12869166029074217, "grad_norm": 2.664614405723492, "learning_rate": 1.9938433189153437e-05, "loss": 0.7767, "step": 841 }, { "epoch": 0.12884468247895944, "grad_norm": 2.787127868229937, "learning_rate": 1.9938158314520145e-05, "loss": 0.882, "step": 842 }, { "epoch": 0.12899770466717675, "grad_norm": 3.050383550626497, "learning_rate": 1.9937882829545673e-05, "loss": 0.916, "step": 843 }, { "epoch": 0.12915072685539403, "grad_norm": 2.749560412191003, "learning_rate": 1.9937606734246943e-05, "loss": 0.8087, "step": 844 }, { "epoch": 0.12930374904361133, "grad_norm": 3.0722048039396954, "learning_rate": 1.9937330028640903e-05, "loss": 0.8669, "step": 845 }, { "epoch": 0.1294567712318286, "grad_norm": 2.7330294561178254, "learning_rate": 1.9937052712744552e-05, "loss": 0.9029, "step": 846 }, { "epoch": 0.12960979342004592, "grad_norm": 2.846524441855076, "learning_rate": 1.993677478657492e-05, "loss": 0.8833, "step": 847 }, { "epoch": 0.1297628156082632, "grad_norm": 2.7819980747272983, "learning_rate": 1.9936496250149077e-05, "loss": 0.8333, "step": 848 }, { "epoch": 0.1299158377964805, "grad_norm": 2.8784712404720016, "learning_rate": 1.9936217103484126e-05, "loss": 0.8171, "step": 849 }, { "epoch": 0.13006885998469778, "grad_norm": 2.6849041458366423, "learning_rate": 1.9935937346597213e-05, "loss": 0.8568, "step": 850 }, { "epoch": 0.13022188217291508, "grad_norm": 2.7137336876141913, "learning_rate": 1.9935656979505518e-05, "loss": 0.7485, "step": 851 }, { "epoch": 0.13037490436113236, "grad_norm": 2.679237726202307, "learning_rate": 1.993537600222626e-05, "loss": 0.8848, "step": 852 }, { "epoch": 0.13052792654934967, "grad_norm": 2.9623536876476013, "learning_rate": 1.993509441477669e-05, "loss": 0.9203, "step": 853 }, { "epoch": 0.13068094873756694, "grad_norm": 3.0078297852392613, "learning_rate": 1.9934812217174112e-05, "loss": 0.8651, "step": 854 }, { "epoch": 0.13083397092578425, "grad_norm": 2.924067755236479, "learning_rate": 1.9934529409435845e-05, "loss": 0.7741, "step": 855 }, { "epoch": 0.13098699311400153, "grad_norm": 2.7098897161661557, "learning_rate": 1.9934245991579265e-05, "loss": 0.8354, "step": 856 }, { "epoch": 0.13114001530221883, "grad_norm": 2.650605982041534, "learning_rate": 1.9933961963621777e-05, "loss": 0.819, "step": 857 }, { "epoch": 0.1312930374904361, "grad_norm": 2.9612775731325844, "learning_rate": 1.993367732558082e-05, "loss": 0.9485, "step": 858 }, { "epoch": 0.13144605967865342, "grad_norm": 2.6114054372319644, "learning_rate": 1.993339207747388e-05, "loss": 0.7404, "step": 859 }, { "epoch": 0.1315990818668707, "grad_norm": 3.0422415925389, "learning_rate": 1.9933106219318474e-05, "loss": 0.8357, "step": 860 }, { "epoch": 0.131752104055088, "grad_norm": 2.898403955243632, "learning_rate": 1.9932819751132153e-05, "loss": 0.78, "step": 861 }, { "epoch": 0.13190512624330528, "grad_norm": 2.9726198703448383, "learning_rate": 1.9932532672932515e-05, "loss": 0.7844, "step": 862 }, { "epoch": 0.13205814843152258, "grad_norm": 2.961682855155522, "learning_rate": 1.993224498473719e-05, "loss": 0.8929, "step": 863 }, { "epoch": 0.13221117061973986, "grad_norm": 2.9954828687652064, "learning_rate": 1.9931956686563848e-05, "loss": 0.9105, "step": 864 }, { "epoch": 0.13236419280795717, "grad_norm": 2.5682006472322163, "learning_rate": 1.9931667778430188e-05, "loss": 0.717, "step": 865 }, { "epoch": 0.13251721499617444, "grad_norm": 2.771936142857185, "learning_rate": 1.9931378260353957e-05, "loss": 0.8252, "step": 866 }, { "epoch": 0.13267023718439175, "grad_norm": 2.6416325626387613, "learning_rate": 1.9931088132352933e-05, "loss": 0.7803, "step": 867 }, { "epoch": 0.13282325937260903, "grad_norm": 2.833863980241223, "learning_rate": 1.993079739444494e-05, "loss": 0.9553, "step": 868 }, { "epoch": 0.13297628156082633, "grad_norm": 2.6645516366644424, "learning_rate": 1.993050604664783e-05, "loss": 0.7632, "step": 869 }, { "epoch": 0.1331293037490436, "grad_norm": 2.7887899146599278, "learning_rate": 1.9930214088979492e-05, "loss": 0.7871, "step": 870 }, { "epoch": 0.13328232593726091, "grad_norm": 2.7303368182841417, "learning_rate": 1.9929921521457865e-05, "loss": 0.8759, "step": 871 }, { "epoch": 0.1334353481254782, "grad_norm": 2.5203669088161336, "learning_rate": 1.9929628344100907e-05, "loss": 0.8028, "step": 872 }, { "epoch": 0.1335883703136955, "grad_norm": 2.7611918709365124, "learning_rate": 1.9929334556926628e-05, "loss": 0.8294, "step": 873 }, { "epoch": 0.13374139250191278, "grad_norm": 2.6936499626342556, "learning_rate": 1.992904015995307e-05, "loss": 0.8079, "step": 874 }, { "epoch": 0.13389441469013008, "grad_norm": 2.593917756784823, "learning_rate": 1.9928745153198313e-05, "loss": 0.7629, "step": 875 }, { "epoch": 0.13404743687834736, "grad_norm": 2.578488093197608, "learning_rate": 1.9928449536680476e-05, "loss": 0.7517, "step": 876 }, { "epoch": 0.13420045906656466, "grad_norm": 2.499522490435344, "learning_rate": 1.9928153310417712e-05, "loss": 0.8801, "step": 877 }, { "epoch": 0.13435348125478194, "grad_norm": 2.867320358801422, "learning_rate": 1.9927856474428215e-05, "loss": 0.9224, "step": 878 }, { "epoch": 0.13450650344299925, "grad_norm": 2.5535956136527846, "learning_rate": 1.9927559028730212e-05, "loss": 0.7598, "step": 879 }, { "epoch": 0.13465952563121653, "grad_norm": 2.779439531223504, "learning_rate": 1.992726097334197e-05, "loss": 0.9068, "step": 880 }, { "epoch": 0.13481254781943383, "grad_norm": 2.821228950007975, "learning_rate": 1.9926962308281802e-05, "loss": 0.7944, "step": 881 }, { "epoch": 0.1349655700076511, "grad_norm": 2.6330247037928824, "learning_rate": 1.992666303356804e-05, "loss": 0.7822, "step": 882 }, { "epoch": 0.13511859219586841, "grad_norm": 2.840038821329292, "learning_rate": 1.992636314921907e-05, "loss": 0.8301, "step": 883 }, { "epoch": 0.1352716143840857, "grad_norm": 2.6793404976515434, "learning_rate": 1.9926062655253305e-05, "loss": 0.7915, "step": 884 }, { "epoch": 0.135424636572303, "grad_norm": 2.689530769295994, "learning_rate": 1.9925761551689203e-05, "loss": 0.9103, "step": 885 }, { "epoch": 0.13557765876052028, "grad_norm": 2.887001810355891, "learning_rate": 1.9925459838545252e-05, "loss": 0.8768, "step": 886 }, { "epoch": 0.13573068094873755, "grad_norm": 2.877012186365154, "learning_rate": 1.9925157515839984e-05, "loss": 0.8887, "step": 887 }, { "epoch": 0.13588370313695486, "grad_norm": 2.8588587834893406, "learning_rate": 1.992485458359197e-05, "loss": 0.8792, "step": 888 }, { "epoch": 0.13603672532517214, "grad_norm": 3.1905984297528964, "learning_rate": 1.9924551041819807e-05, "loss": 0.895, "step": 889 }, { "epoch": 0.13618974751338944, "grad_norm": 2.7402318928244083, "learning_rate": 1.9924246890542137e-05, "loss": 0.8693, "step": 890 }, { "epoch": 0.13634276970160672, "grad_norm": 2.818887709481401, "learning_rate": 1.9923942129777644e-05, "loss": 0.7851, "step": 891 }, { "epoch": 0.13649579188982403, "grad_norm": 2.6259577978312407, "learning_rate": 1.992363675954504e-05, "loss": 0.9576, "step": 892 }, { "epoch": 0.1366488140780413, "grad_norm": 2.863449512098866, "learning_rate": 1.9923330779863084e-05, "loss": 0.9511, "step": 893 }, { "epoch": 0.1368018362662586, "grad_norm": 2.562371413427193, "learning_rate": 1.992302419075056e-05, "loss": 0.8163, "step": 894 }, { "epoch": 0.1369548584544759, "grad_norm": 2.8870573577788727, "learning_rate": 1.992271699222631e-05, "loss": 0.9528, "step": 895 }, { "epoch": 0.1371078806426932, "grad_norm": 2.6937895637721723, "learning_rate": 1.9922409184309184e-05, "loss": 0.8091, "step": 896 }, { "epoch": 0.13726090283091047, "grad_norm": 2.6630413615518163, "learning_rate": 1.9922100767018095e-05, "loss": 0.7998, "step": 897 }, { "epoch": 0.13741392501912778, "grad_norm": 2.625511387398233, "learning_rate": 1.9921791740371982e-05, "loss": 0.9357, "step": 898 }, { "epoch": 0.13756694720734505, "grad_norm": 2.758458249926434, "learning_rate": 1.9921482104389827e-05, "loss": 0.8647, "step": 899 }, { "epoch": 0.13771996939556236, "grad_norm": 2.976298176316275, "learning_rate": 1.992117185909064e-05, "loss": 0.7932, "step": 900 }, { "epoch": 0.13787299158377964, "grad_norm": 2.68384833081395, "learning_rate": 1.9920861004493477e-05, "loss": 0.9454, "step": 901 }, { "epoch": 0.13802601377199694, "grad_norm": 2.7413100902098653, "learning_rate": 1.992054954061743e-05, "loss": 0.8505, "step": 902 }, { "epoch": 0.13817903596021422, "grad_norm": 3.218955544647937, "learning_rate": 1.9920237467481628e-05, "loss": 0.9033, "step": 903 }, { "epoch": 0.13833205814843152, "grad_norm": 2.5946224723970035, "learning_rate": 1.9919924785105235e-05, "loss": 0.7389, "step": 904 }, { "epoch": 0.1384850803366488, "grad_norm": 2.6176807752494726, "learning_rate": 1.991961149350745e-05, "loss": 0.8513, "step": 905 }, { "epoch": 0.1386381025248661, "grad_norm": 3.1291931843932335, "learning_rate": 1.991929759270752e-05, "loss": 0.8245, "step": 906 }, { "epoch": 0.13879112471308339, "grad_norm": 2.8736841925384025, "learning_rate": 1.991898308272472e-05, "loss": 0.9262, "step": 907 }, { "epoch": 0.1389441469013007, "grad_norm": 2.609901479819786, "learning_rate": 1.9918667963578368e-05, "loss": 0.783, "step": 908 }, { "epoch": 0.13909716908951797, "grad_norm": 2.6331118876099873, "learning_rate": 1.991835223528781e-05, "loss": 0.7673, "step": 909 }, { "epoch": 0.13925019127773527, "grad_norm": 2.9613767339162305, "learning_rate": 1.9918035897872445e-05, "loss": 0.8643, "step": 910 }, { "epoch": 0.13940321346595255, "grad_norm": 3.039493882439516, "learning_rate": 1.9917718951351692e-05, "loss": 0.9873, "step": 911 }, { "epoch": 0.13955623565416986, "grad_norm": 3.239850256883201, "learning_rate": 1.991740139574502e-05, "loss": 0.884, "step": 912 }, { "epoch": 0.13970925784238714, "grad_norm": 2.8892605380358294, "learning_rate": 1.9917083231071933e-05, "loss": 0.8356, "step": 913 }, { "epoch": 0.13986228003060444, "grad_norm": 3.2132002955197034, "learning_rate": 1.991676445735197e-05, "loss": 0.837, "step": 914 }, { "epoch": 0.14001530221882172, "grad_norm": 3.2006127364915913, "learning_rate": 1.9916445074604705e-05, "loss": 0.8063, "step": 915 }, { "epoch": 0.14016832440703902, "grad_norm": 2.899427273902653, "learning_rate": 1.9916125082849755e-05, "loss": 0.7584, "step": 916 }, { "epoch": 0.1403213465952563, "grad_norm": 2.914187996315459, "learning_rate": 1.991580448210677e-05, "loss": 0.8503, "step": 917 }, { "epoch": 0.1404743687834736, "grad_norm": 2.804034994852326, "learning_rate": 1.9915483272395445e-05, "loss": 0.8292, "step": 918 }, { "epoch": 0.14062739097169089, "grad_norm": 2.680970492007158, "learning_rate": 1.99151614537355e-05, "loss": 0.9034, "step": 919 }, { "epoch": 0.1407804131599082, "grad_norm": 2.7084201578294516, "learning_rate": 1.9914839026146702e-05, "loss": 0.8518, "step": 920 }, { "epoch": 0.14093343534812547, "grad_norm": 2.553189897331636, "learning_rate": 1.9914515989648852e-05, "loss": 0.7878, "step": 921 }, { "epoch": 0.14108645753634277, "grad_norm": 3.050378968443345, "learning_rate": 1.991419234426179e-05, "loss": 0.784, "step": 922 }, { "epoch": 0.14123947972456005, "grad_norm": 2.977989619703046, "learning_rate": 1.991386809000539e-05, "loss": 1.0098, "step": 923 }, { "epoch": 0.14139250191277736, "grad_norm": 2.8242683639181667, "learning_rate": 1.991354322689957e-05, "loss": 0.8582, "step": 924 }, { "epoch": 0.14154552410099464, "grad_norm": 2.7128793061444596, "learning_rate": 1.991321775496428e-05, "loss": 0.7909, "step": 925 }, { "epoch": 0.14169854628921194, "grad_norm": 2.9979822954093884, "learning_rate": 1.9912891674219502e-05, "loss": 0.9373, "step": 926 }, { "epoch": 0.14185156847742922, "grad_norm": 2.9482931261488936, "learning_rate": 1.991256498468527e-05, "loss": 0.6891, "step": 927 }, { "epoch": 0.14200459066564652, "grad_norm": 3.0299388842655723, "learning_rate": 1.9912237686381643e-05, "loss": 0.8743, "step": 928 }, { "epoch": 0.1421576128538638, "grad_norm": 2.6801748229278917, "learning_rate": 1.991190977932872e-05, "loss": 0.863, "step": 929 }, { "epoch": 0.1423106350420811, "grad_norm": 3.085060897266117, "learning_rate": 1.9911581263546643e-05, "loss": 0.8992, "step": 930 }, { "epoch": 0.14246365723029839, "grad_norm": 2.800811746754135, "learning_rate": 1.991125213905559e-05, "loss": 0.9399, "step": 931 }, { "epoch": 0.1426166794185157, "grad_norm": 2.830458349120088, "learning_rate": 1.991092240587577e-05, "loss": 0.9702, "step": 932 }, { "epoch": 0.14276970160673297, "grad_norm": 2.8432982293955016, "learning_rate": 1.991059206402743e-05, "loss": 0.8011, "step": 933 }, { "epoch": 0.14292272379495027, "grad_norm": 2.739167967766181, "learning_rate": 1.9910261113530863e-05, "loss": 0.8164, "step": 934 }, { "epoch": 0.14307574598316755, "grad_norm": 2.563160167423824, "learning_rate": 1.9909929554406388e-05, "loss": 0.7779, "step": 935 }, { "epoch": 0.14322876817138486, "grad_norm": 3.186579436293255, "learning_rate": 1.9909597386674374e-05, "loss": 0.7571, "step": 936 }, { "epoch": 0.14338179035960213, "grad_norm": 2.708795534392877, "learning_rate": 1.990926461035522e-05, "loss": 0.8351, "step": 937 }, { "epoch": 0.14353481254781944, "grad_norm": 2.5425722820092065, "learning_rate": 1.990893122546936e-05, "loss": 0.8501, "step": 938 }, { "epoch": 0.14368783473603672, "grad_norm": 2.670809594852604, "learning_rate": 1.990859723203727e-05, "loss": 0.7912, "step": 939 }, { "epoch": 0.14384085692425402, "grad_norm": 3.1185793441810232, "learning_rate": 1.9908262630079454e-05, "loss": 0.8205, "step": 940 }, { "epoch": 0.1439938791124713, "grad_norm": 2.731495965280675, "learning_rate": 1.9907927419616477e-05, "loss": 0.7788, "step": 941 }, { "epoch": 0.1441469013006886, "grad_norm": 3.275322213863118, "learning_rate": 1.9907591600668916e-05, "loss": 1.0141, "step": 942 }, { "epoch": 0.14429992348890588, "grad_norm": 2.6128430204439765, "learning_rate": 1.9907255173257393e-05, "loss": 0.9174, "step": 943 }, { "epoch": 0.1444529456771232, "grad_norm": 2.908802026725082, "learning_rate": 1.9906918137402574e-05, "loss": 0.8608, "step": 944 }, { "epoch": 0.14460596786534047, "grad_norm": 2.7143909626282055, "learning_rate": 1.990658049312516e-05, "loss": 0.772, "step": 945 }, { "epoch": 0.14475899005355777, "grad_norm": 2.7624040519127706, "learning_rate": 1.9906242240445878e-05, "loss": 0.9278, "step": 946 }, { "epoch": 0.14491201224177505, "grad_norm": 2.7166448739759246, "learning_rate": 1.990590337938551e-05, "loss": 0.8404, "step": 947 }, { "epoch": 0.14506503442999236, "grad_norm": 2.9800310838070847, "learning_rate": 1.990556390996486e-05, "loss": 0.9234, "step": 948 }, { "epoch": 0.14521805661820963, "grad_norm": 3.0606738732407477, "learning_rate": 1.990522383220478e-05, "loss": 0.8102, "step": 949 }, { "epoch": 0.14537107880642694, "grad_norm": 3.1187386786744176, "learning_rate": 1.9904883146126157e-05, "loss": 0.7964, "step": 950 }, { "epoch": 0.14552410099464422, "grad_norm": 2.848220059970956, "learning_rate": 1.990454185174991e-05, "loss": 0.897, "step": 951 }, { "epoch": 0.14567712318286152, "grad_norm": 2.6548873702733613, "learning_rate": 1.9904199949097e-05, "loss": 0.7779, "step": 952 }, { "epoch": 0.1458301453710788, "grad_norm": 2.846580038404024, "learning_rate": 1.990385743818843e-05, "loss": 0.8872, "step": 953 }, { "epoch": 0.1459831675592961, "grad_norm": 3.1386716203357143, "learning_rate": 1.9903514319045224e-05, "loss": 0.981, "step": 954 }, { "epoch": 0.14613618974751338, "grad_norm": 2.6161336399690107, "learning_rate": 1.990317059168847e-05, "loss": 0.8891, "step": 955 }, { "epoch": 0.1462892119357307, "grad_norm": 2.9030163643864477, "learning_rate": 1.990282625613926e-05, "loss": 0.9119, "step": 956 }, { "epoch": 0.14644223412394797, "grad_norm": 2.6876433319201656, "learning_rate": 1.9902481312418754e-05, "loss": 0.9507, "step": 957 }, { "epoch": 0.14659525631216527, "grad_norm": 2.654743185149196, "learning_rate": 1.990213576054813e-05, "loss": 0.7974, "step": 958 }, { "epoch": 0.14674827850038255, "grad_norm": 2.444896860388207, "learning_rate": 1.9901789600548612e-05, "loss": 0.7975, "step": 959 }, { "epoch": 0.14690130068859986, "grad_norm": 2.6102754444080176, "learning_rate": 1.990144283244146e-05, "loss": 0.8526, "step": 960 }, { "epoch": 0.14705432287681713, "grad_norm": 2.3982164679534983, "learning_rate": 1.990109545624797e-05, "loss": 0.7627, "step": 961 }, { "epoch": 0.14720734506503444, "grad_norm": 2.816740248073328, "learning_rate": 1.990074747198947e-05, "loss": 0.8622, "step": 962 }, { "epoch": 0.14736036725325172, "grad_norm": 2.3312900266202554, "learning_rate": 1.9900398879687343e-05, "loss": 0.6515, "step": 963 }, { "epoch": 0.14751338944146902, "grad_norm": 3.27051923019561, "learning_rate": 1.9900049679362982e-05, "loss": 0.9881, "step": 964 }, { "epoch": 0.1476664116296863, "grad_norm": 2.515455754322227, "learning_rate": 1.9899699871037847e-05, "loss": 0.8661, "step": 965 }, { "epoch": 0.1478194338179036, "grad_norm": 2.814087317408005, "learning_rate": 1.989934945473341e-05, "loss": 0.8411, "step": 966 }, { "epoch": 0.14797245600612088, "grad_norm": 2.8017718511765213, "learning_rate": 1.9898998430471202e-05, "loss": 0.9098, "step": 967 }, { "epoch": 0.1481254781943382, "grad_norm": 2.629421952387108, "learning_rate": 1.9898646798272773e-05, "loss": 0.8979, "step": 968 }, { "epoch": 0.14827850038255547, "grad_norm": 2.841954083166271, "learning_rate": 1.9898294558159722e-05, "loss": 0.899, "step": 969 }, { "epoch": 0.14843152257077277, "grad_norm": 2.8553527981636013, "learning_rate": 1.9897941710153677e-05, "loss": 0.8999, "step": 970 }, { "epoch": 0.14858454475899005, "grad_norm": 2.813663450639181, "learning_rate": 1.989758825427631e-05, "loss": 0.8266, "step": 971 }, { "epoch": 0.14873756694720736, "grad_norm": 2.8659970900711063, "learning_rate": 1.989723419054933e-05, "loss": 0.7943, "step": 972 }, { "epoch": 0.14889058913542463, "grad_norm": 2.7945735429929965, "learning_rate": 1.9896879518994483e-05, "loss": 0.794, "step": 973 }, { "epoch": 0.14904361132364194, "grad_norm": 2.7096673500989565, "learning_rate": 1.9896524239633543e-05, "loss": 0.7983, "step": 974 }, { "epoch": 0.14919663351185922, "grad_norm": 3.053720034752578, "learning_rate": 1.9896168352488336e-05, "loss": 0.9787, "step": 975 }, { "epoch": 0.14934965570007652, "grad_norm": 2.6802833840317244, "learning_rate": 1.9895811857580717e-05, "loss": 0.8805, "step": 976 }, { "epoch": 0.1495026778882938, "grad_norm": 2.9100073841921876, "learning_rate": 1.989545475493258e-05, "loss": 0.9091, "step": 977 }, { "epoch": 0.1496557000765111, "grad_norm": 2.8957628677948177, "learning_rate": 1.9895097044565853e-05, "loss": 0.8288, "step": 978 }, { "epoch": 0.14980872226472838, "grad_norm": 3.038264240319414, "learning_rate": 1.989473872650251e-05, "loss": 0.9798, "step": 979 }, { "epoch": 0.1499617444529457, "grad_norm": 2.5609975366129483, "learning_rate": 1.9894379800764548e-05, "loss": 0.7845, "step": 980 }, { "epoch": 0.15011476664116297, "grad_norm": 2.7823981767835066, "learning_rate": 1.9894020267374025e-05, "loss": 0.7783, "step": 981 }, { "epoch": 0.15026778882938027, "grad_norm": 3.3243553824353214, "learning_rate": 1.9893660126353002e-05, "loss": 1.0154, "step": 982 }, { "epoch": 0.15042081101759755, "grad_norm": 2.480366976477657, "learning_rate": 1.9893299377723608e-05, "loss": 0.7665, "step": 983 }, { "epoch": 0.15057383320581486, "grad_norm": 2.571561037126822, "learning_rate": 1.9892938021508e-05, "loss": 0.8231, "step": 984 }, { "epoch": 0.15072685539403213, "grad_norm": 3.066861871830307, "learning_rate": 1.9892576057728366e-05, "loss": 0.8987, "step": 985 }, { "epoch": 0.15087987758224944, "grad_norm": 3.0035198245097923, "learning_rate": 1.9892213486406937e-05, "loss": 0.8285, "step": 986 }, { "epoch": 0.15103289977046672, "grad_norm": 2.4764522581212387, "learning_rate": 1.9891850307565976e-05, "loss": 0.7679, "step": 987 }, { "epoch": 0.15118592195868402, "grad_norm": 2.7267601838920257, "learning_rate": 1.989148652122779e-05, "loss": 0.8122, "step": 988 }, { "epoch": 0.1513389441469013, "grad_norm": 2.5889920763864374, "learning_rate": 1.9891122127414725e-05, "loss": 0.7737, "step": 989 }, { "epoch": 0.1514919663351186, "grad_norm": 2.8995325510612138, "learning_rate": 1.9890757126149154e-05, "loss": 0.9062, "step": 990 }, { "epoch": 0.15164498852333588, "grad_norm": 2.7144127871621495, "learning_rate": 1.9890391517453495e-05, "loss": 0.8394, "step": 991 }, { "epoch": 0.1517980107115532, "grad_norm": 2.9785649600008, "learning_rate": 1.9890025301350202e-05, "loss": 0.7811, "step": 992 }, { "epoch": 0.15195103289977047, "grad_norm": 2.7814497882213547, "learning_rate": 1.9889658477861764e-05, "loss": 0.7973, "step": 993 }, { "epoch": 0.15210405508798777, "grad_norm": 3.16891375953589, "learning_rate": 1.9889291047010713e-05, "loss": 0.8412, "step": 994 }, { "epoch": 0.15225707727620505, "grad_norm": 2.5061691308106853, "learning_rate": 1.9888923008819607e-05, "loss": 0.7916, "step": 995 }, { "epoch": 0.15241009946442236, "grad_norm": 2.863002070757968, "learning_rate": 1.9888554363311058e-05, "loss": 0.8654, "step": 996 }, { "epoch": 0.15256312165263963, "grad_norm": 2.6516947662903343, "learning_rate": 1.9888185110507702e-05, "loss": 0.8693, "step": 997 }, { "epoch": 0.1527161438408569, "grad_norm": 2.655655971232325, "learning_rate": 1.988781525043221e-05, "loss": 0.8319, "step": 998 }, { "epoch": 0.15286916602907422, "grad_norm": 2.450582508006812, "learning_rate": 1.9887444783107302e-05, "loss": 0.8347, "step": 999 }, { "epoch": 0.1530221882172915, "grad_norm": 2.670839990441853, "learning_rate": 1.9887073708555736e-05, "loss": 0.8599, "step": 1000 }, { "epoch": 0.1531752104055088, "grad_norm": 2.3507137132115736, "learning_rate": 1.9886702026800295e-05, "loss": 0.7578, "step": 1001 }, { "epoch": 0.15332823259372608, "grad_norm": 2.7099399060859444, "learning_rate": 1.98863297378638e-05, "loss": 0.8655, "step": 1002 }, { "epoch": 0.15348125478194338, "grad_norm": 2.5790296307424883, "learning_rate": 1.9885956841769124e-05, "loss": 0.8469, "step": 1003 }, { "epoch": 0.15363427697016066, "grad_norm": 2.265260853462987, "learning_rate": 1.9885583338539162e-05, "loss": 0.8579, "step": 1004 }, { "epoch": 0.15378729915837797, "grad_norm": 2.4073495126193305, "learning_rate": 1.9885209228196854e-05, "loss": 0.7811, "step": 1005 }, { "epoch": 0.15394032134659524, "grad_norm": 2.736856123838533, "learning_rate": 1.988483451076518e-05, "loss": 0.8012, "step": 1006 }, { "epoch": 0.15409334353481255, "grad_norm": 2.9918761922844714, "learning_rate": 1.988445918626715e-05, "loss": 0.9105, "step": 1007 }, { "epoch": 0.15424636572302983, "grad_norm": 2.770423326239471, "learning_rate": 1.9884083254725808e-05, "loss": 0.862, "step": 1008 }, { "epoch": 0.15439938791124713, "grad_norm": 2.596810493782701, "learning_rate": 1.988370671616425e-05, "loss": 0.8444, "step": 1009 }, { "epoch": 0.1545524100994644, "grad_norm": 2.570741057177668, "learning_rate": 1.9883329570605594e-05, "loss": 0.835, "step": 1010 }, { "epoch": 0.15470543228768172, "grad_norm": 2.4655694875759706, "learning_rate": 1.9882951818073008e-05, "loss": 0.9392, "step": 1011 }, { "epoch": 0.154858454475899, "grad_norm": 2.881513696814611, "learning_rate": 1.9882573458589687e-05, "loss": 0.871, "step": 1012 }, { "epoch": 0.1550114766641163, "grad_norm": 3.1776782245839135, "learning_rate": 1.988219449217887e-05, "loss": 0.7849, "step": 1013 }, { "epoch": 0.15516449885233358, "grad_norm": 2.911172268228049, "learning_rate": 1.988181491886383e-05, "loss": 0.8837, "step": 1014 }, { "epoch": 0.15531752104055088, "grad_norm": 2.4686030359884743, "learning_rate": 1.9881434738667877e-05, "loss": 0.8141, "step": 1015 }, { "epoch": 0.15547054322876816, "grad_norm": 2.897765487407931, "learning_rate": 1.988105395161436e-05, "loss": 0.9853, "step": 1016 }, { "epoch": 0.15562356541698547, "grad_norm": 2.71964317721296, "learning_rate": 1.9880672557726667e-05, "loss": 0.7614, "step": 1017 }, { "epoch": 0.15577658760520274, "grad_norm": 2.9302612921371485, "learning_rate": 1.9880290557028215e-05, "loss": 0.8674, "step": 1018 }, { "epoch": 0.15592960979342005, "grad_norm": 2.7434432525327455, "learning_rate": 1.9879907949542475e-05, "loss": 0.8911, "step": 1019 }, { "epoch": 0.15608263198163733, "grad_norm": 3.1889761223051107, "learning_rate": 1.987952473529293e-05, "loss": 0.8377, "step": 1020 }, { "epoch": 0.15623565416985463, "grad_norm": 2.81375672067027, "learning_rate": 1.987914091430313e-05, "loss": 0.8919, "step": 1021 }, { "epoch": 0.1563886763580719, "grad_norm": 2.8180296209140563, "learning_rate": 1.987875648659663e-05, "loss": 0.9621, "step": 1022 }, { "epoch": 0.15654169854628922, "grad_norm": 3.2066379325461916, "learning_rate": 1.9878371452197053e-05, "loss": 0.957, "step": 1023 }, { "epoch": 0.1566947207345065, "grad_norm": 2.6277701752914204, "learning_rate": 1.987798581112804e-05, "loss": 0.8304, "step": 1024 }, { "epoch": 0.1568477429227238, "grad_norm": 2.6966984629555903, "learning_rate": 1.9877599563413277e-05, "loss": 0.7235, "step": 1025 }, { "epoch": 0.15700076511094108, "grad_norm": 2.73955117421016, "learning_rate": 1.987721270907648e-05, "loss": 0.7999, "step": 1026 }, { "epoch": 0.15715378729915838, "grad_norm": 3.725546674908976, "learning_rate": 1.9876825248141413e-05, "loss": 0.8339, "step": 1027 }, { "epoch": 0.15730680948737566, "grad_norm": 2.578167705290565, "learning_rate": 1.9876437180631873e-05, "loss": 0.7681, "step": 1028 }, { "epoch": 0.15745983167559297, "grad_norm": 2.7771069205470837, "learning_rate": 1.987604850657168e-05, "loss": 0.7977, "step": 1029 }, { "epoch": 0.15761285386381024, "grad_norm": 2.8394656456803653, "learning_rate": 1.987565922598472e-05, "loss": 0.7927, "step": 1030 }, { "epoch": 0.15776587605202755, "grad_norm": 2.6510579492848088, "learning_rate": 1.9875269338894888e-05, "loss": 0.725, "step": 1031 }, { "epoch": 0.15791889824024483, "grad_norm": 2.837067274020622, "learning_rate": 1.9874878845326137e-05, "loss": 0.9011, "step": 1032 }, { "epoch": 0.15807192042846213, "grad_norm": 2.8520975345419792, "learning_rate": 1.9874487745302446e-05, "loss": 0.8493, "step": 1033 }, { "epoch": 0.1582249426166794, "grad_norm": 3.1030948946703525, "learning_rate": 1.9874096038847834e-05, "loss": 1.0212, "step": 1034 }, { "epoch": 0.15837796480489671, "grad_norm": 2.9449253861471143, "learning_rate": 1.9873703725986353e-05, "loss": 0.9892, "step": 1035 }, { "epoch": 0.158530986993114, "grad_norm": 2.6094643163438906, "learning_rate": 1.9873310806742103e-05, "loss": 0.8207, "step": 1036 }, { "epoch": 0.1586840091813313, "grad_norm": 2.8265307957870913, "learning_rate": 1.9872917281139208e-05, "loss": 0.79, "step": 1037 }, { "epoch": 0.15883703136954858, "grad_norm": 2.900708034355136, "learning_rate": 1.9872523149201844e-05, "loss": 0.9009, "step": 1038 }, { "epoch": 0.15899005355776588, "grad_norm": 2.8149903877112554, "learning_rate": 1.987212841095421e-05, "loss": 0.9657, "step": 1039 }, { "epoch": 0.15914307574598316, "grad_norm": 2.8216182032358788, "learning_rate": 1.9871733066420548e-05, "loss": 0.916, "step": 1040 }, { "epoch": 0.15929609793420046, "grad_norm": 3.0351034690911467, "learning_rate": 1.9871337115625146e-05, "loss": 0.8505, "step": 1041 }, { "epoch": 0.15944912012241774, "grad_norm": 2.4801658941328246, "learning_rate": 1.9870940558592312e-05, "loss": 0.8227, "step": 1042 }, { "epoch": 0.15960214231063505, "grad_norm": 2.745895940283436, "learning_rate": 1.98705433953464e-05, "loss": 0.6975, "step": 1043 }, { "epoch": 0.15975516449885233, "grad_norm": 2.8931887662338664, "learning_rate": 1.9870145625911806e-05, "loss": 0.8135, "step": 1044 }, { "epoch": 0.15990818668706963, "grad_norm": 3.020183308577327, "learning_rate": 1.9869747250312956e-05, "loss": 0.9374, "step": 1045 }, { "epoch": 0.1600612088752869, "grad_norm": 2.91238434506869, "learning_rate": 1.986934826857432e-05, "loss": 0.7985, "step": 1046 }, { "epoch": 0.16021423106350421, "grad_norm": 2.8074374602676264, "learning_rate": 1.9868948680720396e-05, "loss": 0.762, "step": 1047 }, { "epoch": 0.1603672532517215, "grad_norm": 2.8655970184823967, "learning_rate": 1.9868548486775726e-05, "loss": 0.9527, "step": 1048 }, { "epoch": 0.1605202754399388, "grad_norm": 3.3510217577059924, "learning_rate": 1.9868147686764887e-05, "loss": 0.9492, "step": 1049 }, { "epoch": 0.16067329762815608, "grad_norm": 2.628760206614971, "learning_rate": 1.9867746280712494e-05, "loss": 0.7946, "step": 1050 }, { "epoch": 0.16082631981637338, "grad_norm": 2.70806765868512, "learning_rate": 1.98673442686432e-05, "loss": 0.8205, "step": 1051 }, { "epoch": 0.16097934200459066, "grad_norm": 2.4609824531759905, "learning_rate": 1.986694165058169e-05, "loss": 0.8578, "step": 1052 }, { "epoch": 0.16113236419280796, "grad_norm": 3.141891000216467, "learning_rate": 1.9866538426552698e-05, "loss": 0.9828, "step": 1053 }, { "epoch": 0.16128538638102524, "grad_norm": 3.061783048506876, "learning_rate": 1.986613459658098e-05, "loss": 0.8136, "step": 1054 }, { "epoch": 0.16143840856924255, "grad_norm": 2.6758943412455745, "learning_rate": 1.9865730160691343e-05, "loss": 0.8794, "step": 1055 }, { "epoch": 0.16159143075745983, "grad_norm": 2.7201604131125876, "learning_rate": 1.986532511890862e-05, "loss": 0.9024, "step": 1056 }, { "epoch": 0.16174445294567713, "grad_norm": 3.5299447043757337, "learning_rate": 1.9864919471257685e-05, "loss": 0.8843, "step": 1057 }, { "epoch": 0.1618974751338944, "grad_norm": 2.801548938363029, "learning_rate": 1.9864513217763458e-05, "loss": 0.8217, "step": 1058 }, { "epoch": 0.16205049732211171, "grad_norm": 2.5274711526613194, "learning_rate": 1.9864106358450884e-05, "loss": 0.8106, "step": 1059 }, { "epoch": 0.162203519510329, "grad_norm": 2.7733624193991093, "learning_rate": 1.9863698893344947e-05, "loss": 0.7936, "step": 1060 }, { "epoch": 0.1623565416985463, "grad_norm": 2.5893301183647273, "learning_rate": 1.9863290822470675e-05, "loss": 0.792, "step": 1061 }, { "epoch": 0.16250956388676358, "grad_norm": 2.8266067518782934, "learning_rate": 1.9862882145853127e-05, "loss": 0.9559, "step": 1062 }, { "epoch": 0.16266258607498088, "grad_norm": 2.951855197304437, "learning_rate": 1.986247286351741e-05, "loss": 0.8523, "step": 1063 }, { "epoch": 0.16281560826319816, "grad_norm": 2.378517248975909, "learning_rate": 1.9862062975488645e-05, "loss": 0.8576, "step": 1064 }, { "epoch": 0.16296863045141546, "grad_norm": 2.795790531253483, "learning_rate": 1.986165248179201e-05, "loss": 0.8383, "step": 1065 }, { "epoch": 0.16312165263963274, "grad_norm": 2.618907367926037, "learning_rate": 1.9861241382452724e-05, "loss": 0.8413, "step": 1066 }, { "epoch": 0.16327467482785005, "grad_norm": 2.8494588393913336, "learning_rate": 1.9860829677496024e-05, "loss": 0.8867, "step": 1067 }, { "epoch": 0.16342769701606732, "grad_norm": 2.5805689918609773, "learning_rate": 1.98604173669472e-05, "loss": 0.8482, "step": 1068 }, { "epoch": 0.16358071920428463, "grad_norm": 2.711715960933929, "learning_rate": 1.9860004450831566e-05, "loss": 0.935, "step": 1069 }, { "epoch": 0.1637337413925019, "grad_norm": 2.7090531671259797, "learning_rate": 1.9859590929174487e-05, "loss": 0.7718, "step": 1070 }, { "epoch": 0.1638867635807192, "grad_norm": 2.6128893829365705, "learning_rate": 1.985917680200136e-05, "loss": 0.9498, "step": 1071 }, { "epoch": 0.1640397857689365, "grad_norm": 2.8396674469625998, "learning_rate": 1.985876206933762e-05, "loss": 0.9958, "step": 1072 }, { "epoch": 0.1641928079571538, "grad_norm": 2.8043208696375586, "learning_rate": 1.9858346731208732e-05, "loss": 0.8735, "step": 1073 }, { "epoch": 0.16434583014537107, "grad_norm": 2.684593665840827, "learning_rate": 1.98579307876402e-05, "loss": 0.8892, "step": 1074 }, { "epoch": 0.16449885233358838, "grad_norm": 2.596969620943833, "learning_rate": 1.9857514238657576e-05, "loss": 0.8741, "step": 1075 }, { "epoch": 0.16465187452180566, "grad_norm": 2.9338466904139917, "learning_rate": 1.9857097084286445e-05, "loss": 0.9696, "step": 1076 }, { "epoch": 0.16480489671002296, "grad_norm": 2.6540784341987087, "learning_rate": 1.9856679324552415e-05, "loss": 0.8988, "step": 1077 }, { "epoch": 0.16495791889824024, "grad_norm": 2.7019156426629034, "learning_rate": 1.985626095948115e-05, "loss": 0.8065, "step": 1078 }, { "epoch": 0.16511094108645755, "grad_norm": 2.615891811054439, "learning_rate": 1.9855841989098343e-05, "loss": 0.7563, "step": 1079 }, { "epoch": 0.16526396327467482, "grad_norm": 2.409266564450927, "learning_rate": 1.985542241342972e-05, "loss": 0.8721, "step": 1080 }, { "epoch": 0.16541698546289213, "grad_norm": 2.5645546399602397, "learning_rate": 1.985500223250105e-05, "loss": 0.7746, "step": 1081 }, { "epoch": 0.1655700076511094, "grad_norm": 2.606535862888543, "learning_rate": 1.985458144633815e-05, "loss": 0.9245, "step": 1082 }, { "epoch": 0.1657230298393267, "grad_norm": 3.102399821161681, "learning_rate": 1.9854160054966845e-05, "loss": 0.8806, "step": 1083 }, { "epoch": 0.165876052027544, "grad_norm": 2.5601279913055994, "learning_rate": 1.985373805841302e-05, "loss": 0.8015, "step": 1084 }, { "epoch": 0.1660290742157613, "grad_norm": 2.61724976207117, "learning_rate": 1.9853315456702594e-05, "loss": 0.7555, "step": 1085 }, { "epoch": 0.16618209640397857, "grad_norm": 2.725383208135442, "learning_rate": 1.9852892249861522e-05, "loss": 0.8368, "step": 1086 }, { "epoch": 0.16633511859219588, "grad_norm": 2.765961715816743, "learning_rate": 1.985246843791579e-05, "loss": 0.8246, "step": 1087 }, { "epoch": 0.16648814078041316, "grad_norm": 2.8936965697100505, "learning_rate": 1.985204402089143e-05, "loss": 0.8021, "step": 1088 }, { "epoch": 0.16664116296863046, "grad_norm": 2.713935819782728, "learning_rate": 1.9851618998814503e-05, "loss": 0.7641, "step": 1089 }, { "epoch": 0.16679418515684774, "grad_norm": 2.7965825996455957, "learning_rate": 1.9851193371711113e-05, "loss": 0.8254, "step": 1090 }, { "epoch": 0.16694720734506505, "grad_norm": 2.695814350476115, "learning_rate": 1.9850767139607404e-05, "loss": 0.8409, "step": 1091 }, { "epoch": 0.16710022953328232, "grad_norm": 2.8587205196165515, "learning_rate": 1.9850340302529545e-05, "loss": 0.8999, "step": 1092 }, { "epoch": 0.16725325172149963, "grad_norm": 2.616711026159911, "learning_rate": 1.9849912860503753e-05, "loss": 0.8085, "step": 1093 }, { "epoch": 0.1674062739097169, "grad_norm": 2.942385943591848, "learning_rate": 1.984948481355628e-05, "loss": 0.9305, "step": 1094 }, { "epoch": 0.1675592960979342, "grad_norm": 2.8217129762291595, "learning_rate": 1.9849056161713415e-05, "loss": 0.8427, "step": 1095 }, { "epoch": 0.1677123182861515, "grad_norm": 2.92693636303928, "learning_rate": 1.984862690500148e-05, "loss": 0.8569, "step": 1096 }, { "epoch": 0.1678653404743688, "grad_norm": 2.558373593775746, "learning_rate": 1.984819704344684e-05, "loss": 0.9141, "step": 1097 }, { "epoch": 0.16801836266258607, "grad_norm": 2.6662816349876004, "learning_rate": 1.984776657707589e-05, "loss": 0.8173, "step": 1098 }, { "epoch": 0.16817138485080338, "grad_norm": 2.914507887638554, "learning_rate": 1.984733550591507e-05, "loss": 0.8787, "step": 1099 }, { "epoch": 0.16832440703902066, "grad_norm": 2.909457834414278, "learning_rate": 1.9846903829990857e-05, "loss": 0.9803, "step": 1100 }, { "epoch": 0.16847742922723796, "grad_norm": 2.639150497743505, "learning_rate": 1.9846471549329758e-05, "loss": 0.8015, "step": 1101 }, { "epoch": 0.16863045141545524, "grad_norm": 2.594125634192865, "learning_rate": 1.984603866395832e-05, "loss": 0.7903, "step": 1102 }, { "epoch": 0.16878347360367255, "grad_norm": 2.369582246149647, "learning_rate": 1.984560517390313e-05, "loss": 0.7992, "step": 1103 }, { "epoch": 0.16893649579188982, "grad_norm": 2.555457979137369, "learning_rate": 1.984517107919081e-05, "loss": 0.8961, "step": 1104 }, { "epoch": 0.16908951798010713, "grad_norm": 2.615173559437897, "learning_rate": 1.984473637984802e-05, "loss": 0.9241, "step": 1105 }, { "epoch": 0.1692425401683244, "grad_norm": 2.6096821694922188, "learning_rate": 1.9844301075901455e-05, "loss": 0.8641, "step": 1106 }, { "epoch": 0.1693955623565417, "grad_norm": 2.559217882335341, "learning_rate": 1.984386516737785e-05, "loss": 0.927, "step": 1107 }, { "epoch": 0.169548584544759, "grad_norm": 2.6360804443043073, "learning_rate": 1.9843428654303977e-05, "loss": 0.8263, "step": 1108 }, { "epoch": 0.16970160673297627, "grad_norm": 2.7158983215786465, "learning_rate": 1.984299153670664e-05, "loss": 0.813, "step": 1109 }, { "epoch": 0.16985462892119357, "grad_norm": 2.850752676076143, "learning_rate": 1.984255381461269e-05, "loss": 0.7828, "step": 1110 }, { "epoch": 0.17000765110941085, "grad_norm": 2.849267604288, "learning_rate": 1.9842115488049006e-05, "loss": 0.8832, "step": 1111 }, { "epoch": 0.17016067329762816, "grad_norm": 2.8725800405214286, "learning_rate": 1.9841676557042505e-05, "loss": 0.8527, "step": 1112 }, { "epoch": 0.17031369548584543, "grad_norm": 2.7923030948061034, "learning_rate": 1.9841237021620147e-05, "loss": 0.9163, "step": 1113 }, { "epoch": 0.17046671767406274, "grad_norm": 2.6630219303474822, "learning_rate": 1.9840796881808922e-05, "loss": 0.774, "step": 1114 }, { "epoch": 0.17061973986228002, "grad_norm": 2.798714223571827, "learning_rate": 1.9840356137635865e-05, "loss": 0.8822, "step": 1115 }, { "epoch": 0.17077276205049732, "grad_norm": 2.8105521810924237, "learning_rate": 1.983991478912804e-05, "loss": 0.8292, "step": 1116 }, { "epoch": 0.1709257842387146, "grad_norm": 2.9679950044260988, "learning_rate": 1.9839472836312558e-05, "loss": 0.8392, "step": 1117 }, { "epoch": 0.1710788064269319, "grad_norm": 2.5546239972059923, "learning_rate": 1.9839030279216557e-05, "loss": 0.8779, "step": 1118 }, { "epoch": 0.17123182861514918, "grad_norm": 2.6350772004841367, "learning_rate": 1.983858711786721e-05, "loss": 0.9282, "step": 1119 }, { "epoch": 0.1713848508033665, "grad_norm": 2.40493852138416, "learning_rate": 1.9838143352291747e-05, "loss": 0.7634, "step": 1120 }, { "epoch": 0.17153787299158377, "grad_norm": 2.904701675083358, "learning_rate": 1.9837698982517408e-05, "loss": 0.8338, "step": 1121 }, { "epoch": 0.17169089517980107, "grad_norm": 2.6070853971468466, "learning_rate": 1.983725400857149e-05, "loss": 0.9001, "step": 1122 }, { "epoch": 0.17184391736801835, "grad_norm": 2.6858795073590023, "learning_rate": 1.983680843048132e-05, "loss": 0.9843, "step": 1123 }, { "epoch": 0.17199693955623566, "grad_norm": 2.44821873068937, "learning_rate": 1.9836362248274262e-05, "loss": 0.8795, "step": 1124 }, { "epoch": 0.17214996174445293, "grad_norm": 2.699038981515868, "learning_rate": 1.983591546197772e-05, "loss": 0.8473, "step": 1125 }, { "epoch": 0.17230298393267024, "grad_norm": 2.4135541032139654, "learning_rate": 1.9835468071619132e-05, "loss": 0.8204, "step": 1126 }, { "epoch": 0.17245600612088752, "grad_norm": 2.545206003492443, "learning_rate": 1.983502007722597e-05, "loss": 0.8033, "step": 1127 }, { "epoch": 0.17260902830910482, "grad_norm": 2.5812248093359975, "learning_rate": 1.9834571478825752e-05, "loss": 0.7242, "step": 1128 }, { "epoch": 0.1727620504973221, "grad_norm": 3.048017837391477, "learning_rate": 1.983412227644603e-05, "loss": 0.9028, "step": 1129 }, { "epoch": 0.1729150726855394, "grad_norm": 3.0440038836635597, "learning_rate": 1.983367247011438e-05, "loss": 0.7642, "step": 1130 }, { "epoch": 0.17306809487375668, "grad_norm": 2.4646195094640513, "learning_rate": 1.9833222059858438e-05, "loss": 0.737, "step": 1131 }, { "epoch": 0.173221117061974, "grad_norm": 2.9135955129234916, "learning_rate": 1.9832771045705862e-05, "loss": 0.8295, "step": 1132 }, { "epoch": 0.17337413925019127, "grad_norm": 2.517525658493596, "learning_rate": 1.9832319427684352e-05, "loss": 0.9521, "step": 1133 }, { "epoch": 0.17352716143840857, "grad_norm": 2.563415773261328, "learning_rate": 1.983186720582164e-05, "loss": 0.6452, "step": 1134 }, { "epoch": 0.17368018362662585, "grad_norm": 2.551066647247192, "learning_rate": 1.98314143801455e-05, "loss": 0.8596, "step": 1135 }, { "epoch": 0.17383320581484316, "grad_norm": 2.309987774636032, "learning_rate": 1.983096095068374e-05, "loss": 0.7836, "step": 1136 }, { "epoch": 0.17398622800306043, "grad_norm": 2.968328488790905, "learning_rate": 1.983050691746421e-05, "loss": 0.967, "step": 1137 }, { "epoch": 0.17413925019127774, "grad_norm": 2.70579171197833, "learning_rate": 1.9830052280514795e-05, "loss": 0.9944, "step": 1138 }, { "epoch": 0.17429227237949502, "grad_norm": 2.461411813599091, "learning_rate": 1.9829597039863416e-05, "loss": 0.8744, "step": 1139 }, { "epoch": 0.17444529456771232, "grad_norm": 2.907849288421829, "learning_rate": 1.9829141195538025e-05, "loss": 0.8599, "step": 1140 }, { "epoch": 0.1745983167559296, "grad_norm": 2.8382037466956875, "learning_rate": 1.9828684747566625e-05, "loss": 0.8959, "step": 1141 }, { "epoch": 0.1747513389441469, "grad_norm": 2.748550273869275, "learning_rate": 1.982822769597724e-05, "loss": 0.8289, "step": 1142 }, { "epoch": 0.17490436113236418, "grad_norm": 2.8067275425318052, "learning_rate": 1.9827770040797946e-05, "loss": 0.9106, "step": 1143 }, { "epoch": 0.1750573833205815, "grad_norm": 2.846345213122419, "learning_rate": 1.982731178205685e-05, "loss": 0.7729, "step": 1144 }, { "epoch": 0.17521040550879877, "grad_norm": 3.2293890063364796, "learning_rate": 1.982685291978209e-05, "loss": 0.9251, "step": 1145 }, { "epoch": 0.17536342769701607, "grad_norm": 2.858144968984894, "learning_rate": 1.9826393454001848e-05, "loss": 0.7992, "step": 1146 }, { "epoch": 0.17551644988523335, "grad_norm": 2.8071648371182345, "learning_rate": 1.9825933384744343e-05, "loss": 0.8398, "step": 1147 }, { "epoch": 0.17566947207345066, "grad_norm": 2.875344520665991, "learning_rate": 1.9825472712037836e-05, "loss": 0.8488, "step": 1148 }, { "epoch": 0.17582249426166793, "grad_norm": 2.6795215539666732, "learning_rate": 1.9825011435910606e-05, "loss": 0.8287, "step": 1149 }, { "epoch": 0.17597551644988524, "grad_norm": 2.6519672698029524, "learning_rate": 1.982454955639099e-05, "loss": 0.9665, "step": 1150 }, { "epoch": 0.17612853863810252, "grad_norm": 2.9398416836616263, "learning_rate": 1.982408707350735e-05, "loss": 0.9661, "step": 1151 }, { "epoch": 0.17628156082631982, "grad_norm": 2.5372992843733444, "learning_rate": 1.9823623987288096e-05, "loss": 0.7973, "step": 1152 }, { "epoch": 0.1764345830145371, "grad_norm": 2.5779136243237804, "learning_rate": 1.9823160297761657e-05, "loss": 0.8823, "step": 1153 }, { "epoch": 0.1765876052027544, "grad_norm": 2.6819788898485943, "learning_rate": 1.9822696004956516e-05, "loss": 0.757, "step": 1154 }, { "epoch": 0.17674062739097168, "grad_norm": 2.7476885475300827, "learning_rate": 1.982223110890119e-05, "loss": 0.9533, "step": 1155 }, { "epoch": 0.176893649579189, "grad_norm": 2.5992700129932493, "learning_rate": 1.9821765609624223e-05, "loss": 0.7441, "step": 1156 }, { "epoch": 0.17704667176740627, "grad_norm": 2.8631617649345626, "learning_rate": 1.982129950715421e-05, "loss": 0.8553, "step": 1157 }, { "epoch": 0.17719969395562357, "grad_norm": 2.8390171782673996, "learning_rate": 1.9820832801519772e-05, "loss": 0.8184, "step": 1158 }, { "epoch": 0.17735271614384085, "grad_norm": 2.712066208521167, "learning_rate": 1.9820365492749577e-05, "loss": 0.8139, "step": 1159 }, { "epoch": 0.17750573833205815, "grad_norm": 2.613425607348815, "learning_rate": 1.9819897580872313e-05, "loss": 0.8787, "step": 1160 }, { "epoch": 0.17765876052027543, "grad_norm": 2.7109084220530533, "learning_rate": 1.9819429065916725e-05, "loss": 0.9128, "step": 1161 }, { "epoch": 0.17781178270849274, "grad_norm": 2.9223737813686057, "learning_rate": 1.9818959947911585e-05, "loss": 0.8176, "step": 1162 }, { "epoch": 0.17796480489671002, "grad_norm": 2.778954100850369, "learning_rate": 1.9818490226885703e-05, "loss": 0.8663, "step": 1163 }, { "epoch": 0.17811782708492732, "grad_norm": 2.6877635305323593, "learning_rate": 1.9818019902867924e-05, "loss": 0.9036, "step": 1164 }, { "epoch": 0.1782708492731446, "grad_norm": 3.094006409195956, "learning_rate": 1.9817548975887136e-05, "loss": 0.822, "step": 1165 }, { "epoch": 0.1784238714613619, "grad_norm": 2.487275168235667, "learning_rate": 1.981707744597226e-05, "loss": 0.8939, "step": 1166 }, { "epoch": 0.17857689364957918, "grad_norm": 2.328184674285479, "learning_rate": 1.981660531315225e-05, "loss": 0.8309, "step": 1167 }, { "epoch": 0.1787299158377965, "grad_norm": 2.3136962546636464, "learning_rate": 1.9816132577456105e-05, "loss": 0.7128, "step": 1168 }, { "epoch": 0.17888293802601377, "grad_norm": 2.644074314289146, "learning_rate": 1.981565923891286e-05, "loss": 0.8859, "step": 1169 }, { "epoch": 0.17903596021423107, "grad_norm": 2.5503162195908544, "learning_rate": 1.9815185297551584e-05, "loss": 0.8524, "step": 1170 }, { "epoch": 0.17918898240244835, "grad_norm": 2.702749830814934, "learning_rate": 1.981471075340138e-05, "loss": 0.8143, "step": 1171 }, { "epoch": 0.17934200459066565, "grad_norm": 2.6253033708655713, "learning_rate": 1.981423560649139e-05, "loss": 0.7908, "step": 1172 }, { "epoch": 0.17949502677888293, "grad_norm": 2.5121491386593924, "learning_rate": 1.98137598568508e-05, "loss": 0.8573, "step": 1173 }, { "epoch": 0.17964804896710024, "grad_norm": 3.5925654232893134, "learning_rate": 1.9813283504508828e-05, "loss": 0.8668, "step": 1174 }, { "epoch": 0.17980107115531752, "grad_norm": 3.0099094303601195, "learning_rate": 1.9812806549494723e-05, "loss": 0.7845, "step": 1175 }, { "epoch": 0.17995409334353482, "grad_norm": 7.263338996571074, "learning_rate": 1.981232899183778e-05, "loss": 0.9048, "step": 1176 }, { "epoch": 0.1801071155317521, "grad_norm": 4.591653363432459, "learning_rate": 1.9811850831567327e-05, "loss": 0.7974, "step": 1177 }, { "epoch": 0.1802601377199694, "grad_norm": 3.825510779009282, "learning_rate": 1.9811372068712734e-05, "loss": 0.8295, "step": 1178 }, { "epoch": 0.18041315990818668, "grad_norm": 2.9921320333176706, "learning_rate": 1.9810892703303398e-05, "loss": 0.8919, "step": 1179 }, { "epoch": 0.180566182096404, "grad_norm": 2.7588519917332617, "learning_rate": 1.981041273536876e-05, "loss": 0.7638, "step": 1180 }, { "epoch": 0.18071920428462127, "grad_norm": 3.083353753553559, "learning_rate": 1.9809932164938297e-05, "loss": 0.942, "step": 1181 }, { "epoch": 0.18087222647283857, "grad_norm": 2.728694292712886, "learning_rate": 1.9809450992041522e-05, "loss": 0.9411, "step": 1182 }, { "epoch": 0.18102524866105585, "grad_norm": 2.7319529906411835, "learning_rate": 1.980896921670799e-05, "loss": 0.874, "step": 1183 }, { "epoch": 0.18117827084927315, "grad_norm": 2.7865710261296917, "learning_rate": 1.9808486838967286e-05, "loss": 0.9014, "step": 1184 }, { "epoch": 0.18133129303749043, "grad_norm": 2.5143812899440268, "learning_rate": 1.9808003858849032e-05, "loss": 0.8666, "step": 1185 }, { "epoch": 0.18148431522570774, "grad_norm": 2.4430869214626516, "learning_rate": 1.980752027638289e-05, "loss": 0.7535, "step": 1186 }, { "epoch": 0.18163733741392502, "grad_norm": 2.2877893367824726, "learning_rate": 1.9807036091598563e-05, "loss": 0.6914, "step": 1187 }, { "epoch": 0.18179035960214232, "grad_norm": 3.0901473909433124, "learning_rate": 1.9806551304525784e-05, "loss": 0.795, "step": 1188 }, { "epoch": 0.1819433817903596, "grad_norm": 2.7091237157952612, "learning_rate": 1.9806065915194326e-05, "loss": 0.8558, "step": 1189 }, { "epoch": 0.1820964039785769, "grad_norm": 2.7289003459871743, "learning_rate": 1.9805579923633997e-05, "loss": 0.9272, "step": 1190 }, { "epoch": 0.18224942616679418, "grad_norm": 2.9092943582139377, "learning_rate": 1.980509332987465e-05, "loss": 0.7949, "step": 1191 }, { "epoch": 0.1824024483550115, "grad_norm": 2.8490336831731002, "learning_rate": 1.980460613394616e-05, "loss": 0.9731, "step": 1192 }, { "epoch": 0.18255547054322876, "grad_norm": 2.5321760664034225, "learning_rate": 1.9804118335878452e-05, "loss": 0.8599, "step": 1193 }, { "epoch": 0.18270849273144607, "grad_norm": 2.582180096249362, "learning_rate": 1.980362993570148e-05, "loss": 0.7725, "step": 1194 }, { "epoch": 0.18286151491966335, "grad_norm": 3.050407598760349, "learning_rate": 1.9803140933445246e-05, "loss": 0.9098, "step": 1195 }, { "epoch": 0.18301453710788065, "grad_norm": 2.9433028267353025, "learning_rate": 1.980265132913978e-05, "loss": 0.9321, "step": 1196 }, { "epoch": 0.18316755929609793, "grad_norm": 2.439342164588578, "learning_rate": 1.980216112281514e-05, "loss": 0.745, "step": 1197 }, { "epoch": 0.18332058148431524, "grad_norm": 2.8658445163454886, "learning_rate": 1.9801670314501445e-05, "loss": 0.9419, "step": 1198 }, { "epoch": 0.18347360367253251, "grad_norm": 2.3996411264820825, "learning_rate": 1.980117890422883e-05, "loss": 0.7613, "step": 1199 }, { "epoch": 0.18362662586074982, "grad_norm": 2.634152697556565, "learning_rate": 1.9800686892027476e-05, "loss": 0.838, "step": 1200 }, { "epoch": 0.1837796480489671, "grad_norm": 2.507128419724863, "learning_rate": 1.9800194277927598e-05, "loss": 0.7374, "step": 1201 }, { "epoch": 0.1839326702371844, "grad_norm": 2.5150477885289453, "learning_rate": 1.9799701061959453e-05, "loss": 0.8103, "step": 1202 }, { "epoch": 0.18408569242540168, "grad_norm": 2.7743946382832214, "learning_rate": 1.9799207244153328e-05, "loss": 0.7684, "step": 1203 }, { "epoch": 0.184238714613619, "grad_norm": 2.7916364471729262, "learning_rate": 1.979871282453955e-05, "loss": 0.9085, "step": 1204 }, { "epoch": 0.18439173680183626, "grad_norm": 2.6761395292421652, "learning_rate": 1.9798217803148488e-05, "loss": 0.8737, "step": 1205 }, { "epoch": 0.18454475899005357, "grad_norm": 2.7327345821631353, "learning_rate": 1.9797722180010536e-05, "loss": 0.8907, "step": 1206 }, { "epoch": 0.18469778117827085, "grad_norm": 2.926716209435097, "learning_rate": 1.979722595515614e-05, "loss": 0.9471, "step": 1207 }, { "epoch": 0.18485080336648815, "grad_norm": 2.4288423001399724, "learning_rate": 1.979672912861577e-05, "loss": 0.8499, "step": 1208 }, { "epoch": 0.18500382555470543, "grad_norm": 2.8360903216784865, "learning_rate": 1.9796231700419937e-05, "loss": 0.9216, "step": 1209 }, { "epoch": 0.18515684774292274, "grad_norm": 2.9513922158366466, "learning_rate": 1.9795733670599194e-05, "loss": 0.8389, "step": 1210 }, { "epoch": 0.18530986993114001, "grad_norm": 2.480276836129265, "learning_rate": 1.9795235039184122e-05, "loss": 0.8037, "step": 1211 }, { "epoch": 0.18546289211935732, "grad_norm": 2.5886986020209632, "learning_rate": 1.979473580620535e-05, "loss": 0.837, "step": 1212 }, { "epoch": 0.1856159143075746, "grad_norm": 2.673110598202028, "learning_rate": 1.9794235971693537e-05, "loss": 0.8506, "step": 1213 }, { "epoch": 0.1857689364957919, "grad_norm": 2.8120113685946833, "learning_rate": 1.9793735535679373e-05, "loss": 0.8873, "step": 1214 }, { "epoch": 0.18592195868400918, "grad_norm": 2.4841528470474525, "learning_rate": 1.97932344981936e-05, "loss": 0.8227, "step": 1215 }, { "epoch": 0.1860749808722265, "grad_norm": 2.6262957390756423, "learning_rate": 1.9792732859266985e-05, "loss": 0.831, "step": 1216 }, { "epoch": 0.18622800306044376, "grad_norm": 2.7867995827118905, "learning_rate": 1.979223061893033e-05, "loss": 0.8887, "step": 1217 }, { "epoch": 0.18638102524866107, "grad_norm": 2.4630014832920133, "learning_rate": 1.9791727777214494e-05, "loss": 0.7623, "step": 1218 }, { "epoch": 0.18653404743687835, "grad_norm": 2.8608171398956714, "learning_rate": 1.9791224334150344e-05, "loss": 0.8973, "step": 1219 }, { "epoch": 0.18668706962509563, "grad_norm": 2.625067202467613, "learning_rate": 1.9790720289768807e-05, "loss": 0.88, "step": 1220 }, { "epoch": 0.18684009181331293, "grad_norm": 2.6002917464889475, "learning_rate": 1.9790215644100834e-05, "loss": 0.8389, "step": 1221 }, { "epoch": 0.1869931140015302, "grad_norm": 2.9459681926716046, "learning_rate": 1.978971039717742e-05, "loss": 0.8296, "step": 1222 }, { "epoch": 0.18714613618974751, "grad_norm": 2.713734464464466, "learning_rate": 1.978920454902959e-05, "loss": 0.8426, "step": 1223 }, { "epoch": 0.1872991583779648, "grad_norm": 2.7203914331532415, "learning_rate": 1.9788698099688416e-05, "loss": 0.9216, "step": 1224 }, { "epoch": 0.1874521805661821, "grad_norm": 2.410057875984283, "learning_rate": 1.9788191049184998e-05, "loss": 0.8333, "step": 1225 }, { "epoch": 0.18760520275439937, "grad_norm": 2.7449351455709827, "learning_rate": 1.9787683397550476e-05, "loss": 0.8998, "step": 1226 }, { "epoch": 0.18775822494261668, "grad_norm": 2.852865788819563, "learning_rate": 1.9787175144816024e-05, "loss": 0.7877, "step": 1227 }, { "epoch": 0.18791124713083396, "grad_norm": 2.50764849784216, "learning_rate": 1.9786666291012865e-05, "loss": 0.8286, "step": 1228 }, { "epoch": 0.18806426931905126, "grad_norm": 2.572027875501263, "learning_rate": 1.9786156836172237e-05, "loss": 0.8445, "step": 1229 }, { "epoch": 0.18821729150726854, "grad_norm": 2.484590343748145, "learning_rate": 1.9785646780325435e-05, "loss": 0.8945, "step": 1230 }, { "epoch": 0.18837031369548585, "grad_norm": 2.5100868462046817, "learning_rate": 1.9785136123503787e-05, "loss": 0.8939, "step": 1231 }, { "epoch": 0.18852333588370312, "grad_norm": 2.455177735428078, "learning_rate": 1.9784624865738643e-05, "loss": 0.7709, "step": 1232 }, { "epoch": 0.18867635807192043, "grad_norm": 2.4916566077574567, "learning_rate": 1.9784113007061414e-05, "loss": 0.8872, "step": 1233 }, { "epoch": 0.1888293802601377, "grad_norm": 2.430966096318744, "learning_rate": 1.9783600547503528e-05, "loss": 0.9165, "step": 1234 }, { "epoch": 0.188982402448355, "grad_norm": 2.4779511899944264, "learning_rate": 1.978308748709646e-05, "loss": 0.7428, "step": 1235 }, { "epoch": 0.1891354246365723, "grad_norm": 2.7661213468146175, "learning_rate": 1.9782573825871715e-05, "loss": 0.7765, "step": 1236 }, { "epoch": 0.1892884468247896, "grad_norm": 2.5469813018884793, "learning_rate": 1.9782059563860844e-05, "loss": 0.7575, "step": 1237 }, { "epoch": 0.18944146901300687, "grad_norm": 2.6609455008160396, "learning_rate": 1.9781544701095426e-05, "loss": 0.9117, "step": 1238 }, { "epoch": 0.18959449120122418, "grad_norm": 2.8769593063146948, "learning_rate": 1.9781029237607082e-05, "loss": 0.8893, "step": 1239 }, { "epoch": 0.18974751338944146, "grad_norm": 2.910752462671504, "learning_rate": 1.9780513173427472e-05, "loss": 0.862, "step": 1240 }, { "epoch": 0.18990053557765876, "grad_norm": 2.8258652202291135, "learning_rate": 1.977999650858828e-05, "loss": 0.9601, "step": 1241 }, { "epoch": 0.19005355776587604, "grad_norm": 2.3361137453572756, "learning_rate": 1.977947924312125e-05, "loss": 0.6773, "step": 1242 }, { "epoch": 0.19020657995409335, "grad_norm": 2.732147684563504, "learning_rate": 1.9778961377058138e-05, "loss": 0.9071, "step": 1243 }, { "epoch": 0.19035960214231062, "grad_norm": 2.6873683102739014, "learning_rate": 1.9778442910430753e-05, "loss": 0.7924, "step": 1244 }, { "epoch": 0.19051262433052793, "grad_norm": 2.8385621841540196, "learning_rate": 1.9777923843270937e-05, "loss": 0.9363, "step": 1245 }, { "epoch": 0.1906656465187452, "grad_norm": 2.6345781764376106, "learning_rate": 1.9777404175610563e-05, "loss": 0.8486, "step": 1246 }, { "epoch": 0.1908186687069625, "grad_norm": 2.7282412099331967, "learning_rate": 1.9776883907481556e-05, "loss": 0.8968, "step": 1247 }, { "epoch": 0.1909716908951798, "grad_norm": 3.1198583665695385, "learning_rate": 1.9776363038915853e-05, "loss": 0.8591, "step": 1248 }, { "epoch": 0.1911247130833971, "grad_norm": 2.9650963500849605, "learning_rate": 1.9775841569945455e-05, "loss": 0.895, "step": 1249 }, { "epoch": 0.19127773527161437, "grad_norm": 3.005956067515491, "learning_rate": 1.9775319500602383e-05, "loss": 0.8774, "step": 1250 }, { "epoch": 0.19143075745983168, "grad_norm": 2.785558603450888, "learning_rate": 1.9774796830918696e-05, "loss": 0.7999, "step": 1251 }, { "epoch": 0.19158377964804896, "grad_norm": 2.754290803774593, "learning_rate": 1.97742735609265e-05, "loss": 0.8619, "step": 1252 }, { "epoch": 0.19173680183626626, "grad_norm": 2.59123133889139, "learning_rate": 1.9773749690657923e-05, "loss": 0.8181, "step": 1253 }, { "epoch": 0.19188982402448354, "grad_norm": 2.423499848751921, "learning_rate": 1.9773225220145144e-05, "loss": 0.858, "step": 1254 }, { "epoch": 0.19204284621270085, "grad_norm": 2.651915413290144, "learning_rate": 1.9772700149420374e-05, "loss": 0.9273, "step": 1255 }, { "epoch": 0.19219586840091812, "grad_norm": 2.4797552410990154, "learning_rate": 1.9772174478515853e-05, "loss": 0.8235, "step": 1256 }, { "epoch": 0.19234889058913543, "grad_norm": 2.5741982732357824, "learning_rate": 1.977164820746387e-05, "loss": 0.77, "step": 1257 }, { "epoch": 0.1925019127773527, "grad_norm": 2.610715685337163, "learning_rate": 1.9771121336296744e-05, "loss": 0.8574, "step": 1258 }, { "epoch": 0.19265493496557, "grad_norm": 2.4847035418283254, "learning_rate": 1.9770593865046832e-05, "loss": 0.8437, "step": 1259 }, { "epoch": 0.1928079571537873, "grad_norm": 2.4929585213596672, "learning_rate": 1.9770065793746528e-05, "loss": 0.8486, "step": 1260 }, { "epoch": 0.1929609793420046, "grad_norm": 3.044420651513117, "learning_rate": 1.9769537122428264e-05, "loss": 0.9013, "step": 1261 }, { "epoch": 0.19311400153022187, "grad_norm": 2.6620986896543237, "learning_rate": 1.9769007851124505e-05, "loss": 0.9234, "step": 1262 }, { "epoch": 0.19326702371843918, "grad_norm": 2.8157426489061135, "learning_rate": 1.9768477979867757e-05, "loss": 0.9035, "step": 1263 }, { "epoch": 0.19342004590665646, "grad_norm": 2.529021689411045, "learning_rate": 1.9767947508690562e-05, "loss": 0.8663, "step": 1264 }, { "epoch": 0.19357306809487376, "grad_norm": 2.5843925171763535, "learning_rate": 1.97674164376255e-05, "loss": 0.8378, "step": 1265 }, { "epoch": 0.19372609028309104, "grad_norm": 3.0335901445285525, "learning_rate": 1.9766884766705182e-05, "loss": 0.8908, "step": 1266 }, { "epoch": 0.19387911247130835, "grad_norm": 3.145047505720264, "learning_rate": 1.9766352495962263e-05, "loss": 1.0083, "step": 1267 }, { "epoch": 0.19403213465952562, "grad_norm": 2.4039128378461543, "learning_rate": 1.976581962542943e-05, "loss": 0.7459, "step": 1268 }, { "epoch": 0.19418515684774293, "grad_norm": 2.2548402455018133, "learning_rate": 1.976528615513941e-05, "loss": 0.8713, "step": 1269 }, { "epoch": 0.1943381790359602, "grad_norm": 2.5574308945342876, "learning_rate": 1.9764752085124968e-05, "loss": 0.8306, "step": 1270 }, { "epoch": 0.1944912012241775, "grad_norm": 2.73069294916681, "learning_rate": 1.97642174154189e-05, "loss": 0.8369, "step": 1271 }, { "epoch": 0.1946442234123948, "grad_norm": 2.48980869464948, "learning_rate": 1.9763682146054036e-05, "loss": 0.7243, "step": 1272 }, { "epoch": 0.1947972456006121, "grad_norm": 2.736038468896169, "learning_rate": 1.9763146277063264e-05, "loss": 0.8022, "step": 1273 }, { "epoch": 0.19495026778882937, "grad_norm": 2.698636507396173, "learning_rate": 1.976260980847948e-05, "loss": 0.8665, "step": 1274 }, { "epoch": 0.19510328997704668, "grad_norm": 2.728859710561004, "learning_rate": 1.976207274033564e-05, "loss": 0.7747, "step": 1275 }, { "epoch": 0.19525631216526396, "grad_norm": 2.4838979015966607, "learning_rate": 1.976153507266472e-05, "loss": 0.8504, "step": 1276 }, { "epoch": 0.19540933435348126, "grad_norm": 2.659727034754188, "learning_rate": 1.976099680549975e-05, "loss": 0.9027, "step": 1277 }, { "epoch": 0.19556235654169854, "grad_norm": 2.7146676990146257, "learning_rate": 1.9760457938873773e-05, "loss": 0.8086, "step": 1278 }, { "epoch": 0.19571537872991585, "grad_norm": 2.476438101864568, "learning_rate": 1.9759918472819896e-05, "loss": 0.8024, "step": 1279 }, { "epoch": 0.19586840091813312, "grad_norm": 2.494365497726393, "learning_rate": 1.9759378407371242e-05, "loss": 0.8212, "step": 1280 }, { "epoch": 0.19602142310635043, "grad_norm": 2.7005260217584444, "learning_rate": 1.9758837742560984e-05, "loss": 0.8976, "step": 1281 }, { "epoch": 0.1961744452945677, "grad_norm": 2.590370938150912, "learning_rate": 1.975829647842232e-05, "loss": 0.7617, "step": 1282 }, { "epoch": 0.196327467482785, "grad_norm": 2.5882464926232824, "learning_rate": 1.9757754614988495e-05, "loss": 0.7785, "step": 1283 }, { "epoch": 0.1964804896710023, "grad_norm": 2.502547989366547, "learning_rate": 1.975721215229279e-05, "loss": 0.9019, "step": 1284 }, { "epoch": 0.1966335118592196, "grad_norm": 2.5390514701365268, "learning_rate": 1.9756669090368514e-05, "loss": 0.821, "step": 1285 }, { "epoch": 0.19678653404743687, "grad_norm": 2.639839580822911, "learning_rate": 1.975612542924902e-05, "loss": 0.8262, "step": 1286 }, { "epoch": 0.19693955623565418, "grad_norm": 2.4825562925435443, "learning_rate": 1.97555811689677e-05, "loss": 0.79, "step": 1287 }, { "epoch": 0.19709257842387146, "grad_norm": 2.7116933120169704, "learning_rate": 1.9755036309557974e-05, "loss": 0.9368, "step": 1288 }, { "epoch": 0.19724560061208876, "grad_norm": 2.663602543382191, "learning_rate": 1.9754490851053306e-05, "loss": 0.8395, "step": 1289 }, { "epoch": 0.19739862280030604, "grad_norm": 2.642048603050421, "learning_rate": 1.9753944793487195e-05, "loss": 0.9173, "step": 1290 }, { "epoch": 0.19755164498852334, "grad_norm": 2.619082257934699, "learning_rate": 1.975339813689318e-05, "loss": 0.8949, "step": 1291 }, { "epoch": 0.19770466717674062, "grad_norm": 2.469477303471826, "learning_rate": 1.9752850881304827e-05, "loss": 0.8011, "step": 1292 }, { "epoch": 0.19785768936495793, "grad_norm": 2.332170742162521, "learning_rate": 1.9752303026755747e-05, "loss": 0.7746, "step": 1293 }, { "epoch": 0.1980107115531752, "grad_norm": 2.465506481746153, "learning_rate": 1.975175457327959e-05, "loss": 0.7254, "step": 1294 }, { "epoch": 0.1981637337413925, "grad_norm": 2.504242954456863, "learning_rate": 1.975120552091003e-05, "loss": 0.8851, "step": 1295 }, { "epoch": 0.1983167559296098, "grad_norm": 2.7474656960473345, "learning_rate": 1.97506558696808e-05, "loss": 0.8854, "step": 1296 }, { "epoch": 0.1984697781178271, "grad_norm": 2.55032537694633, "learning_rate": 1.9750105619625644e-05, "loss": 0.8987, "step": 1297 }, { "epoch": 0.19862280030604437, "grad_norm": 3.147485983482909, "learning_rate": 1.9749554770778358e-05, "loss": 0.8879, "step": 1298 }, { "epoch": 0.19877582249426168, "grad_norm": 2.774617380445243, "learning_rate": 1.974900332317277e-05, "loss": 0.9347, "step": 1299 }, { "epoch": 0.19892884468247896, "grad_norm": 2.5337569604258623, "learning_rate": 1.974845127684276e-05, "loss": 0.7679, "step": 1300 }, { "epoch": 0.19908186687069626, "grad_norm": 2.432147574123346, "learning_rate": 1.9747898631822213e-05, "loss": 0.8188, "step": 1301 }, { "epoch": 0.19923488905891354, "grad_norm": 2.6080426910470775, "learning_rate": 1.9747345388145082e-05, "loss": 0.8023, "step": 1302 }, { "epoch": 0.19938791124713084, "grad_norm": 2.4662419988741497, "learning_rate": 1.9746791545845335e-05, "loss": 0.7576, "step": 1303 }, { "epoch": 0.19954093343534812, "grad_norm": 2.6788843876154353, "learning_rate": 1.974623710495699e-05, "loss": 0.807, "step": 1304 }, { "epoch": 0.19969395562356543, "grad_norm": 2.575256113628228, "learning_rate": 1.9745682065514096e-05, "loss": 0.8279, "step": 1305 }, { "epoch": 0.1998469778117827, "grad_norm": 2.714741610802897, "learning_rate": 1.9745126427550742e-05, "loss": 0.8878, "step": 1306 }, { "epoch": 0.2, "grad_norm": 2.8040762303011126, "learning_rate": 1.9744570191101053e-05, "loss": 0.8519, "step": 1307 }, { "epoch": 0.2001530221882173, "grad_norm": 2.824683717072596, "learning_rate": 1.9744013356199186e-05, "loss": 0.8524, "step": 1308 }, { "epoch": 0.2003060443764346, "grad_norm": 2.6712623088554306, "learning_rate": 1.974345592287934e-05, "loss": 0.9416, "step": 1309 }, { "epoch": 0.20045906656465187, "grad_norm": 2.6566874873753794, "learning_rate": 1.9742897891175746e-05, "loss": 0.8073, "step": 1310 }, { "epoch": 0.20061208875286918, "grad_norm": 2.8105095398771174, "learning_rate": 1.9742339261122682e-05, "loss": 0.8246, "step": 1311 }, { "epoch": 0.20076511094108646, "grad_norm": 2.4516251972652334, "learning_rate": 1.9741780032754452e-05, "loss": 0.8449, "step": 1312 }, { "epoch": 0.20091813312930376, "grad_norm": 2.7270021374282036, "learning_rate": 1.9741220206105398e-05, "loss": 0.9152, "step": 1313 }, { "epoch": 0.20107115531752104, "grad_norm": 2.6346059254428815, "learning_rate": 1.9740659781209905e-05, "loss": 0.9087, "step": 1314 }, { "epoch": 0.20122417750573834, "grad_norm": 2.6401950334296433, "learning_rate": 1.9740098758102388e-05, "loss": 0.8442, "step": 1315 }, { "epoch": 0.20137719969395562, "grad_norm": 2.590632699986551, "learning_rate": 1.9739537136817303e-05, "loss": 0.7427, "step": 1316 }, { "epoch": 0.20153022188217293, "grad_norm": 2.515449790487725, "learning_rate": 1.973897491738914e-05, "loss": 0.852, "step": 1317 }, { "epoch": 0.2016832440703902, "grad_norm": 2.5728999188481927, "learning_rate": 1.973841209985243e-05, "loss": 0.8264, "step": 1318 }, { "epoch": 0.2018362662586075, "grad_norm": 2.762783647383601, "learning_rate": 1.973784868424174e-05, "loss": 0.9527, "step": 1319 }, { "epoch": 0.2019892884468248, "grad_norm": 2.5039467780176, "learning_rate": 1.9737284670591662e-05, "loss": 0.8145, "step": 1320 }, { "epoch": 0.2021423106350421, "grad_norm": 2.741462373035885, "learning_rate": 1.973672005893684e-05, "loss": 0.8645, "step": 1321 }, { "epoch": 0.20229533282325937, "grad_norm": 2.589649068698681, "learning_rate": 1.973615484931195e-05, "loss": 0.9089, "step": 1322 }, { "epoch": 0.20244835501147668, "grad_norm": 2.2420383393474625, "learning_rate": 1.9735589041751702e-05, "loss": 0.7429, "step": 1323 }, { "epoch": 0.20260137719969395, "grad_norm": 2.8834726302997473, "learning_rate": 1.9735022636290845e-05, "loss": 0.8063, "step": 1324 }, { "epoch": 0.20275439938791126, "grad_norm": 2.8281724474513608, "learning_rate": 1.9734455632964166e-05, "loss": 0.9345, "step": 1325 }, { "epoch": 0.20290742157612854, "grad_norm": 2.6418928860290967, "learning_rate": 1.9733888031806485e-05, "loss": 0.8527, "step": 1326 }, { "epoch": 0.20306044376434584, "grad_norm": 2.801742930828167, "learning_rate": 1.973331983285266e-05, "loss": 0.902, "step": 1327 }, { "epoch": 0.20321346595256312, "grad_norm": 2.724387569739316, "learning_rate": 1.9732751036137588e-05, "loss": 0.9353, "step": 1328 }, { "epoch": 0.20336648814078043, "grad_norm": 2.852911846115286, "learning_rate": 1.97321816416962e-05, "loss": 0.9431, "step": 1329 }, { "epoch": 0.2035195103289977, "grad_norm": 2.9385006447379203, "learning_rate": 1.973161164956346e-05, "loss": 0.7805, "step": 1330 }, { "epoch": 0.20367253251721498, "grad_norm": 2.6617267844097, "learning_rate": 1.9731041059774387e-05, "loss": 0.9011, "step": 1331 }, { "epoch": 0.2038255547054323, "grad_norm": 2.535089745451929, "learning_rate": 1.973046987236401e-05, "loss": 0.8131, "step": 1332 }, { "epoch": 0.20397857689364957, "grad_norm": 2.5575312901886074, "learning_rate": 1.972989808736741e-05, "loss": 0.9255, "step": 1333 }, { "epoch": 0.20413159908186687, "grad_norm": 2.9489071986972246, "learning_rate": 1.972932570481971e-05, "loss": 0.8734, "step": 1334 }, { "epoch": 0.20428462127008415, "grad_norm": 2.6401501138319814, "learning_rate": 1.9728752724756052e-05, "loss": 0.8659, "step": 1335 }, { "epoch": 0.20443764345830145, "grad_norm": 2.57743017743404, "learning_rate": 1.9728179147211634e-05, "loss": 0.6302, "step": 1336 }, { "epoch": 0.20459066564651873, "grad_norm": 2.696725935953648, "learning_rate": 1.9727604972221674e-05, "loss": 0.9445, "step": 1337 }, { "epoch": 0.20474368783473604, "grad_norm": 2.627476393799533, "learning_rate": 1.9727030199821443e-05, "loss": 0.7543, "step": 1338 }, { "epoch": 0.20489671002295332, "grad_norm": 2.3880575508880604, "learning_rate": 1.9726454830046233e-05, "loss": 0.7506, "step": 1339 }, { "epoch": 0.20504973221117062, "grad_norm": 2.7835239139366177, "learning_rate": 1.9725878862931376e-05, "loss": 0.8102, "step": 1340 }, { "epoch": 0.2052027543993879, "grad_norm": 2.7321404831940614, "learning_rate": 1.9725302298512257e-05, "loss": 0.7956, "step": 1341 }, { "epoch": 0.2053557765876052, "grad_norm": 2.457638556107576, "learning_rate": 1.9724725136824277e-05, "loss": 0.8221, "step": 1342 }, { "epoch": 0.20550879877582248, "grad_norm": 2.4536782205822516, "learning_rate": 1.9724147377902884e-05, "loss": 0.71, "step": 1343 }, { "epoch": 0.2056618209640398, "grad_norm": 2.7635778874710564, "learning_rate": 1.9723569021783557e-05, "loss": 0.8422, "step": 1344 }, { "epoch": 0.20581484315225707, "grad_norm": 2.600934468297566, "learning_rate": 1.9722990068501818e-05, "loss": 0.8543, "step": 1345 }, { "epoch": 0.20596786534047437, "grad_norm": 2.5400207546187388, "learning_rate": 1.972241051809322e-05, "loss": 0.8007, "step": 1346 }, { "epoch": 0.20612088752869165, "grad_norm": 2.709332583626073, "learning_rate": 1.9721830370593364e-05, "loss": 0.8138, "step": 1347 }, { "epoch": 0.20627390971690895, "grad_norm": 2.724189789443129, "learning_rate": 1.972124962603787e-05, "loss": 0.9304, "step": 1348 }, { "epoch": 0.20642693190512623, "grad_norm": 2.4858669918490834, "learning_rate": 1.9720668284462407e-05, "loss": 0.7141, "step": 1349 }, { "epoch": 0.20657995409334354, "grad_norm": 2.4065464378747254, "learning_rate": 1.9720086345902675e-05, "loss": 0.7101, "step": 1350 }, { "epoch": 0.20673297628156082, "grad_norm": 2.524649685819583, "learning_rate": 1.9719503810394417e-05, "loss": 0.9611, "step": 1351 }, { "epoch": 0.20688599846977812, "grad_norm": 2.7392602306838083, "learning_rate": 1.9718920677973407e-05, "loss": 0.8701, "step": 1352 }, { "epoch": 0.2070390206579954, "grad_norm": 2.8684316541922716, "learning_rate": 1.9718336948675457e-05, "loss": 0.8392, "step": 1353 }, { "epoch": 0.2071920428462127, "grad_norm": 2.797057123846008, "learning_rate": 1.9717752622536417e-05, "loss": 0.8712, "step": 1354 }, { "epoch": 0.20734506503442998, "grad_norm": 2.7197161819835194, "learning_rate": 1.9717167699592173e-05, "loss": 0.8252, "step": 1355 }, { "epoch": 0.2074980872226473, "grad_norm": 2.3072808087513605, "learning_rate": 1.9716582179878645e-05, "loss": 0.803, "step": 1356 }, { "epoch": 0.20765110941086456, "grad_norm": 2.923572676762194, "learning_rate": 1.97159960634318e-05, "loss": 0.9197, "step": 1357 }, { "epoch": 0.20780413159908187, "grad_norm": 2.792292441459427, "learning_rate": 1.9715409350287618e-05, "loss": 0.8846, "step": 1358 }, { "epoch": 0.20795715378729915, "grad_norm": 2.793577571557601, "learning_rate": 1.9714822040482144e-05, "loss": 0.9456, "step": 1359 }, { "epoch": 0.20811017597551645, "grad_norm": 2.5197653839780094, "learning_rate": 1.9714234134051446e-05, "loss": 0.8231, "step": 1360 }, { "epoch": 0.20826319816373373, "grad_norm": 2.5856070289772943, "learning_rate": 1.9713645631031628e-05, "loss": 0.8052, "step": 1361 }, { "epoch": 0.20841622035195104, "grad_norm": 2.4578741682910694, "learning_rate": 1.971305653145883e-05, "loss": 0.7517, "step": 1362 }, { "epoch": 0.20856924254016831, "grad_norm": 2.519889808986514, "learning_rate": 1.9712466835369234e-05, "loss": 0.8433, "step": 1363 }, { "epoch": 0.20872226472838562, "grad_norm": 2.377119912314756, "learning_rate": 1.9711876542799056e-05, "loss": 0.8387, "step": 1364 }, { "epoch": 0.2088752869166029, "grad_norm": 2.577350155100122, "learning_rate": 1.9711285653784543e-05, "loss": 0.8693, "step": 1365 }, { "epoch": 0.2090283091048202, "grad_norm": 2.3743002683845544, "learning_rate": 1.971069416836199e-05, "loss": 0.7715, "step": 1366 }, { "epoch": 0.20918133129303748, "grad_norm": 2.5970257954430167, "learning_rate": 1.9710102086567714e-05, "loss": 0.8364, "step": 1367 }, { "epoch": 0.2093343534812548, "grad_norm": 2.463807934762222, "learning_rate": 1.9709509408438087e-05, "loss": 0.9367, "step": 1368 }, { "epoch": 0.20948737566947206, "grad_norm": 2.5215756250590173, "learning_rate": 1.9708916134009507e-05, "loss": 0.9188, "step": 1369 }, { "epoch": 0.20964039785768937, "grad_norm": 2.5332861323220026, "learning_rate": 1.97083222633184e-05, "loss": 0.8427, "step": 1370 }, { "epoch": 0.20979342004590665, "grad_norm": 3.1280702666688738, "learning_rate": 1.9707727796401243e-05, "loss": 0.8749, "step": 1371 }, { "epoch": 0.20994644223412395, "grad_norm": 2.7501921842495447, "learning_rate": 1.970713273329455e-05, "loss": 0.929, "step": 1372 }, { "epoch": 0.21009946442234123, "grad_norm": 2.489037234979162, "learning_rate": 1.970653707403486e-05, "loss": 0.8092, "step": 1373 }, { "epoch": 0.21025248661055854, "grad_norm": 2.6729264987086436, "learning_rate": 1.9705940818658753e-05, "loss": 0.8513, "step": 1374 }, { "epoch": 0.21040550879877581, "grad_norm": 2.7135658037185815, "learning_rate": 1.9705343967202853e-05, "loss": 0.8504, "step": 1375 }, { "epoch": 0.21055853098699312, "grad_norm": 2.748488375492164, "learning_rate": 1.970474651970381e-05, "loss": 0.8899, "step": 1376 }, { "epoch": 0.2107115531752104, "grad_norm": 2.851236345307254, "learning_rate": 1.9704148476198323e-05, "loss": 0.8719, "step": 1377 }, { "epoch": 0.2108645753634277, "grad_norm": 2.7526677950267637, "learning_rate": 1.9703549836723112e-05, "loss": 0.9131, "step": 1378 }, { "epoch": 0.21101759755164498, "grad_norm": 2.7251933214674784, "learning_rate": 1.9702950601314948e-05, "loss": 0.7765, "step": 1379 }, { "epoch": 0.2111706197398623, "grad_norm": 2.422786440415208, "learning_rate": 1.9702350770010625e-05, "loss": 0.816, "step": 1380 }, { "epoch": 0.21132364192807956, "grad_norm": 2.8510678470175823, "learning_rate": 1.9701750342846985e-05, "loss": 0.9267, "step": 1381 }, { "epoch": 0.21147666411629687, "grad_norm": 2.445685396692264, "learning_rate": 1.970114931986091e-05, "loss": 0.7711, "step": 1382 }, { "epoch": 0.21162968630451415, "grad_norm": 2.5729642145381395, "learning_rate": 1.9700547701089297e-05, "loss": 0.8154, "step": 1383 }, { "epoch": 0.21178270849273145, "grad_norm": 2.3053407957709378, "learning_rate": 1.9699945486569102e-05, "loss": 0.8914, "step": 1384 }, { "epoch": 0.21193573068094873, "grad_norm": 2.3823797011223373, "learning_rate": 1.969934267633731e-05, "loss": 0.7488, "step": 1385 }, { "epoch": 0.21208875286916604, "grad_norm": 2.482422953417084, "learning_rate": 1.969873927043094e-05, "loss": 0.8749, "step": 1386 }, { "epoch": 0.21224177505738331, "grad_norm": 2.779771599316904, "learning_rate": 1.969813526888705e-05, "loss": 0.8083, "step": 1387 }, { "epoch": 0.21239479724560062, "grad_norm": 2.651475617855006, "learning_rate": 1.9697530671742733e-05, "loss": 0.8822, "step": 1388 }, { "epoch": 0.2125478194338179, "grad_norm": 2.4441971083865113, "learning_rate": 1.969692547903512e-05, "loss": 0.9099, "step": 1389 }, { "epoch": 0.2127008416220352, "grad_norm": 2.901432036147306, "learning_rate": 1.969631969080138e-05, "loss": 0.8652, "step": 1390 }, { "epoch": 0.21285386381025248, "grad_norm": 2.7019808196456974, "learning_rate": 1.9695713307078718e-05, "loss": 0.8447, "step": 1391 }, { "epoch": 0.21300688599846979, "grad_norm": 2.524291797108313, "learning_rate": 1.9695106327904367e-05, "loss": 0.9771, "step": 1392 }, { "epoch": 0.21315990818668706, "grad_norm": 2.432632551140893, "learning_rate": 1.9694498753315613e-05, "loss": 0.816, "step": 1393 }, { "epoch": 0.21331293037490437, "grad_norm": 3.0185482439437012, "learning_rate": 1.9693890583349762e-05, "loss": 1.0286, "step": 1394 }, { "epoch": 0.21346595256312165, "grad_norm": 2.473777410059679, "learning_rate": 1.9693281818044168e-05, "loss": 0.8313, "step": 1395 }, { "epoch": 0.21361897475133895, "grad_norm": 2.9398419262263973, "learning_rate": 1.969267245743622e-05, "loss": 0.9696, "step": 1396 }, { "epoch": 0.21377199693955623, "grad_norm": 2.5598888551944197, "learning_rate": 1.9692062501563333e-05, "loss": 0.7388, "step": 1397 }, { "epoch": 0.21392501912777354, "grad_norm": 2.6365253523922365, "learning_rate": 1.9691451950462977e-05, "loss": 0.8378, "step": 1398 }, { "epoch": 0.2140780413159908, "grad_norm": 2.6272557509945313, "learning_rate": 1.9690840804172644e-05, "loss": 0.7908, "step": 1399 }, { "epoch": 0.21423106350420812, "grad_norm": 2.457956626306656, "learning_rate": 1.9690229062729863e-05, "loss": 0.9102, "step": 1400 }, { "epoch": 0.2143840856924254, "grad_norm": 2.7856149622579003, "learning_rate": 1.968961672617221e-05, "loss": 0.7951, "step": 1401 }, { "epoch": 0.2145371078806427, "grad_norm": 2.5890160262046766, "learning_rate": 1.9689003794537286e-05, "loss": 0.8368, "step": 1402 }, { "epoch": 0.21469013006885998, "grad_norm": 2.5397138932158865, "learning_rate": 1.9688390267862737e-05, "loss": 0.8831, "step": 1403 }, { "epoch": 0.21484315225707729, "grad_norm": 2.5658431838731195, "learning_rate": 1.968777614618624e-05, "loss": 0.8381, "step": 1404 }, { "epoch": 0.21499617444529456, "grad_norm": 2.551108162079543, "learning_rate": 1.9687161429545512e-05, "loss": 0.9045, "step": 1405 }, { "epoch": 0.21514919663351187, "grad_norm": 2.4869151629476436, "learning_rate": 1.96865461179783e-05, "loss": 0.7808, "step": 1406 }, { "epoch": 0.21530221882172915, "grad_norm": 2.733337979301424, "learning_rate": 1.96859302115224e-05, "loss": 0.9888, "step": 1407 }, { "epoch": 0.21545524100994645, "grad_norm": 2.6573788126722486, "learning_rate": 1.9685313710215634e-05, "loss": 0.8592, "step": 1408 }, { "epoch": 0.21560826319816373, "grad_norm": 2.4780481991806367, "learning_rate": 1.9684696614095865e-05, "loss": 0.7615, "step": 1409 }, { "epoch": 0.21576128538638104, "grad_norm": 2.8468670949990713, "learning_rate": 1.968407892320099e-05, "loss": 0.8195, "step": 1410 }, { "epoch": 0.2159143075745983, "grad_norm": 2.542501474269545, "learning_rate": 1.968346063756894e-05, "loss": 0.7449, "step": 1411 }, { "epoch": 0.21606732976281562, "grad_norm": 2.5323321906232006, "learning_rate": 1.9682841757237693e-05, "loss": 0.8259, "step": 1412 }, { "epoch": 0.2162203519510329, "grad_norm": 2.6822987672510616, "learning_rate": 1.9682222282245255e-05, "loss": 0.9143, "step": 1413 }, { "epoch": 0.2163733741392502, "grad_norm": 2.661368983707966, "learning_rate": 1.9681602212629668e-05, "loss": 0.8521, "step": 1414 }, { "epoch": 0.21652639632746748, "grad_norm": 2.4421559076683925, "learning_rate": 1.9680981548429017e-05, "loss": 0.7393, "step": 1415 }, { "epoch": 0.21667941851568479, "grad_norm": 2.7182110574776375, "learning_rate": 1.9680360289681415e-05, "loss": 0.8152, "step": 1416 }, { "epoch": 0.21683244070390206, "grad_norm": 2.7587215745106426, "learning_rate": 1.967973843642502e-05, "loss": 0.9543, "step": 1417 }, { "epoch": 0.21698546289211937, "grad_norm": 2.443920441762142, "learning_rate": 1.967911598869802e-05, "loss": 0.8022, "step": 1418 }, { "epoch": 0.21713848508033665, "grad_norm": 2.770489472508194, "learning_rate": 1.967849294653864e-05, "loss": 0.8629, "step": 1419 }, { "epoch": 0.21729150726855395, "grad_norm": 2.6168180326566186, "learning_rate": 1.9677869309985146e-05, "loss": 0.8642, "step": 1420 }, { "epoch": 0.21744452945677123, "grad_norm": 2.752417489975351, "learning_rate": 1.9677245079075837e-05, "loss": 0.789, "step": 1421 }, { "epoch": 0.21759755164498853, "grad_norm": 2.683755581101207, "learning_rate": 1.967662025384905e-05, "loss": 0.9186, "step": 1422 }, { "epoch": 0.2177505738332058, "grad_norm": 2.804693835725841, "learning_rate": 1.967599483434316e-05, "loss": 0.9001, "step": 1423 }, { "epoch": 0.21790359602142312, "grad_norm": 3.135183658906024, "learning_rate": 1.9675368820596575e-05, "loss": 0.9598, "step": 1424 }, { "epoch": 0.2180566182096404, "grad_norm": 3.0719430556931067, "learning_rate": 1.9674742212647738e-05, "loss": 0.8038, "step": 1425 }, { "epoch": 0.2182096403978577, "grad_norm": 2.60160695139502, "learning_rate": 1.9674115010535135e-05, "loss": 0.8428, "step": 1426 }, { "epoch": 0.21836266258607498, "grad_norm": 2.6151319424341897, "learning_rate": 1.9673487214297284e-05, "loss": 0.8505, "step": 1427 }, { "epoch": 0.21851568477429228, "grad_norm": 2.575352044936147, "learning_rate": 1.967285882397274e-05, "loss": 0.826, "step": 1428 }, { "epoch": 0.21866870696250956, "grad_norm": 2.628612019649863, "learning_rate": 1.9672229839600098e-05, "loss": 0.7893, "step": 1429 }, { "epoch": 0.21882172915072687, "grad_norm": 2.512796903702364, "learning_rate": 1.9671600261217978e-05, "loss": 0.7888, "step": 1430 }, { "epoch": 0.21897475133894415, "grad_norm": 2.6086882195208525, "learning_rate": 1.9670970088865052e-05, "loss": 0.794, "step": 1431 }, { "epoch": 0.21912777352716145, "grad_norm": 2.9600265005459225, "learning_rate": 1.9670339322580023e-05, "loss": 0.932, "step": 1432 }, { "epoch": 0.21928079571537873, "grad_norm": 2.366110521394664, "learning_rate": 1.966970796240162e-05, "loss": 0.885, "step": 1433 }, { "epoch": 0.21943381790359603, "grad_norm": 2.406125948360677, "learning_rate": 1.966907600836863e-05, "loss": 0.8062, "step": 1434 }, { "epoch": 0.2195868400918133, "grad_norm": 2.735448209351719, "learning_rate": 1.9668443460519854e-05, "loss": 0.9436, "step": 1435 }, { "epoch": 0.21973986228003062, "grad_norm": 2.468087419582429, "learning_rate": 1.966781031889414e-05, "loss": 0.8398, "step": 1436 }, { "epoch": 0.2198928844682479, "grad_norm": 2.4874153722433316, "learning_rate": 1.9667176583530377e-05, "loss": 0.8523, "step": 1437 }, { "epoch": 0.2200459066564652, "grad_norm": 2.6445890152061917, "learning_rate": 1.9666542254467478e-05, "loss": 1.0136, "step": 1438 }, { "epoch": 0.22019892884468248, "grad_norm": 2.607525745722285, "learning_rate": 1.9665907331744404e-05, "loss": 0.8658, "step": 1439 }, { "epoch": 0.22035195103289976, "grad_norm": 2.7498225059483348, "learning_rate": 1.966527181540015e-05, "loss": 0.8795, "step": 1440 }, { "epoch": 0.22050497322111706, "grad_norm": 2.449442723272099, "learning_rate": 1.9664635705473745e-05, "loss": 0.7556, "step": 1441 }, { "epoch": 0.22065799540933434, "grad_norm": 2.5182627516208265, "learning_rate": 1.9663999002004247e-05, "loss": 0.9188, "step": 1442 }, { "epoch": 0.22081101759755165, "grad_norm": 3.0815320780904214, "learning_rate": 1.966336170503077e-05, "loss": 0.8873, "step": 1443 }, { "epoch": 0.22096403978576892, "grad_norm": 2.1521962301015205, "learning_rate": 1.9662723814592443e-05, "loss": 0.6666, "step": 1444 }, { "epoch": 0.22111706197398623, "grad_norm": 2.617498957904519, "learning_rate": 1.9662085330728448e-05, "loss": 0.8849, "step": 1445 }, { "epoch": 0.2212700841622035, "grad_norm": 2.6878494492762717, "learning_rate": 1.9661446253477995e-05, "loss": 0.8731, "step": 1446 }, { "epoch": 0.2214231063504208, "grad_norm": 2.4562980436730086, "learning_rate": 1.966080658288033e-05, "loss": 0.7922, "step": 1447 }, { "epoch": 0.2215761285386381, "grad_norm": 2.4564471373976278, "learning_rate": 1.966016631897474e-05, "loss": 0.8254, "step": 1448 }, { "epoch": 0.2217291507268554, "grad_norm": 2.649776130638235, "learning_rate": 1.9659525461800546e-05, "loss": 0.9063, "step": 1449 }, { "epoch": 0.22188217291507267, "grad_norm": 2.2959740420216237, "learning_rate": 1.9658884011397105e-05, "loss": 0.7877, "step": 1450 }, { "epoch": 0.22203519510328998, "grad_norm": 2.5813619059244166, "learning_rate": 1.965824196780381e-05, "loss": 0.8595, "step": 1451 }, { "epoch": 0.22218821729150726, "grad_norm": 2.584172187703942, "learning_rate": 1.9657599331060097e-05, "loss": 0.7845, "step": 1452 }, { "epoch": 0.22234123947972456, "grad_norm": 2.7062610966861564, "learning_rate": 1.9656956101205426e-05, "loss": 0.8721, "step": 1453 }, { "epoch": 0.22249426166794184, "grad_norm": 2.374713272758218, "learning_rate": 1.9656312278279298e-05, "loss": 0.6398, "step": 1454 }, { "epoch": 0.22264728385615914, "grad_norm": 2.562393686891158, "learning_rate": 1.965566786232126e-05, "loss": 0.8596, "step": 1455 }, { "epoch": 0.22280030604437642, "grad_norm": 2.2857421843137518, "learning_rate": 1.9655022853370886e-05, "loss": 0.7233, "step": 1456 }, { "epoch": 0.22295332823259373, "grad_norm": 2.364375523326793, "learning_rate": 1.965437725146779e-05, "loss": 0.777, "step": 1457 }, { "epoch": 0.223106350420811, "grad_norm": 2.326524319190945, "learning_rate": 1.9653731056651616e-05, "loss": 0.7536, "step": 1458 }, { "epoch": 0.2232593726090283, "grad_norm": 2.417153715891703, "learning_rate": 1.9653084268962054e-05, "loss": 0.7981, "step": 1459 }, { "epoch": 0.2234123947972456, "grad_norm": 2.5776287610942306, "learning_rate": 1.965243688843882e-05, "loss": 0.7941, "step": 1460 }, { "epoch": 0.2235654169854629, "grad_norm": 2.392772961147377, "learning_rate": 1.965178891512168e-05, "loss": 0.9104, "step": 1461 }, { "epoch": 0.22371843917368017, "grad_norm": 2.487273411363196, "learning_rate": 1.9651140349050428e-05, "loss": 0.7555, "step": 1462 }, { "epoch": 0.22387146136189748, "grad_norm": 2.7143668921668245, "learning_rate": 1.9650491190264887e-05, "loss": 0.8837, "step": 1463 }, { "epoch": 0.22402448355011476, "grad_norm": 2.734873235580024, "learning_rate": 1.964984143880493e-05, "loss": 0.815, "step": 1464 }, { "epoch": 0.22417750573833206, "grad_norm": 2.928990160124733, "learning_rate": 1.9649191094710457e-05, "loss": 0.8404, "step": 1465 }, { "epoch": 0.22433052792654934, "grad_norm": 2.5593116581170583, "learning_rate": 1.9648540158021416e-05, "loss": 0.7353, "step": 1466 }, { "epoch": 0.22448355011476664, "grad_norm": 2.6266572490060804, "learning_rate": 1.9647888628777777e-05, "loss": 0.9329, "step": 1467 }, { "epoch": 0.22463657230298392, "grad_norm": 2.8704971084818878, "learning_rate": 1.9647236507019552e-05, "loss": 0.9432, "step": 1468 }, { "epoch": 0.22478959449120123, "grad_norm": 2.590453922700811, "learning_rate": 1.9646583792786795e-05, "loss": 0.7691, "step": 1469 }, { "epoch": 0.2249426166794185, "grad_norm": 3.1415820506912624, "learning_rate": 1.9645930486119588e-05, "loss": 1.0224, "step": 1470 }, { "epoch": 0.2250956388676358, "grad_norm": 2.5309118552802836, "learning_rate": 1.964527658705806e-05, "loss": 0.886, "step": 1471 }, { "epoch": 0.2252486610558531, "grad_norm": 2.5972158241040657, "learning_rate": 1.9644622095642358e-05, "loss": 0.6973, "step": 1472 }, { "epoch": 0.2254016832440704, "grad_norm": 2.5546268806279455, "learning_rate": 1.9643967011912685e-05, "loss": 0.8449, "step": 1473 }, { "epoch": 0.22555470543228767, "grad_norm": 2.3479011681317314, "learning_rate": 1.964331133590927e-05, "loss": 0.8669, "step": 1474 }, { "epoch": 0.22570772762050498, "grad_norm": 2.515862803842189, "learning_rate": 1.9642655067672384e-05, "loss": 0.8085, "step": 1475 }, { "epoch": 0.22586074980872226, "grad_norm": 2.3668911776660755, "learning_rate": 1.9641998207242325e-05, "loss": 0.7926, "step": 1476 }, { "epoch": 0.22601377199693956, "grad_norm": 2.280772898900187, "learning_rate": 1.9641340754659438e-05, "loss": 0.5904, "step": 1477 }, { "epoch": 0.22616679418515684, "grad_norm": 2.465742578131, "learning_rate": 1.9640682709964098e-05, "loss": 0.7349, "step": 1478 }, { "epoch": 0.22631981637337414, "grad_norm": 2.530176243255086, "learning_rate": 1.964002407319672e-05, "loss": 0.8513, "step": 1479 }, { "epoch": 0.22647283856159142, "grad_norm": 2.5405734648853873, "learning_rate": 1.9639364844397745e-05, "loss": 0.9168, "step": 1480 }, { "epoch": 0.22662586074980873, "grad_norm": 2.8804862950288337, "learning_rate": 1.9638705023607668e-05, "loss": 0.8588, "step": 1481 }, { "epoch": 0.226778882938026, "grad_norm": 2.260546018171997, "learning_rate": 1.9638044610867013e-05, "loss": 0.7765, "step": 1482 }, { "epoch": 0.2269319051262433, "grad_norm": 2.536491527405855, "learning_rate": 1.963738360621633e-05, "loss": 0.764, "step": 1483 }, { "epoch": 0.2270849273144606, "grad_norm": 2.0029390016703763, "learning_rate": 1.963672200969622e-05, "loss": 0.7933, "step": 1484 }, { "epoch": 0.2272379495026779, "grad_norm": 2.681926900943292, "learning_rate": 1.9636059821347315e-05, "loss": 0.8794, "step": 1485 }, { "epoch": 0.22739097169089517, "grad_norm": 2.460030338333229, "learning_rate": 1.9635397041210274e-05, "loss": 0.8535, "step": 1486 }, { "epoch": 0.22754399387911248, "grad_norm": 2.666549304410844, "learning_rate": 1.963473366932581e-05, "loss": 0.7369, "step": 1487 }, { "epoch": 0.22769701606732975, "grad_norm": 2.5057662138797676, "learning_rate": 1.9634069705734662e-05, "loss": 0.8288, "step": 1488 }, { "epoch": 0.22785003825554706, "grad_norm": 2.5683484789985256, "learning_rate": 1.9633405150477603e-05, "loss": 0.7962, "step": 1489 }, { "epoch": 0.22800306044376434, "grad_norm": 3.0952874862064816, "learning_rate": 1.9632740003595447e-05, "loss": 0.879, "step": 1490 }, { "epoch": 0.22815608263198164, "grad_norm": 2.8884222769603065, "learning_rate": 1.9632074265129044e-05, "loss": 0.7994, "step": 1491 }, { "epoch": 0.22830910482019892, "grad_norm": 2.735873639194007, "learning_rate": 1.963140793511928e-05, "loss": 0.8619, "step": 1492 }, { "epoch": 0.22846212700841623, "grad_norm": 2.631822280768736, "learning_rate": 1.9630741013607077e-05, "loss": 0.8749, "step": 1493 }, { "epoch": 0.2286151491966335, "grad_norm": 2.577737303029769, "learning_rate": 1.9630073500633392e-05, "loss": 0.8527, "step": 1494 }, { "epoch": 0.2287681713848508, "grad_norm": 2.852657029630802, "learning_rate": 1.962940539623922e-05, "loss": 0.9213, "step": 1495 }, { "epoch": 0.2289211935730681, "grad_norm": 2.5092520712765447, "learning_rate": 1.962873670046559e-05, "loss": 0.7863, "step": 1496 }, { "epoch": 0.2290742157612854, "grad_norm": 3.0271335789574065, "learning_rate": 1.962806741335357e-05, "loss": 0.8167, "step": 1497 }, { "epoch": 0.22922723794950267, "grad_norm": 2.671356145739248, "learning_rate": 1.962739753494427e-05, "loss": 0.9197, "step": 1498 }, { "epoch": 0.22938026013771998, "grad_norm": 2.6843150913405354, "learning_rate": 1.9626727065278827e-05, "loss": 0.9332, "step": 1499 }, { "epoch": 0.22953328232593725, "grad_norm": 2.5279817214021385, "learning_rate": 1.962605600439841e-05, "loss": 0.8936, "step": 1500 }, { "epoch": 0.22968630451415456, "grad_norm": 2.825279808824693, "learning_rate": 1.9625384352344235e-05, "loss": 0.8902, "step": 1501 }, { "epoch": 0.22983932670237184, "grad_norm": 2.592876328071922, "learning_rate": 1.9624712109157554e-05, "loss": 0.9093, "step": 1502 }, { "epoch": 0.22999234889058914, "grad_norm": 2.328063927339844, "learning_rate": 1.9624039274879654e-05, "loss": 0.7551, "step": 1503 }, { "epoch": 0.23014537107880642, "grad_norm": 2.597131304137858, "learning_rate": 1.962336584955185e-05, "loss": 0.8054, "step": 1504 }, { "epoch": 0.23029839326702373, "grad_norm": 2.561933334091904, "learning_rate": 1.9622691833215502e-05, "loss": 0.8699, "step": 1505 }, { "epoch": 0.230451415455241, "grad_norm": 2.6185390256998904, "learning_rate": 1.9622017225912e-05, "loss": 0.882, "step": 1506 }, { "epoch": 0.2306044376434583, "grad_norm": 2.4996637324030844, "learning_rate": 1.9621342027682785e-05, "loss": 0.7593, "step": 1507 }, { "epoch": 0.2307574598316756, "grad_norm": 2.337279426626155, "learning_rate": 1.9620666238569317e-05, "loss": 0.7016, "step": 1508 }, { "epoch": 0.2309104820198929, "grad_norm": 2.644195150141663, "learning_rate": 1.96199898586131e-05, "loss": 0.8604, "step": 1509 }, { "epoch": 0.23106350420811017, "grad_norm": 2.6142675375463624, "learning_rate": 1.9619312887855666e-05, "loss": 0.8094, "step": 1510 }, { "epoch": 0.23121652639632748, "grad_norm": 2.331606054900535, "learning_rate": 1.96186353263386e-05, "loss": 0.7867, "step": 1511 }, { "epoch": 0.23136954858454475, "grad_norm": 2.8201130356617057, "learning_rate": 1.9617957174103512e-05, "loss": 0.9134, "step": 1512 }, { "epoch": 0.23152257077276206, "grad_norm": 3.166099027860909, "learning_rate": 1.9617278431192045e-05, "loss": 0.7511, "step": 1513 }, { "epoch": 0.23167559296097934, "grad_norm": 2.5832948593251595, "learning_rate": 1.961659909764589e-05, "loss": 0.6853, "step": 1514 }, { "epoch": 0.23182861514919664, "grad_norm": 3.0072774469631534, "learning_rate": 1.9615919173506763e-05, "loss": 0.8164, "step": 1515 }, { "epoch": 0.23198163733741392, "grad_norm": 2.5885075319239084, "learning_rate": 1.961523865881642e-05, "loss": 0.8215, "step": 1516 }, { "epoch": 0.23213465952563123, "grad_norm": 2.5422510418493127, "learning_rate": 1.9614557553616658e-05, "loss": 0.8625, "step": 1517 }, { "epoch": 0.2322876817138485, "grad_norm": 2.994853277911608, "learning_rate": 1.96138758579493e-05, "loss": 1.0072, "step": 1518 }, { "epoch": 0.2324407039020658, "grad_norm": 3.8292304341773473, "learning_rate": 1.961319357185622e-05, "loss": 0.8986, "step": 1519 }, { "epoch": 0.2325937260902831, "grad_norm": 2.4955902381832273, "learning_rate": 1.9612510695379318e-05, "loss": 0.8552, "step": 1520 }, { "epoch": 0.2327467482785004, "grad_norm": 2.5365745905944976, "learning_rate": 1.9611827228560526e-05, "loss": 0.833, "step": 1521 }, { "epoch": 0.23289977046671767, "grad_norm": 2.5287596898930675, "learning_rate": 1.9611143171441824e-05, "loss": 0.7321, "step": 1522 }, { "epoch": 0.23305279265493498, "grad_norm": 2.5755887252276604, "learning_rate": 1.961045852406522e-05, "loss": 0.8743, "step": 1523 }, { "epoch": 0.23320581484315225, "grad_norm": 2.6042826275761564, "learning_rate": 1.960977328647276e-05, "loss": 0.9544, "step": 1524 }, { "epoch": 0.23335883703136956, "grad_norm": 2.622514539233016, "learning_rate": 1.9609087458706528e-05, "loss": 0.9218, "step": 1525 }, { "epoch": 0.23351185921958684, "grad_norm": 2.4768249685418735, "learning_rate": 1.9608401040808648e-05, "loss": 0.8509, "step": 1526 }, { "epoch": 0.23366488140780414, "grad_norm": 2.5099475586849036, "learning_rate": 1.960771403282127e-05, "loss": 0.7543, "step": 1527 }, { "epoch": 0.23381790359602142, "grad_norm": 2.4341180953680035, "learning_rate": 1.9607026434786587e-05, "loss": 0.7317, "step": 1528 }, { "epoch": 0.23397092578423873, "grad_norm": 2.722094841689053, "learning_rate": 1.960633824674683e-05, "loss": 0.8697, "step": 1529 }, { "epoch": 0.234123947972456, "grad_norm": 2.8260615483151588, "learning_rate": 1.960564946874426e-05, "loss": 0.8851, "step": 1530 }, { "epoch": 0.2342769701606733, "grad_norm": 2.5417326953835824, "learning_rate": 1.960496010082118e-05, "loss": 0.8586, "step": 1531 }, { "epoch": 0.2344299923488906, "grad_norm": 2.5826334101465944, "learning_rate": 1.9604270143019923e-05, "loss": 0.8883, "step": 1532 }, { "epoch": 0.2345830145371079, "grad_norm": 2.678630361021353, "learning_rate": 1.9603579595382866e-05, "loss": 0.8858, "step": 1533 }, { "epoch": 0.23473603672532517, "grad_norm": 2.5681223877610346, "learning_rate": 1.9602888457952414e-05, "loss": 0.8861, "step": 1534 }, { "epoch": 0.23488905891354248, "grad_norm": 2.556048444081866, "learning_rate": 1.9602196730771017e-05, "loss": 0.9105, "step": 1535 }, { "epoch": 0.23504208110175975, "grad_norm": 2.2656890021267966, "learning_rate": 1.9601504413881155e-05, "loss": 0.8354, "step": 1536 }, { "epoch": 0.23519510328997706, "grad_norm": 2.6891425941651375, "learning_rate": 1.9600811507325346e-05, "loss": 0.7968, "step": 1537 }, { "epoch": 0.23534812547819434, "grad_norm": 2.6911301647078805, "learning_rate": 1.9600118011146142e-05, "loss": 0.8348, "step": 1538 }, { "epoch": 0.23550114766641164, "grad_norm": 2.9627416505056092, "learning_rate": 1.9599423925386135e-05, "loss": 0.8664, "step": 1539 }, { "epoch": 0.23565416985462892, "grad_norm": 2.5888079738464387, "learning_rate": 1.9598729250087953e-05, "loss": 0.834, "step": 1540 }, { "epoch": 0.23580719204284623, "grad_norm": 2.8789926115688007, "learning_rate": 1.9598033985294262e-05, "loss": 0.9464, "step": 1541 }, { "epoch": 0.2359602142310635, "grad_norm": 2.575145545960676, "learning_rate": 1.9597338131047747e-05, "loss": 0.8041, "step": 1542 }, { "epoch": 0.2361132364192808, "grad_norm": 2.4750916407006516, "learning_rate": 1.9596641687391157e-05, "loss": 0.938, "step": 1543 }, { "epoch": 0.2362662586074981, "grad_norm": 2.3274446744899477, "learning_rate": 1.9595944654367257e-05, "loss": 0.8365, "step": 1544 }, { "epoch": 0.2364192807957154, "grad_norm": 2.474251844393818, "learning_rate": 1.959524703201886e-05, "loss": 0.8646, "step": 1545 }, { "epoch": 0.23657230298393267, "grad_norm": 2.5147567170404153, "learning_rate": 1.9594548820388802e-05, "loss": 0.8311, "step": 1546 }, { "epoch": 0.23672532517214998, "grad_norm": 2.5166123534386813, "learning_rate": 1.959385001951997e-05, "loss": 0.8027, "step": 1547 }, { "epoch": 0.23687834736036725, "grad_norm": 2.740847576540663, "learning_rate": 1.9593150629455276e-05, "loss": 0.953, "step": 1548 }, { "epoch": 0.23703136954858456, "grad_norm": 2.3306374527547735, "learning_rate": 1.9592450650237672e-05, "loss": 0.8136, "step": 1549 }, { "epoch": 0.23718439173680184, "grad_norm": 2.777170941620961, "learning_rate": 1.9591750081910147e-05, "loss": 0.8697, "step": 1550 }, { "epoch": 0.23733741392501911, "grad_norm": 2.5805389495767574, "learning_rate": 1.9591048924515727e-05, "loss": 0.8805, "step": 1551 }, { "epoch": 0.23749043611323642, "grad_norm": 2.514446714606269, "learning_rate": 1.9590347178097472e-05, "loss": 0.763, "step": 1552 }, { "epoch": 0.2376434583014537, "grad_norm": 2.8092464729269424, "learning_rate": 1.9589644842698483e-05, "loss": 0.8816, "step": 1553 }, { "epoch": 0.237796480489671, "grad_norm": 2.483884600727865, "learning_rate": 1.9588941918361887e-05, "loss": 0.8546, "step": 1554 }, { "epoch": 0.23794950267788828, "grad_norm": 2.474736699712295, "learning_rate": 1.958823840513085e-05, "loss": 0.8574, "step": 1555 }, { "epoch": 0.23810252486610559, "grad_norm": 2.4571375008471383, "learning_rate": 1.9587534303048593e-05, "loss": 0.8287, "step": 1556 }, { "epoch": 0.23825554705432286, "grad_norm": 2.3407274090716546, "learning_rate": 1.9586829612158344e-05, "loss": 0.6982, "step": 1557 }, { "epoch": 0.23840856924254017, "grad_norm": 2.377616391628943, "learning_rate": 1.958612433250338e-05, "loss": 0.8394, "step": 1558 }, { "epoch": 0.23856159143075745, "grad_norm": 2.5486874556181407, "learning_rate": 1.9585418464127024e-05, "loss": 0.761, "step": 1559 }, { "epoch": 0.23871461361897475, "grad_norm": 2.766035827712233, "learning_rate": 1.958471200707262e-05, "loss": 0.9256, "step": 1560 }, { "epoch": 0.23886763580719203, "grad_norm": 2.7270798479698035, "learning_rate": 1.958400496138356e-05, "loss": 0.8441, "step": 1561 }, { "epoch": 0.23902065799540934, "grad_norm": 2.5294552861595556, "learning_rate": 1.9583297327103258e-05, "loss": 0.8277, "step": 1562 }, { "epoch": 0.2391736801836266, "grad_norm": 2.5286458879300002, "learning_rate": 1.9582589104275178e-05, "loss": 0.9408, "step": 1563 }, { "epoch": 0.23932670237184392, "grad_norm": 2.290446335806122, "learning_rate": 1.9581880292942813e-05, "loss": 0.7325, "step": 1564 }, { "epoch": 0.2394797245600612, "grad_norm": 2.542319946851815, "learning_rate": 1.9581170893149696e-05, "loss": 0.8568, "step": 1565 }, { "epoch": 0.2396327467482785, "grad_norm": 2.6996924947494594, "learning_rate": 1.9580460904939393e-05, "loss": 0.8699, "step": 1566 }, { "epoch": 0.23978576893649578, "grad_norm": 2.7752847934882894, "learning_rate": 1.9579750328355505e-05, "loss": 0.8267, "step": 1567 }, { "epoch": 0.23993879112471309, "grad_norm": 2.564510429754208, "learning_rate": 1.957903916344168e-05, "loss": 0.8308, "step": 1568 }, { "epoch": 0.24009181331293036, "grad_norm": 2.425102215801101, "learning_rate": 1.957832741024158e-05, "loss": 0.806, "step": 1569 }, { "epoch": 0.24024483550114767, "grad_norm": 2.6676736135165364, "learning_rate": 1.957761506879892e-05, "loss": 0.8601, "step": 1570 }, { "epoch": 0.24039785768936495, "grad_norm": 2.4198971288915163, "learning_rate": 1.9576902139157455e-05, "loss": 0.77, "step": 1571 }, { "epoch": 0.24055087987758225, "grad_norm": 2.2966349523292307, "learning_rate": 1.9576188621360966e-05, "loss": 0.7461, "step": 1572 }, { "epoch": 0.24070390206579953, "grad_norm": 2.6079230848347956, "learning_rate": 1.957547451545327e-05, "loss": 0.8364, "step": 1573 }, { "epoch": 0.24085692425401684, "grad_norm": 2.758369012562051, "learning_rate": 1.9574759821478223e-05, "loss": 0.9007, "step": 1574 }, { "epoch": 0.2410099464422341, "grad_norm": 2.3613131551646673, "learning_rate": 1.9574044539479722e-05, "loss": 0.8054, "step": 1575 }, { "epoch": 0.24116296863045142, "grad_norm": 2.464175192909486, "learning_rate": 1.9573328669501693e-05, "loss": 0.7988, "step": 1576 }, { "epoch": 0.2413159908186687, "grad_norm": 2.8590820543841837, "learning_rate": 1.9572612211588094e-05, "loss": 0.7991, "step": 1577 }, { "epoch": 0.241469013006886, "grad_norm": 2.6513303722544386, "learning_rate": 1.9571895165782933e-05, "loss": 0.8747, "step": 1578 }, { "epoch": 0.24162203519510328, "grad_norm": 2.6797342921700555, "learning_rate": 1.9571177532130245e-05, "loss": 0.9601, "step": 1579 }, { "epoch": 0.24177505738332059, "grad_norm": 2.656625618019867, "learning_rate": 1.9570459310674104e-05, "loss": 0.9296, "step": 1580 }, { "epoch": 0.24192807957153786, "grad_norm": 2.476973262298971, "learning_rate": 1.9569740501458615e-05, "loss": 0.8351, "step": 1581 }, { "epoch": 0.24208110175975517, "grad_norm": 2.6908872499133283, "learning_rate": 1.9569021104527924e-05, "loss": 0.9745, "step": 1582 }, { "epoch": 0.24223412394797245, "grad_norm": 2.400852771610583, "learning_rate": 1.956830111992621e-05, "loss": 0.8625, "step": 1583 }, { "epoch": 0.24238714613618975, "grad_norm": 2.7311721684322574, "learning_rate": 1.95675805476977e-05, "loss": 0.8294, "step": 1584 }, { "epoch": 0.24254016832440703, "grad_norm": 3.031322195765501, "learning_rate": 1.9566859387886638e-05, "loss": 0.8948, "step": 1585 }, { "epoch": 0.24269319051262433, "grad_norm": 2.386831353760463, "learning_rate": 1.956613764053731e-05, "loss": 0.8555, "step": 1586 }, { "epoch": 0.2428462127008416, "grad_norm": 2.5958286967053223, "learning_rate": 1.956541530569405e-05, "loss": 0.8871, "step": 1587 }, { "epoch": 0.24299923488905892, "grad_norm": 2.351972886279123, "learning_rate": 1.9564692383401218e-05, "loss": 0.7892, "step": 1588 }, { "epoch": 0.2431522570772762, "grad_norm": 2.563518227284029, "learning_rate": 1.9563968873703206e-05, "loss": 0.8589, "step": 1589 }, { "epoch": 0.2433052792654935, "grad_norm": 2.7469234929921242, "learning_rate": 1.956324477664445e-05, "loss": 0.9476, "step": 1590 }, { "epoch": 0.24345830145371078, "grad_norm": 2.5793234210805354, "learning_rate": 1.9562520092269426e-05, "loss": 0.9077, "step": 1591 }, { "epoch": 0.24361132364192808, "grad_norm": 2.487999582227551, "learning_rate": 1.956179482062263e-05, "loss": 0.8971, "step": 1592 }, { "epoch": 0.24376434583014536, "grad_norm": 2.5411762234948734, "learning_rate": 1.9561068961748612e-05, "loss": 0.7937, "step": 1593 }, { "epoch": 0.24391736801836267, "grad_norm": 2.7545988237977275, "learning_rate": 1.9560342515691942e-05, "loss": 0.9105, "step": 1594 }, { "epoch": 0.24407039020657995, "grad_norm": 2.338750092973846, "learning_rate": 1.9559615482497237e-05, "loss": 0.8024, "step": 1595 }, { "epoch": 0.24422341239479725, "grad_norm": 2.5892173696364145, "learning_rate": 1.9558887862209153e-05, "loss": 0.9056, "step": 1596 }, { "epoch": 0.24437643458301453, "grad_norm": 2.779375109330424, "learning_rate": 1.955815965487237e-05, "loss": 0.8351, "step": 1597 }, { "epoch": 0.24452945677123183, "grad_norm": 2.85621778641634, "learning_rate": 1.9557430860531605e-05, "loss": 0.8458, "step": 1598 }, { "epoch": 0.2446824789594491, "grad_norm": 3.0230061074447794, "learning_rate": 1.955670147923163e-05, "loss": 0.8472, "step": 1599 }, { "epoch": 0.24483550114766642, "grad_norm": 2.81465165939412, "learning_rate": 1.9555971511017225e-05, "loss": 0.8206, "step": 1600 }, { "epoch": 0.2449885233358837, "grad_norm": 2.4138314641623984, "learning_rate": 1.9555240955933227e-05, "loss": 0.8138, "step": 1601 }, { "epoch": 0.245141545524101, "grad_norm": 2.8178514221242126, "learning_rate": 1.9554509814024502e-05, "loss": 0.9016, "step": 1602 }, { "epoch": 0.24529456771231828, "grad_norm": 2.5742380441275796, "learning_rate": 1.9553778085335953e-05, "loss": 0.8801, "step": 1603 }, { "epoch": 0.24544758990053558, "grad_norm": 2.2493911411311296, "learning_rate": 1.955304576991252e-05, "loss": 0.7488, "step": 1604 }, { "epoch": 0.24560061208875286, "grad_norm": 2.618681068118831, "learning_rate": 1.9552312867799168e-05, "loss": 0.8048, "step": 1605 }, { "epoch": 0.24575363427697017, "grad_norm": 2.882616971639018, "learning_rate": 1.955157937904092e-05, "loss": 0.9131, "step": 1606 }, { "epoch": 0.24590665646518745, "grad_norm": 2.5178465278639792, "learning_rate": 1.955084530368281e-05, "loss": 0.8579, "step": 1607 }, { "epoch": 0.24605967865340475, "grad_norm": 2.760529263401625, "learning_rate": 1.955011064176993e-05, "loss": 0.8543, "step": 1608 }, { "epoch": 0.24621270084162203, "grad_norm": 2.6012155141285143, "learning_rate": 1.9549375393347397e-05, "loss": 1.1095, "step": 1609 }, { "epoch": 0.24636572302983933, "grad_norm": 2.379103223108997, "learning_rate": 1.9548639558460363e-05, "loss": 0.7103, "step": 1610 }, { "epoch": 0.2465187452180566, "grad_norm": 2.4002591433736105, "learning_rate": 1.954790313715402e-05, "loss": 0.8428, "step": 1611 }, { "epoch": 0.24667176740627392, "grad_norm": 2.4612350390198134, "learning_rate": 1.9547166129473592e-05, "loss": 0.8282, "step": 1612 }, { "epoch": 0.2468247895944912, "grad_norm": 2.7307204781232044, "learning_rate": 1.9546428535464342e-05, "loss": 0.8591, "step": 1613 }, { "epoch": 0.2469778117827085, "grad_norm": 2.4852927020406375, "learning_rate": 1.954569035517157e-05, "loss": 0.8924, "step": 1614 }, { "epoch": 0.24713083397092578, "grad_norm": 2.4986642056279673, "learning_rate": 1.9544951588640613e-05, "loss": 0.8122, "step": 1615 }, { "epoch": 0.24728385615914308, "grad_norm": 2.4127278623822526, "learning_rate": 1.954421223591684e-05, "loss": 0.6704, "step": 1616 }, { "epoch": 0.24743687834736036, "grad_norm": 2.4815806633333413, "learning_rate": 1.9543472297045652e-05, "loss": 0.9251, "step": 1617 }, { "epoch": 0.24758990053557767, "grad_norm": 2.5451951843300957, "learning_rate": 1.95427317720725e-05, "loss": 0.77, "step": 1618 }, { "epoch": 0.24774292272379494, "grad_norm": 2.6155792651075465, "learning_rate": 1.9541990661042855e-05, "loss": 0.7563, "step": 1619 }, { "epoch": 0.24789594491201225, "grad_norm": 2.6364533209981116, "learning_rate": 1.9541248964002237e-05, "loss": 0.7738, "step": 1620 }, { "epoch": 0.24804896710022953, "grad_norm": 2.4555048705871365, "learning_rate": 1.9540506680996194e-05, "loss": 0.844, "step": 1621 }, { "epoch": 0.24820198928844683, "grad_norm": 2.5501946462827707, "learning_rate": 1.9539763812070315e-05, "loss": 0.8376, "step": 1622 }, { "epoch": 0.2483550114766641, "grad_norm": 2.393788755143763, "learning_rate": 1.953902035727022e-05, "loss": 0.8484, "step": 1623 }, { "epoch": 0.24850803366488142, "grad_norm": 2.7383748703121813, "learning_rate": 1.9538276316641563e-05, "loss": 0.8219, "step": 1624 }, { "epoch": 0.2486610558530987, "grad_norm": 2.7572133382018844, "learning_rate": 1.9537531690230047e-05, "loss": 0.8975, "step": 1625 }, { "epoch": 0.248814078041316, "grad_norm": 2.515487334192462, "learning_rate": 1.95367864780814e-05, "loss": 0.8214, "step": 1626 }, { "epoch": 0.24896710022953328, "grad_norm": 2.48662376579883, "learning_rate": 1.9536040680241386e-05, "loss": 0.8922, "step": 1627 }, { "epoch": 0.24912012241775058, "grad_norm": 2.9232465851956424, "learning_rate": 1.953529429675581e-05, "loss": 0.9557, "step": 1628 }, { "epoch": 0.24927314460596786, "grad_norm": 2.6749171268509278, "learning_rate": 1.9534547327670507e-05, "loss": 0.8247, "step": 1629 }, { "epoch": 0.24942616679418517, "grad_norm": 2.4601486076921932, "learning_rate": 1.9533799773031355e-05, "loss": 0.8779, "step": 1630 }, { "epoch": 0.24957918898240244, "grad_norm": 2.24378715386169, "learning_rate": 1.9533051632884262e-05, "loss": 0.7781, "step": 1631 }, { "epoch": 0.24973221117061975, "grad_norm": 2.2968548749052977, "learning_rate": 1.9532302907275172e-05, "loss": 0.8105, "step": 1632 }, { "epoch": 0.24988523335883703, "grad_norm": 2.5467470529707246, "learning_rate": 1.9531553596250076e-05, "loss": 0.8232, "step": 1633 }, { "epoch": 0.25003825554705433, "grad_norm": 2.7549851558941967, "learning_rate": 1.953080369985498e-05, "loss": 0.866, "step": 1634 }, { "epoch": 0.2501912777352716, "grad_norm": 2.5660121185422793, "learning_rate": 1.9530053218135947e-05, "loss": 0.8282, "step": 1635 }, { "epoch": 0.2503442999234889, "grad_norm": 2.8719794869613264, "learning_rate": 1.9529302151139062e-05, "loss": 0.9236, "step": 1636 }, { "epoch": 0.2504973221117062, "grad_norm": 2.708214993200167, "learning_rate": 1.9528550498910454e-05, "loss": 0.8374, "step": 1637 }, { "epoch": 0.2506503442999235, "grad_norm": 2.604499565659554, "learning_rate": 1.9527798261496283e-05, "loss": 0.9509, "step": 1638 }, { "epoch": 0.2508033664881408, "grad_norm": 2.4786457903057473, "learning_rate": 1.952704543894275e-05, "loss": 0.7756, "step": 1639 }, { "epoch": 0.25095638867635806, "grad_norm": 2.6065249288861603, "learning_rate": 1.9526292031296086e-05, "loss": 0.8183, "step": 1640 }, { "epoch": 0.2511094108645754, "grad_norm": 2.644774458166758, "learning_rate": 1.9525538038602563e-05, "loss": 0.8495, "step": 1641 }, { "epoch": 0.25126243305279267, "grad_norm": 2.7492322691250735, "learning_rate": 1.9524783460908482e-05, "loss": 0.8283, "step": 1642 }, { "epoch": 0.25141545524100994, "grad_norm": 2.3011963876888757, "learning_rate": 1.952402829826019e-05, "loss": 0.7487, "step": 1643 }, { "epoch": 0.2515684774292272, "grad_norm": 2.885621875830341, "learning_rate": 1.952327255070406e-05, "loss": 0.86, "step": 1644 }, { "epoch": 0.25172149961744456, "grad_norm": 2.4666631770259895, "learning_rate": 1.9522516218286508e-05, "loss": 0.7159, "step": 1645 }, { "epoch": 0.25187452180566183, "grad_norm": 2.3549607097582608, "learning_rate": 1.9521759301053983e-05, "loss": 0.8202, "step": 1646 }, { "epoch": 0.2520275439938791, "grad_norm": 2.4507380115808357, "learning_rate": 1.9521001799052967e-05, "loss": 0.8397, "step": 1647 }, { "epoch": 0.2521805661820964, "grad_norm": 2.5911841336571433, "learning_rate": 1.952024371232999e-05, "loss": 0.9088, "step": 1648 }, { "epoch": 0.2523335883703137, "grad_norm": 2.6750521928124256, "learning_rate": 1.9519485040931597e-05, "loss": 0.793, "step": 1649 }, { "epoch": 0.252486610558531, "grad_norm": 2.7834755569688503, "learning_rate": 1.951872578490439e-05, "loss": 0.8295, "step": 1650 }, { "epoch": 0.2526396327467483, "grad_norm": 2.237899404312587, "learning_rate": 1.9517965944295e-05, "loss": 0.8427, "step": 1651 }, { "epoch": 0.25279265493496555, "grad_norm": 2.4852949812861906, "learning_rate": 1.951720551915008e-05, "loss": 0.8564, "step": 1652 }, { "epoch": 0.2529456771231829, "grad_norm": 2.632829816927367, "learning_rate": 1.951644450951634e-05, "loss": 0.7319, "step": 1653 }, { "epoch": 0.25309869931140017, "grad_norm": 2.40826639657792, "learning_rate": 1.951568291544051e-05, "loss": 0.8, "step": 1654 }, { "epoch": 0.25325172149961744, "grad_norm": 2.3396314856198694, "learning_rate": 1.9514920736969374e-05, "loss": 0.8018, "step": 1655 }, { "epoch": 0.2534047436878347, "grad_norm": 2.4346994936885356, "learning_rate": 1.951415797414973e-05, "loss": 0.8905, "step": 1656 }, { "epoch": 0.25355776587605205, "grad_norm": 2.644073064070106, "learning_rate": 1.9513394627028423e-05, "loss": 0.9219, "step": 1657 }, { "epoch": 0.25371078806426933, "grad_norm": 2.4464354186477433, "learning_rate": 1.9512630695652336e-05, "loss": 0.7972, "step": 1658 }, { "epoch": 0.2538638102524866, "grad_norm": 2.590677816544767, "learning_rate": 1.9511866180068385e-05, "loss": 0.7883, "step": 1659 }, { "epoch": 0.2540168324407039, "grad_norm": 2.361634237448809, "learning_rate": 1.9511101080323524e-05, "loss": 0.7745, "step": 1660 }, { "epoch": 0.2541698546289212, "grad_norm": 2.577411232973206, "learning_rate": 1.9510335396464736e-05, "loss": 0.867, "step": 1661 }, { "epoch": 0.2543228768171385, "grad_norm": 2.9471633774424477, "learning_rate": 1.9509569128539048e-05, "loss": 0.902, "step": 1662 }, { "epoch": 0.2544758990053558, "grad_norm": 2.3233960734786345, "learning_rate": 1.9508802276593514e-05, "loss": 0.7731, "step": 1663 }, { "epoch": 0.25462892119357305, "grad_norm": 3.0664701594842296, "learning_rate": 1.9508034840675236e-05, "loss": 0.8958, "step": 1664 }, { "epoch": 0.25478194338179033, "grad_norm": 2.7694989075868914, "learning_rate": 1.9507266820831344e-05, "loss": 0.8821, "step": 1665 }, { "epoch": 0.25493496557000767, "grad_norm": 2.4964849392361006, "learning_rate": 1.9506498217109003e-05, "loss": 0.8569, "step": 1666 }, { "epoch": 0.25508798775822494, "grad_norm": 2.5239295777251534, "learning_rate": 1.9505729029555417e-05, "loss": 0.9162, "step": 1667 }, { "epoch": 0.2552410099464422, "grad_norm": 2.5052913112386426, "learning_rate": 1.9504959258217823e-05, "loss": 0.922, "step": 1668 }, { "epoch": 0.2553940321346595, "grad_norm": 2.36279363557302, "learning_rate": 1.95041889031435e-05, "loss": 0.8044, "step": 1669 }, { "epoch": 0.25554705432287683, "grad_norm": 2.349428630092639, "learning_rate": 1.9503417964379754e-05, "loss": 0.8111, "step": 1670 }, { "epoch": 0.2557000765110941, "grad_norm": 2.4173485389188247, "learning_rate": 1.9502646441973933e-05, "loss": 0.8887, "step": 1671 }, { "epoch": 0.2558530986993114, "grad_norm": 2.3382379947820344, "learning_rate": 1.9501874335973422e-05, "loss": 0.848, "step": 1672 }, { "epoch": 0.25600612088752867, "grad_norm": 2.610146018950607, "learning_rate": 1.9501101646425633e-05, "loss": 0.8671, "step": 1673 }, { "epoch": 0.256159143075746, "grad_norm": 2.4976105687857166, "learning_rate": 1.9500328373378026e-05, "loss": 0.79, "step": 1674 }, { "epoch": 0.2563121652639633, "grad_norm": 2.4734090750179103, "learning_rate": 1.9499554516878088e-05, "loss": 0.8003, "step": 1675 }, { "epoch": 0.25646518745218055, "grad_norm": 2.481384562214497, "learning_rate": 1.949878007697334e-05, "loss": 0.8418, "step": 1676 }, { "epoch": 0.25661820964039783, "grad_norm": 2.0977882078690824, "learning_rate": 1.9498005053711354e-05, "loss": 0.819, "step": 1677 }, { "epoch": 0.25677123182861517, "grad_norm": 2.421363761561868, "learning_rate": 1.9497229447139717e-05, "loss": 0.9224, "step": 1678 }, { "epoch": 0.25692425401683244, "grad_norm": 2.5633269767032894, "learning_rate": 1.949645325730607e-05, "loss": 0.849, "step": 1679 }, { "epoch": 0.2570772762050497, "grad_norm": 2.8582041097741344, "learning_rate": 1.949567648425808e-05, "loss": 0.8003, "step": 1680 }, { "epoch": 0.257230298393267, "grad_norm": 2.6758874575609957, "learning_rate": 1.9494899128043442e-05, "loss": 0.8408, "step": 1681 }, { "epoch": 0.25738332058148433, "grad_norm": 2.5888294107853147, "learning_rate": 1.949412118870991e-05, "loss": 0.7712, "step": 1682 }, { "epoch": 0.2575363427697016, "grad_norm": 2.485349688173273, "learning_rate": 1.9493342666305254e-05, "loss": 0.7898, "step": 1683 }, { "epoch": 0.2576893649579189, "grad_norm": 2.4074664249571707, "learning_rate": 1.9492563560877285e-05, "loss": 0.9074, "step": 1684 }, { "epoch": 0.25784238714613616, "grad_norm": 3.006519080206942, "learning_rate": 1.949178387247385e-05, "loss": 0.9606, "step": 1685 }, { "epoch": 0.2579954093343535, "grad_norm": 2.9344864762687353, "learning_rate": 1.9491003601142842e-05, "loss": 0.7411, "step": 1686 }, { "epoch": 0.2581484315225708, "grad_norm": 2.160570396212931, "learning_rate": 1.949022274693217e-05, "loss": 0.6894, "step": 1687 }, { "epoch": 0.25830145371078805, "grad_norm": 2.438882169334196, "learning_rate": 1.9489441309889794e-05, "loss": 0.7715, "step": 1688 }, { "epoch": 0.25845447589900533, "grad_norm": 2.436285424122042, "learning_rate": 1.9488659290063702e-05, "loss": 0.7852, "step": 1689 }, { "epoch": 0.25860749808722266, "grad_norm": 2.854857000650002, "learning_rate": 1.9487876687501926e-05, "loss": 0.8264, "step": 1690 }, { "epoch": 0.25876052027543994, "grad_norm": 2.271191622502827, "learning_rate": 1.9487093502252526e-05, "loss": 0.7694, "step": 1691 }, { "epoch": 0.2589135424636572, "grad_norm": 2.489235162164264, "learning_rate": 1.94863097343636e-05, "loss": 0.7871, "step": 1692 }, { "epoch": 0.2590665646518745, "grad_norm": 3.0714855530371015, "learning_rate": 1.948552538388328e-05, "loss": 0.8953, "step": 1693 }, { "epoch": 0.25921958684009183, "grad_norm": 2.5084220890499007, "learning_rate": 1.9484740450859743e-05, "loss": 0.8391, "step": 1694 }, { "epoch": 0.2593726090283091, "grad_norm": 2.5910117597918116, "learning_rate": 1.9483954935341186e-05, "loss": 0.8472, "step": 1695 }, { "epoch": 0.2595256312165264, "grad_norm": 2.4874414300183982, "learning_rate": 1.9483168837375856e-05, "loss": 0.8618, "step": 1696 }, { "epoch": 0.25967865340474366, "grad_norm": 2.4390882665604536, "learning_rate": 1.9482382157012033e-05, "loss": 0.8633, "step": 1697 }, { "epoch": 0.259831675592961, "grad_norm": 2.7537562928055355, "learning_rate": 1.9481594894298023e-05, "loss": 0.8594, "step": 1698 }, { "epoch": 0.2599846977811783, "grad_norm": 2.3613754614350078, "learning_rate": 1.9480807049282177e-05, "loss": 0.8136, "step": 1699 }, { "epoch": 0.26013771996939555, "grad_norm": 2.611634039266382, "learning_rate": 1.9480018622012884e-05, "loss": 0.9738, "step": 1700 }, { "epoch": 0.26029074215761283, "grad_norm": 2.4645726484909, "learning_rate": 1.9479229612538558e-05, "loss": 0.8931, "step": 1701 }, { "epoch": 0.26044376434583016, "grad_norm": 2.4373875784863284, "learning_rate": 1.9478440020907662e-05, "loss": 0.8618, "step": 1702 }, { "epoch": 0.26059678653404744, "grad_norm": 2.642912343584658, "learning_rate": 1.9477649847168685e-05, "loss": 0.8661, "step": 1703 }, { "epoch": 0.2607498087222647, "grad_norm": 2.433110527065469, "learning_rate": 1.9476859091370153e-05, "loss": 0.7962, "step": 1704 }, { "epoch": 0.260902830910482, "grad_norm": 2.5788838749611966, "learning_rate": 1.947606775356063e-05, "loss": 0.9376, "step": 1705 }, { "epoch": 0.26105585309869933, "grad_norm": 2.3973604286815458, "learning_rate": 1.9475275833788714e-05, "loss": 0.8217, "step": 1706 }, { "epoch": 0.2612088752869166, "grad_norm": 2.669068275728912, "learning_rate": 1.9474483332103043e-05, "loss": 0.8883, "step": 1707 }, { "epoch": 0.2613618974751339, "grad_norm": 2.5071451612106883, "learning_rate": 1.947369024855229e-05, "loss": 0.8878, "step": 1708 }, { "epoch": 0.26151491966335116, "grad_norm": 2.2570325349738556, "learning_rate": 1.947289658318515e-05, "loss": 0.8426, "step": 1709 }, { "epoch": 0.2616679418515685, "grad_norm": 2.5071883489100015, "learning_rate": 1.947210233605038e-05, "loss": 0.8524, "step": 1710 }, { "epoch": 0.2618209640397858, "grad_norm": 2.4286658527705343, "learning_rate": 1.9471307507196746e-05, "loss": 0.9167, "step": 1711 }, { "epoch": 0.26197398622800305, "grad_norm": 2.5819637026712683, "learning_rate": 1.9470512096673065e-05, "loss": 0.7922, "step": 1712 }, { "epoch": 0.26212700841622033, "grad_norm": 2.4051224205801027, "learning_rate": 1.946971610452819e-05, "loss": 0.7694, "step": 1713 }, { "epoch": 0.26228003060443766, "grad_norm": 2.5521899267184667, "learning_rate": 1.9468919530811002e-05, "loss": 0.847, "step": 1714 }, { "epoch": 0.26243305279265494, "grad_norm": 2.4020801572092516, "learning_rate": 1.9468122375570425e-05, "loss": 0.8737, "step": 1715 }, { "epoch": 0.2625860749808722, "grad_norm": 2.3615195314704582, "learning_rate": 1.946732463885541e-05, "loss": 0.7862, "step": 1716 }, { "epoch": 0.2627390971690895, "grad_norm": 2.5988427829601415, "learning_rate": 1.9466526320714956e-05, "loss": 0.8121, "step": 1717 }, { "epoch": 0.26289211935730683, "grad_norm": 2.720083299441531, "learning_rate": 1.9465727421198086e-05, "loss": 0.9416, "step": 1718 }, { "epoch": 0.2630451415455241, "grad_norm": 2.3491570168515534, "learning_rate": 1.9464927940353865e-05, "loss": 0.8364, "step": 1719 }, { "epoch": 0.2631981637337414, "grad_norm": 2.3320233151056846, "learning_rate": 1.9464127878231393e-05, "loss": 0.9151, "step": 1720 }, { "epoch": 0.26335118592195866, "grad_norm": 2.4609771161448064, "learning_rate": 1.9463327234879805e-05, "loss": 0.7901, "step": 1721 }, { "epoch": 0.263504208110176, "grad_norm": 2.576087752470663, "learning_rate": 1.946252601034827e-05, "loss": 0.9416, "step": 1722 }, { "epoch": 0.2636572302983933, "grad_norm": 2.746683862451466, "learning_rate": 1.946172420468599e-05, "loss": 0.9327, "step": 1723 }, { "epoch": 0.26381025248661055, "grad_norm": 2.544995404935606, "learning_rate": 1.9460921817942217e-05, "loss": 0.7567, "step": 1724 }, { "epoch": 0.26396327467482783, "grad_norm": 2.2425437074057832, "learning_rate": 1.9460118850166223e-05, "loss": 0.6886, "step": 1725 }, { "epoch": 0.26411629686304516, "grad_norm": 2.4009536341104036, "learning_rate": 1.9459315301407323e-05, "loss": 0.9018, "step": 1726 }, { "epoch": 0.26426931905126244, "grad_norm": 2.610099232671235, "learning_rate": 1.9458511171714863e-05, "loss": 0.8207, "step": 1727 }, { "epoch": 0.2644223412394797, "grad_norm": 2.4320257408549635, "learning_rate": 1.9457706461138233e-05, "loss": 0.7446, "step": 1728 }, { "epoch": 0.264575363427697, "grad_norm": 2.5185379316749548, "learning_rate": 1.945690116972685e-05, "loss": 0.7479, "step": 1729 }, { "epoch": 0.26472838561591433, "grad_norm": 2.895728792841351, "learning_rate": 1.945609529753017e-05, "loss": 0.8769, "step": 1730 }, { "epoch": 0.2648814078041316, "grad_norm": 2.730992822149975, "learning_rate": 1.945528884459768e-05, "loss": 0.916, "step": 1731 }, { "epoch": 0.2650344299923489, "grad_norm": 2.693860601738405, "learning_rate": 1.945448181097892e-05, "loss": 0.8514, "step": 1732 }, { "epoch": 0.26518745218056616, "grad_norm": 2.6405405825364494, "learning_rate": 1.9453674196723445e-05, "loss": 0.9356, "step": 1733 }, { "epoch": 0.2653404743687835, "grad_norm": 2.5688295938831693, "learning_rate": 1.9452866001880852e-05, "loss": 0.9191, "step": 1734 }, { "epoch": 0.2654934965570008, "grad_norm": 2.499877747605039, "learning_rate": 1.945205722650078e-05, "loss": 0.888, "step": 1735 }, { "epoch": 0.26564651874521805, "grad_norm": 2.922492979194366, "learning_rate": 1.9451247870632898e-05, "loss": 0.8968, "step": 1736 }, { "epoch": 0.26579954093343533, "grad_norm": 2.5466573461677076, "learning_rate": 1.9450437934326906e-05, "loss": 0.7677, "step": 1737 }, { "epoch": 0.26595256312165266, "grad_norm": 2.5820858780258753, "learning_rate": 1.9449627417632554e-05, "loss": 0.8154, "step": 1738 }, { "epoch": 0.26610558530986994, "grad_norm": 2.2852352856657903, "learning_rate": 1.9448816320599615e-05, "loss": 0.8817, "step": 1739 }, { "epoch": 0.2662586074980872, "grad_norm": 2.473054849169737, "learning_rate": 1.94480046432779e-05, "loss": 0.8555, "step": 1740 }, { "epoch": 0.2664116296863045, "grad_norm": 2.761146652490966, "learning_rate": 1.944719238571726e-05, "loss": 0.8623, "step": 1741 }, { "epoch": 0.26656465187452183, "grad_norm": 2.687345946730486, "learning_rate": 1.944637954796758e-05, "loss": 0.7884, "step": 1742 }, { "epoch": 0.2667176740627391, "grad_norm": 2.6170414834070415, "learning_rate": 1.9445566130078774e-05, "loss": 0.823, "step": 1743 }, { "epoch": 0.2668706962509564, "grad_norm": 2.5065210386666315, "learning_rate": 1.94447521321008e-05, "loss": 0.8013, "step": 1744 }, { "epoch": 0.26702371843917366, "grad_norm": 2.569502254268044, "learning_rate": 1.9443937554083655e-05, "loss": 0.9016, "step": 1745 }, { "epoch": 0.267176740627391, "grad_norm": 2.318313240796993, "learning_rate": 1.9443122396077357e-05, "loss": 0.8254, "step": 1746 }, { "epoch": 0.2673297628156083, "grad_norm": 2.3525786375234063, "learning_rate": 1.9442306658131967e-05, "loss": 0.7866, "step": 1747 }, { "epoch": 0.26748278500382555, "grad_norm": 2.558329696352076, "learning_rate": 1.944149034029759e-05, "loss": 0.8594, "step": 1748 }, { "epoch": 0.26763580719204283, "grad_norm": 2.285331779879835, "learning_rate": 1.944067344262436e-05, "loss": 0.7182, "step": 1749 }, { "epoch": 0.26778882938026016, "grad_norm": 2.607640802866723, "learning_rate": 1.9439855965162436e-05, "loss": 0.7751, "step": 1750 }, { "epoch": 0.26794185156847744, "grad_norm": 2.412052455367587, "learning_rate": 1.9439037907962032e-05, "loss": 0.8451, "step": 1751 }, { "epoch": 0.2680948737566947, "grad_norm": 2.6293034239554456, "learning_rate": 1.943821927107338e-05, "loss": 0.9424, "step": 1752 }, { "epoch": 0.268247895944912, "grad_norm": 2.6107845855461695, "learning_rate": 1.9437400054546765e-05, "loss": 0.8322, "step": 1753 }, { "epoch": 0.26840091813312933, "grad_norm": 2.548893203671856, "learning_rate": 1.9436580258432488e-05, "loss": 0.7892, "step": 1754 }, { "epoch": 0.2685539403213466, "grad_norm": 3.0202000676385783, "learning_rate": 1.9435759882780905e-05, "loss": 0.9641, "step": 1755 }, { "epoch": 0.2687069625095639, "grad_norm": 2.5840683936343467, "learning_rate": 1.9434938927642393e-05, "loss": 0.8314, "step": 1756 }, { "epoch": 0.26885998469778116, "grad_norm": 2.754920380157699, "learning_rate": 1.9434117393067375e-05, "loss": 0.9488, "step": 1757 }, { "epoch": 0.2690130068859985, "grad_norm": 2.48864745158179, "learning_rate": 1.9433295279106296e-05, "loss": 0.8753, "step": 1758 }, { "epoch": 0.2691660290742158, "grad_norm": 2.3149579588067573, "learning_rate": 1.9432472585809657e-05, "loss": 0.8767, "step": 1759 }, { "epoch": 0.26931905126243305, "grad_norm": 2.6859581863399087, "learning_rate": 1.9431649313227972e-05, "loss": 0.8624, "step": 1760 }, { "epoch": 0.26947207345065033, "grad_norm": 2.2212850321280593, "learning_rate": 1.9430825461411805e-05, "loss": 0.761, "step": 1761 }, { "epoch": 0.26962509563886766, "grad_norm": 2.4027565272940654, "learning_rate": 1.9430001030411757e-05, "loss": 0.8739, "step": 1762 }, { "epoch": 0.26977811782708494, "grad_norm": 2.660172589499753, "learning_rate": 1.942917602027845e-05, "loss": 0.9251, "step": 1763 }, { "epoch": 0.2699311400153022, "grad_norm": 2.743206963419242, "learning_rate": 1.942835043106256e-05, "loss": 0.9153, "step": 1764 }, { "epoch": 0.2700841622035195, "grad_norm": 2.6989866804235145, "learning_rate": 1.9427524262814786e-05, "loss": 0.9009, "step": 1765 }, { "epoch": 0.27023718439173683, "grad_norm": 2.487307962781265, "learning_rate": 1.9426697515585865e-05, "loss": 0.7067, "step": 1766 }, { "epoch": 0.2703902065799541, "grad_norm": 2.4830826951639353, "learning_rate": 1.9425870189426573e-05, "loss": 0.8055, "step": 1767 }, { "epoch": 0.2705432287681714, "grad_norm": 2.5304574169611134, "learning_rate": 1.942504228438772e-05, "loss": 0.766, "step": 1768 }, { "epoch": 0.27069625095638866, "grad_norm": 2.328713565548941, "learning_rate": 1.9424213800520147e-05, "loss": 0.7621, "step": 1769 }, { "epoch": 0.270849273144606, "grad_norm": 2.6578397488472723, "learning_rate": 1.9423384737874738e-05, "loss": 0.8997, "step": 1770 }, { "epoch": 0.2710022953328233, "grad_norm": 2.406319383474901, "learning_rate": 1.9422555096502406e-05, "loss": 0.6949, "step": 1771 }, { "epoch": 0.27115531752104055, "grad_norm": 2.320190474995434, "learning_rate": 1.9421724876454108e-05, "loss": 0.7765, "step": 1772 }, { "epoch": 0.27130833970925783, "grad_norm": 2.255508796586933, "learning_rate": 1.9420894077780826e-05, "loss": 0.7989, "step": 1773 }, { "epoch": 0.2714613618974751, "grad_norm": 2.4808465555872967, "learning_rate": 1.942006270053358e-05, "loss": 0.7873, "step": 1774 }, { "epoch": 0.27161438408569244, "grad_norm": 2.6957590465125505, "learning_rate": 1.9419230744763437e-05, "loss": 0.9488, "step": 1775 }, { "epoch": 0.2717674062739097, "grad_norm": 2.6825223214394205, "learning_rate": 1.9418398210521486e-05, "loss": 0.8551, "step": 1776 }, { "epoch": 0.271920428462127, "grad_norm": 2.319812276713523, "learning_rate": 1.941756509785885e-05, "loss": 0.8255, "step": 1777 }, { "epoch": 0.2720734506503443, "grad_norm": 2.736308455119988, "learning_rate": 1.9416731406826704e-05, "loss": 0.784, "step": 1778 }, { "epoch": 0.2722264728385616, "grad_norm": 2.623518511491986, "learning_rate": 1.9415897137476243e-05, "loss": 0.8191, "step": 1779 }, { "epoch": 0.2723794950267789, "grad_norm": 2.3011853266463924, "learning_rate": 1.9415062289858702e-05, "loss": 0.7144, "step": 1780 }, { "epoch": 0.27253251721499616, "grad_norm": 2.820252509123847, "learning_rate": 1.941422686402536e-05, "loss": 0.8803, "step": 1781 }, { "epoch": 0.27268553940321344, "grad_norm": 2.3683267612707195, "learning_rate": 1.9413390860027512e-05, "loss": 0.7924, "step": 1782 }, { "epoch": 0.2728385615914308, "grad_norm": 2.489890268313438, "learning_rate": 1.9412554277916506e-05, "loss": 0.8228, "step": 1783 }, { "epoch": 0.27299158377964805, "grad_norm": 2.5950610129814966, "learning_rate": 1.941171711774372e-05, "loss": 0.8059, "step": 1784 }, { "epoch": 0.27314460596786533, "grad_norm": 2.4309315036464603, "learning_rate": 1.941087937956057e-05, "loss": 0.8338, "step": 1785 }, { "epoch": 0.2732976281560826, "grad_norm": 2.4619004282248484, "learning_rate": 1.94100410634185e-05, "loss": 0.775, "step": 1786 }, { "epoch": 0.27345065034429994, "grad_norm": 2.553665078049513, "learning_rate": 1.9409202169368994e-05, "loss": 0.7981, "step": 1787 }, { "epoch": 0.2736036725325172, "grad_norm": 2.7012115848050375, "learning_rate": 1.9408362697463576e-05, "loss": 0.7708, "step": 1788 }, { "epoch": 0.2737566947207345, "grad_norm": 2.548331990200666, "learning_rate": 1.94075226477538e-05, "loss": 0.8804, "step": 1789 }, { "epoch": 0.2739097169089518, "grad_norm": 2.348960462028775, "learning_rate": 1.9406682020291253e-05, "loss": 0.7993, "step": 1790 }, { "epoch": 0.2740627390971691, "grad_norm": 2.4769272628241517, "learning_rate": 1.9405840815127567e-05, "loss": 0.8696, "step": 1791 }, { "epoch": 0.2742157612853864, "grad_norm": 2.659296349245691, "learning_rate": 1.9404999032314397e-05, "loss": 0.8705, "step": 1792 }, { "epoch": 0.27436878347360366, "grad_norm": 2.5496822689710177, "learning_rate": 1.9404156671903443e-05, "loss": 0.7476, "step": 1793 }, { "epoch": 0.27452180566182094, "grad_norm": 2.1802122613902726, "learning_rate": 1.9403313733946442e-05, "loss": 0.7877, "step": 1794 }, { "epoch": 0.2746748278500383, "grad_norm": 2.636214632414026, "learning_rate": 1.9402470218495158e-05, "loss": 0.8912, "step": 1795 }, { "epoch": 0.27482785003825555, "grad_norm": 2.5169431177436996, "learning_rate": 1.9401626125601395e-05, "loss": 0.7936, "step": 1796 }, { "epoch": 0.27498087222647283, "grad_norm": 2.6274651789471486, "learning_rate": 1.940078145531699e-05, "loss": 0.8681, "step": 1797 }, { "epoch": 0.2751338944146901, "grad_norm": 2.6433366981390223, "learning_rate": 1.9399936207693826e-05, "loss": 0.7657, "step": 1798 }, { "epoch": 0.27528691660290744, "grad_norm": 2.3588536871322914, "learning_rate": 1.9399090382783802e-05, "loss": 0.708, "step": 1799 }, { "epoch": 0.2754399387911247, "grad_norm": 2.6128210317047076, "learning_rate": 1.9398243980638867e-05, "loss": 0.8084, "step": 1800 }, { "epoch": 0.275592960979342, "grad_norm": 2.384807803529249, "learning_rate": 1.9397397001311007e-05, "loss": 0.8958, "step": 1801 }, { "epoch": 0.27574598316755927, "grad_norm": 2.561729566518759, "learning_rate": 1.939654944485223e-05, "loss": 0.8511, "step": 1802 }, { "epoch": 0.2758990053557766, "grad_norm": 2.409093911820784, "learning_rate": 1.9395701311314594e-05, "loss": 0.7611, "step": 1803 }, { "epoch": 0.2760520275439939, "grad_norm": 2.369205457750817, "learning_rate": 1.9394852600750184e-05, "loss": 0.7975, "step": 1804 }, { "epoch": 0.27620504973221116, "grad_norm": 2.501377346696639, "learning_rate": 1.9394003313211126e-05, "loss": 0.7295, "step": 1805 }, { "epoch": 0.27635807192042844, "grad_norm": 2.419062632867766, "learning_rate": 1.9393153448749572e-05, "loss": 0.8551, "step": 1806 }, { "epoch": 0.27651109410864577, "grad_norm": 2.4102900990912928, "learning_rate": 1.9392303007417717e-05, "loss": 0.9076, "step": 1807 }, { "epoch": 0.27666411629686305, "grad_norm": 2.2795357293501457, "learning_rate": 1.9391451989267795e-05, "loss": 0.7942, "step": 1808 }, { "epoch": 0.2768171384850803, "grad_norm": 2.650801351754482, "learning_rate": 1.9390600394352066e-05, "loss": 0.8162, "step": 1809 }, { "epoch": 0.2769701606732976, "grad_norm": 2.389261821947559, "learning_rate": 1.9389748222722827e-05, "loss": 0.7481, "step": 1810 }, { "epoch": 0.27712318286151494, "grad_norm": 2.1568992212804363, "learning_rate": 1.938889547443242e-05, "loss": 0.7289, "step": 1811 }, { "epoch": 0.2772762050497322, "grad_norm": 2.5354031323568242, "learning_rate": 1.9388042149533214e-05, "loss": 0.8771, "step": 1812 }, { "epoch": 0.2774292272379495, "grad_norm": 2.7834820672844067, "learning_rate": 1.938718824807761e-05, "loss": 0.8314, "step": 1813 }, { "epoch": 0.27758224942616677, "grad_norm": 2.342286831367304, "learning_rate": 1.9386333770118054e-05, "loss": 0.7752, "step": 1814 }, { "epoch": 0.2777352716143841, "grad_norm": 2.431265873833426, "learning_rate": 1.9385478715707024e-05, "loss": 0.8154, "step": 1815 }, { "epoch": 0.2778882938026014, "grad_norm": 3.472642793672234, "learning_rate": 1.9384623084897025e-05, "loss": 0.8133, "step": 1816 }, { "epoch": 0.27804131599081866, "grad_norm": 2.4898768514747287, "learning_rate": 1.938376687774061e-05, "loss": 0.9039, "step": 1817 }, { "epoch": 0.27819433817903594, "grad_norm": 2.453143034799608, "learning_rate": 1.9382910094290367e-05, "loss": 0.7147, "step": 1818 }, { "epoch": 0.27834736036725327, "grad_norm": 2.3760737629972275, "learning_rate": 1.9382052734598902e-05, "loss": 0.8291, "step": 1819 }, { "epoch": 0.27850038255547055, "grad_norm": 2.9747112957484365, "learning_rate": 1.938119479871888e-05, "loss": 0.8505, "step": 1820 }, { "epoch": 0.2786534047436878, "grad_norm": 2.6897445660961323, "learning_rate": 1.9380336286702987e-05, "loss": 0.8376, "step": 1821 }, { "epoch": 0.2788064269319051, "grad_norm": 2.9716563210946103, "learning_rate": 1.9379477198603944e-05, "loss": 0.9802, "step": 1822 }, { "epoch": 0.27895944912012244, "grad_norm": 2.508289472632622, "learning_rate": 1.9378617534474514e-05, "loss": 0.9134, "step": 1823 }, { "epoch": 0.2791124713083397, "grad_norm": 2.458112494711284, "learning_rate": 1.937775729436749e-05, "loss": 0.775, "step": 1824 }, { "epoch": 0.279265493496557, "grad_norm": 2.334406138363872, "learning_rate": 1.937689647833571e-05, "loss": 0.82, "step": 1825 }, { "epoch": 0.27941851568477427, "grad_norm": 2.4747860463869005, "learning_rate": 1.937603508643203e-05, "loss": 0.8692, "step": 1826 }, { "epoch": 0.2795715378729916, "grad_norm": 2.477096190126597, "learning_rate": 1.9375173118709357e-05, "loss": 0.8041, "step": 1827 }, { "epoch": 0.2797245600612089, "grad_norm": 2.2376490321825253, "learning_rate": 1.937431057522063e-05, "loss": 0.8329, "step": 1828 }, { "epoch": 0.27987758224942616, "grad_norm": 2.7371937042935217, "learning_rate": 1.9373447456018814e-05, "loss": 0.8114, "step": 1829 }, { "epoch": 0.28003060443764344, "grad_norm": 2.4625318159596223, "learning_rate": 1.9372583761156924e-05, "loss": 0.8698, "step": 1830 }, { "epoch": 0.28018362662586077, "grad_norm": 2.572144753716217, "learning_rate": 1.9371719490687994e-05, "loss": 0.7726, "step": 1831 }, { "epoch": 0.28033664881407805, "grad_norm": 3.1187471865773766, "learning_rate": 1.9370854644665113e-05, "loss": 0.907, "step": 1832 }, { "epoch": 0.2804896710022953, "grad_norm": 2.8014006276479577, "learning_rate": 1.9369989223141386e-05, "loss": 0.879, "step": 1833 }, { "epoch": 0.2806426931905126, "grad_norm": 2.3574224893672655, "learning_rate": 1.9369123226169967e-05, "loss": 0.7876, "step": 1834 }, { "epoch": 0.28079571537872994, "grad_norm": 2.4041536999686706, "learning_rate": 1.936825665380404e-05, "loss": 0.7086, "step": 1835 }, { "epoch": 0.2809487375669472, "grad_norm": 2.629825929620783, "learning_rate": 1.936738950609682e-05, "loss": 0.8817, "step": 1836 }, { "epoch": 0.2811017597551645, "grad_norm": 2.501772945163813, "learning_rate": 1.9366521783101566e-05, "loss": 0.8242, "step": 1837 }, { "epoch": 0.28125478194338177, "grad_norm": 2.5759467678625065, "learning_rate": 1.9365653484871567e-05, "loss": 0.8058, "step": 1838 }, { "epoch": 0.2814078041315991, "grad_norm": 2.506404915094637, "learning_rate": 1.936478461146015e-05, "loss": 0.7182, "step": 1839 }, { "epoch": 0.2815608263198164, "grad_norm": 2.654103256153006, "learning_rate": 1.9363915162920676e-05, "loss": 0.8209, "step": 1840 }, { "epoch": 0.28171384850803366, "grad_norm": 2.352377734931323, "learning_rate": 1.9363045139306536e-05, "loss": 0.7696, "step": 1841 }, { "epoch": 0.28186687069625094, "grad_norm": 2.4722586829533966, "learning_rate": 1.9362174540671167e-05, "loss": 0.7529, "step": 1842 }, { "epoch": 0.28201989288446827, "grad_norm": 2.573994784803902, "learning_rate": 1.9361303367068035e-05, "loss": 0.7462, "step": 1843 }, { "epoch": 0.28217291507268555, "grad_norm": 2.2498326088485974, "learning_rate": 1.9360431618550645e-05, "loss": 0.7454, "step": 1844 }, { "epoch": 0.2823259372609028, "grad_norm": 2.289597736923765, "learning_rate": 1.9359559295172525e-05, "loss": 0.8085, "step": 1845 }, { "epoch": 0.2824789594491201, "grad_norm": 2.7762561379472954, "learning_rate": 1.9358686396987256e-05, "loss": 0.7927, "step": 1846 }, { "epoch": 0.28263198163733744, "grad_norm": 2.6661352318965044, "learning_rate": 1.9357812924048445e-05, "loss": 0.841, "step": 1847 }, { "epoch": 0.2827850038255547, "grad_norm": 2.5975812171329964, "learning_rate": 1.9356938876409735e-05, "loss": 0.8676, "step": 1848 }, { "epoch": 0.282938026013772, "grad_norm": 2.125735235990031, "learning_rate": 1.9356064254124803e-05, "loss": 0.7212, "step": 1849 }, { "epoch": 0.28309104820198927, "grad_norm": 2.7059435749550556, "learning_rate": 1.9355189057247363e-05, "loss": 0.7979, "step": 1850 }, { "epoch": 0.2832440703902066, "grad_norm": 2.4770619838430448, "learning_rate": 1.9354313285831167e-05, "loss": 0.8406, "step": 1851 }, { "epoch": 0.2833970925784239, "grad_norm": 2.695527673356984, "learning_rate": 1.9353436939929997e-05, "loss": 0.7533, "step": 1852 }, { "epoch": 0.28355011476664116, "grad_norm": 3.0149128657035362, "learning_rate": 1.9352560019597675e-05, "loss": 0.8231, "step": 1853 }, { "epoch": 0.28370313695485844, "grad_norm": 2.6130169564784893, "learning_rate": 1.9351682524888052e-05, "loss": 0.7279, "step": 1854 }, { "epoch": 0.28385615914307577, "grad_norm": 2.2246128982518347, "learning_rate": 1.935080445585502e-05, "loss": 0.8827, "step": 1855 }, { "epoch": 0.28400918133129305, "grad_norm": 2.21223342955157, "learning_rate": 1.934992581255251e-05, "loss": 0.727, "step": 1856 }, { "epoch": 0.2841622035195103, "grad_norm": 2.579774600746488, "learning_rate": 1.934904659503448e-05, "loss": 0.8197, "step": 1857 }, { "epoch": 0.2843152257077276, "grad_norm": 2.485921849818407, "learning_rate": 1.9348166803354923e-05, "loss": 0.77, "step": 1858 }, { "epoch": 0.28446824789594494, "grad_norm": 2.407776330565242, "learning_rate": 1.9347286437567868e-05, "loss": 0.8485, "step": 1859 }, { "epoch": 0.2846212700841622, "grad_norm": 2.462948009991273, "learning_rate": 1.934640549772739e-05, "loss": 0.748, "step": 1860 }, { "epoch": 0.2847742922723795, "grad_norm": 2.5536317903542, "learning_rate": 1.9345523983887585e-05, "loss": 0.8609, "step": 1861 }, { "epoch": 0.28492731446059677, "grad_norm": 2.4342429583754637, "learning_rate": 1.9344641896102596e-05, "loss": 0.8274, "step": 1862 }, { "epoch": 0.2850803366488141, "grad_norm": 2.629328839248284, "learning_rate": 1.934375923442659e-05, "loss": 0.7806, "step": 1863 }, { "epoch": 0.2852333588370314, "grad_norm": 2.825115788194663, "learning_rate": 1.9342875998913774e-05, "loss": 0.843, "step": 1864 }, { "epoch": 0.28538638102524866, "grad_norm": 2.432079122950358, "learning_rate": 1.93419921896184e-05, "loss": 0.6456, "step": 1865 }, { "epoch": 0.28553940321346594, "grad_norm": 2.437685754761619, "learning_rate": 1.9341107806594733e-05, "loss": 0.8309, "step": 1866 }, { "epoch": 0.28569242540168327, "grad_norm": 2.551675385381315, "learning_rate": 1.9340222849897096e-05, "loss": 0.8865, "step": 1867 }, { "epoch": 0.28584544758990055, "grad_norm": 2.508467686617371, "learning_rate": 1.9339337319579833e-05, "loss": 0.7803, "step": 1868 }, { "epoch": 0.2859984697781178, "grad_norm": 2.5126960561564413, "learning_rate": 1.933845121569733e-05, "loss": 0.7849, "step": 1869 }, { "epoch": 0.2861514919663351, "grad_norm": 2.613958666791803, "learning_rate": 1.9337564538304004e-05, "loss": 0.8853, "step": 1870 }, { "epoch": 0.28630451415455244, "grad_norm": 2.5244937795138376, "learning_rate": 1.9336677287454316e-05, "loss": 0.8044, "step": 1871 }, { "epoch": 0.2864575363427697, "grad_norm": 2.565342492613703, "learning_rate": 1.9335789463202744e-05, "loss": 0.7959, "step": 1872 }, { "epoch": 0.286610558530987, "grad_norm": 2.5403185668043773, "learning_rate": 1.9334901065603823e-05, "loss": 0.7555, "step": 1873 }, { "epoch": 0.28676358071920427, "grad_norm": 2.4550949680199, "learning_rate": 1.9334012094712108e-05, "loss": 0.7674, "step": 1874 }, { "epoch": 0.2869166029074216, "grad_norm": 2.3284267657849793, "learning_rate": 1.9333122550582197e-05, "loss": 0.7756, "step": 1875 }, { "epoch": 0.2870696250956389, "grad_norm": 2.6275948038792323, "learning_rate": 1.9332232433268718e-05, "loss": 0.7993, "step": 1876 }, { "epoch": 0.28722264728385616, "grad_norm": 2.568517017557821, "learning_rate": 1.9331341742826337e-05, "loss": 0.7721, "step": 1877 }, { "epoch": 0.28737566947207344, "grad_norm": 2.334517381546241, "learning_rate": 1.933045047930976e-05, "loss": 0.8133, "step": 1878 }, { "epoch": 0.28752869166029077, "grad_norm": 2.3005139977871942, "learning_rate": 1.932955864277371e-05, "loss": 0.8128, "step": 1879 }, { "epoch": 0.28768171384850805, "grad_norm": 2.7207446187801017, "learning_rate": 1.932866623327297e-05, "loss": 0.8604, "step": 1880 }, { "epoch": 0.2878347360367253, "grad_norm": 2.492245625318649, "learning_rate": 1.9327773250862344e-05, "loss": 0.845, "step": 1881 }, { "epoch": 0.2879877582249426, "grad_norm": 2.467518676739865, "learning_rate": 1.932687969559667e-05, "loss": 0.9091, "step": 1882 }, { "epoch": 0.2881407804131599, "grad_norm": 2.538560762578847, "learning_rate": 1.9325985567530825e-05, "loss": 0.7151, "step": 1883 }, { "epoch": 0.2882938026013772, "grad_norm": 2.3038764537536305, "learning_rate": 1.9325090866719726e-05, "loss": 0.8092, "step": 1884 }, { "epoch": 0.2884468247895945, "grad_norm": 2.703874276713587, "learning_rate": 1.9324195593218315e-05, "loss": 0.7405, "step": 1885 }, { "epoch": 0.28859984697781177, "grad_norm": 2.2950766344341047, "learning_rate": 1.932329974708158e-05, "loss": 0.6749, "step": 1886 }, { "epoch": 0.28875286916602905, "grad_norm": 2.4093547511917826, "learning_rate": 1.932240332836453e-05, "loss": 0.9128, "step": 1887 }, { "epoch": 0.2889058913542464, "grad_norm": 2.743832608962814, "learning_rate": 1.9321506337122224e-05, "loss": 0.8467, "step": 1888 }, { "epoch": 0.28905891354246366, "grad_norm": 2.418044806593421, "learning_rate": 1.932060877340975e-05, "loss": 0.7427, "step": 1889 }, { "epoch": 0.28921193573068094, "grad_norm": 2.4645967587420987, "learning_rate": 1.9319710637282227e-05, "loss": 0.8417, "step": 1890 }, { "epoch": 0.2893649579188982, "grad_norm": 2.6800041375280763, "learning_rate": 1.9318811928794817e-05, "loss": 0.8604, "step": 1891 }, { "epoch": 0.28951798010711555, "grad_norm": 2.8142534613435033, "learning_rate": 1.9317912648002708e-05, "loss": 0.8685, "step": 1892 }, { "epoch": 0.2896710022953328, "grad_norm": 2.517363713662083, "learning_rate": 1.931701279496113e-05, "loss": 0.8123, "step": 1893 }, { "epoch": 0.2898240244835501, "grad_norm": 2.559876231147071, "learning_rate": 1.9316112369725354e-05, "loss": 0.8617, "step": 1894 }, { "epoch": 0.2899770466717674, "grad_norm": 2.247438925725813, "learning_rate": 1.9315211372350667e-05, "loss": 0.8346, "step": 1895 }, { "epoch": 0.2901300688599847, "grad_norm": 2.4545703617872365, "learning_rate": 1.9314309802892407e-05, "loss": 0.7889, "step": 1896 }, { "epoch": 0.290283091048202, "grad_norm": 2.3768578395555835, "learning_rate": 1.931340766140595e-05, "loss": 0.8748, "step": 1897 }, { "epoch": 0.29043611323641927, "grad_norm": 2.429358667095976, "learning_rate": 1.931250494794669e-05, "loss": 0.8916, "step": 1898 }, { "epoch": 0.29058913542463655, "grad_norm": 2.494900204607473, "learning_rate": 1.9311601662570072e-05, "loss": 0.8617, "step": 1899 }, { "epoch": 0.2907421576128539, "grad_norm": 2.4468745440891295, "learning_rate": 1.931069780533157e-05, "loss": 0.7456, "step": 1900 }, { "epoch": 0.29089517980107116, "grad_norm": 2.736762375685349, "learning_rate": 1.930979337628669e-05, "loss": 0.7943, "step": 1901 }, { "epoch": 0.29104820198928844, "grad_norm": 2.702694904834764, "learning_rate": 1.930888837549098e-05, "loss": 0.8252, "step": 1902 }, { "epoch": 0.2912012241775057, "grad_norm": 2.527780804908085, "learning_rate": 1.9307982803000017e-05, "loss": 0.7839, "step": 1903 }, { "epoch": 0.29135424636572305, "grad_norm": 2.579662212769781, "learning_rate": 1.9307076658869417e-05, "loss": 0.8185, "step": 1904 }, { "epoch": 0.2915072685539403, "grad_norm": 2.3463765755521186, "learning_rate": 1.9306169943154832e-05, "loss": 0.7127, "step": 1905 }, { "epoch": 0.2916602907421576, "grad_norm": 2.5764100564188106, "learning_rate": 1.9305262655911946e-05, "loss": 0.8555, "step": 1906 }, { "epoch": 0.2918133129303749, "grad_norm": 2.7225480761530623, "learning_rate": 1.9304354797196475e-05, "loss": 0.8489, "step": 1907 }, { "epoch": 0.2919663351185922, "grad_norm": 2.9225439859512687, "learning_rate": 1.9303446367064175e-05, "loss": 0.8204, "step": 1908 }, { "epoch": 0.2921193573068095, "grad_norm": 2.7728976761121276, "learning_rate": 1.9302537365570843e-05, "loss": 0.8533, "step": 1909 }, { "epoch": 0.29227237949502677, "grad_norm": 2.788291953812961, "learning_rate": 1.9301627792772297e-05, "loss": 0.9378, "step": 1910 }, { "epoch": 0.29242540168324405, "grad_norm": 2.4399042097338235, "learning_rate": 1.93007176487244e-05, "loss": 0.7785, "step": 1911 }, { "epoch": 0.2925784238714614, "grad_norm": 2.428125304895574, "learning_rate": 1.929980693348305e-05, "loss": 0.7808, "step": 1912 }, { "epoch": 0.29273144605967866, "grad_norm": 2.472689699702656, "learning_rate": 1.929889564710417e-05, "loss": 0.8331, "step": 1913 }, { "epoch": 0.29288446824789593, "grad_norm": 2.5262689416061037, "learning_rate": 1.9297983789643735e-05, "loss": 0.8749, "step": 1914 }, { "epoch": 0.2930374904361132, "grad_norm": 2.638386784656946, "learning_rate": 1.929707136115774e-05, "loss": 0.919, "step": 1915 }, { "epoch": 0.29319051262433055, "grad_norm": 2.4550326677156344, "learning_rate": 1.9296158361702223e-05, "loss": 0.7279, "step": 1916 }, { "epoch": 0.2933435348125478, "grad_norm": 2.643293435117193, "learning_rate": 1.9295244791333255e-05, "loss": 0.8779, "step": 1917 }, { "epoch": 0.2934965570007651, "grad_norm": 2.5727735075626557, "learning_rate": 1.929433065010694e-05, "loss": 0.7984, "step": 1918 }, { "epoch": 0.2936495791889824, "grad_norm": 2.434329291686872, "learning_rate": 1.929341593807942e-05, "loss": 0.858, "step": 1919 }, { "epoch": 0.2938026013771997, "grad_norm": 2.4388397392820393, "learning_rate": 1.9292500655306872e-05, "loss": 0.8588, "step": 1920 }, { "epoch": 0.293955623565417, "grad_norm": 2.469418880655886, "learning_rate": 1.9291584801845508e-05, "loss": 0.8674, "step": 1921 }, { "epoch": 0.29410864575363427, "grad_norm": 2.300386835534169, "learning_rate": 1.929066837775157e-05, "loss": 0.8688, "step": 1922 }, { "epoch": 0.29426166794185155, "grad_norm": 2.317847305611336, "learning_rate": 1.9289751383081342e-05, "loss": 0.7381, "step": 1923 }, { "epoch": 0.2944146901300689, "grad_norm": 2.4895403782250067, "learning_rate": 1.928883381789114e-05, "loss": 0.9119, "step": 1924 }, { "epoch": 0.29456771231828616, "grad_norm": 2.56795549530539, "learning_rate": 1.9287915682237314e-05, "loss": 0.7846, "step": 1925 }, { "epoch": 0.29472073450650343, "grad_norm": 2.549866674572653, "learning_rate": 1.928699697617625e-05, "loss": 0.7652, "step": 1926 }, { "epoch": 0.2948737566947207, "grad_norm": 2.4390351626614057, "learning_rate": 1.9286077699764376e-05, "loss": 0.7653, "step": 1927 }, { "epoch": 0.29502677888293805, "grad_norm": 2.4418868117867394, "learning_rate": 1.9285157853058142e-05, "loss": 0.8644, "step": 1928 }, { "epoch": 0.2951798010711553, "grad_norm": 2.502305903557577, "learning_rate": 1.9284237436114038e-05, "loss": 0.9264, "step": 1929 }, { "epoch": 0.2953328232593726, "grad_norm": 2.2963754527727933, "learning_rate": 1.9283316448988593e-05, "loss": 0.7938, "step": 1930 }, { "epoch": 0.2954858454475899, "grad_norm": 3.0614462809673526, "learning_rate": 1.928239489173837e-05, "loss": 0.8869, "step": 1931 }, { "epoch": 0.2956388676358072, "grad_norm": 2.4080850709017385, "learning_rate": 1.9281472764419962e-05, "loss": 0.8448, "step": 1932 }, { "epoch": 0.2957918898240245, "grad_norm": 2.5403826284863418, "learning_rate": 1.9280550067090003e-05, "loss": 0.7956, "step": 1933 }, { "epoch": 0.29594491201224177, "grad_norm": 2.2358612452947093, "learning_rate": 1.927962679980516e-05, "loss": 0.7314, "step": 1934 }, { "epoch": 0.29609793420045905, "grad_norm": 2.43089626487857, "learning_rate": 1.927870296262213e-05, "loss": 0.8507, "step": 1935 }, { "epoch": 0.2962509563886764, "grad_norm": 2.455578482886986, "learning_rate": 1.927777855559766e-05, "loss": 0.8509, "step": 1936 }, { "epoch": 0.29640397857689366, "grad_norm": 2.564323058941907, "learning_rate": 1.9276853578788502e-05, "loss": 0.9223, "step": 1937 }, { "epoch": 0.29655700076511093, "grad_norm": 2.7623433756645994, "learning_rate": 1.9275928032251484e-05, "loss": 0.9499, "step": 1938 }, { "epoch": 0.2967100229533282, "grad_norm": 2.3216848528978558, "learning_rate": 1.9275001916043436e-05, "loss": 0.7305, "step": 1939 }, { "epoch": 0.29686304514154555, "grad_norm": 2.388246616293578, "learning_rate": 1.927407523022123e-05, "loss": 0.928, "step": 1940 }, { "epoch": 0.2970160673297628, "grad_norm": 2.3536391454598906, "learning_rate": 1.927314797484179e-05, "loss": 0.7234, "step": 1941 }, { "epoch": 0.2971690895179801, "grad_norm": 2.424189518386324, "learning_rate": 1.9272220149962057e-05, "loss": 0.8198, "step": 1942 }, { "epoch": 0.2973221117061974, "grad_norm": 2.763169194414181, "learning_rate": 1.927129175563901e-05, "loss": 0.8644, "step": 1943 }, { "epoch": 0.2974751338944147, "grad_norm": 2.4385903706286274, "learning_rate": 1.9270362791929663e-05, "loss": 0.7624, "step": 1944 }, { "epoch": 0.297628156082632, "grad_norm": 2.3830375578538048, "learning_rate": 1.9269433258891076e-05, "loss": 0.785, "step": 1945 }, { "epoch": 0.29778117827084927, "grad_norm": 3.284351176237654, "learning_rate": 1.926850315658033e-05, "loss": 0.9291, "step": 1946 }, { "epoch": 0.29793420045906654, "grad_norm": 2.3165311177119188, "learning_rate": 1.9267572485054544e-05, "loss": 0.8254, "step": 1947 }, { "epoch": 0.2980872226472839, "grad_norm": 2.4524168762163483, "learning_rate": 1.926664124437088e-05, "loss": 0.7993, "step": 1948 }, { "epoch": 0.29824024483550116, "grad_norm": 2.6058768862175388, "learning_rate": 1.9265709434586522e-05, "loss": 0.816, "step": 1949 }, { "epoch": 0.29839326702371843, "grad_norm": 2.243298273276326, "learning_rate": 1.9264777055758704e-05, "loss": 0.7164, "step": 1950 }, { "epoch": 0.2985462892119357, "grad_norm": 2.62737281453155, "learning_rate": 1.926384410794468e-05, "loss": 0.9408, "step": 1951 }, { "epoch": 0.29869931140015304, "grad_norm": 2.661164387856838, "learning_rate": 1.9262910591201752e-05, "loss": 0.8959, "step": 1952 }, { "epoch": 0.2988523335883703, "grad_norm": 2.463256788156899, "learning_rate": 1.9261976505587245e-05, "loss": 0.7347, "step": 1953 }, { "epoch": 0.2990053557765876, "grad_norm": 2.696961971747593, "learning_rate": 1.9261041851158526e-05, "loss": 0.8956, "step": 1954 }, { "epoch": 0.2991583779648049, "grad_norm": 2.3188961750528403, "learning_rate": 1.9260106627973e-05, "loss": 0.8279, "step": 1955 }, { "epoch": 0.2993114001530222, "grad_norm": 2.4269478964045117, "learning_rate": 1.92591708360881e-05, "loss": 0.7931, "step": 1956 }, { "epoch": 0.2994644223412395, "grad_norm": 2.48052465786968, "learning_rate": 1.92582344755613e-05, "loss": 0.8876, "step": 1957 }, { "epoch": 0.29961744452945677, "grad_norm": 2.309190634912905, "learning_rate": 1.9257297546450097e-05, "loss": 0.8796, "step": 1958 }, { "epoch": 0.29977046671767404, "grad_norm": 2.5168712148937717, "learning_rate": 1.925636004881204e-05, "loss": 0.9515, "step": 1959 }, { "epoch": 0.2999234889058914, "grad_norm": 2.3043960080089723, "learning_rate": 1.92554219827047e-05, "loss": 0.8175, "step": 1960 }, { "epoch": 0.30007651109410866, "grad_norm": 2.5426296255764176, "learning_rate": 1.925448334818569e-05, "loss": 0.7521, "step": 1961 }, { "epoch": 0.30022953328232593, "grad_norm": 2.414666262662447, "learning_rate": 1.9253544145312654e-05, "loss": 0.9249, "step": 1962 }, { "epoch": 0.3003825554705432, "grad_norm": 2.5867144779294184, "learning_rate": 1.925260437414327e-05, "loss": 0.9349, "step": 1963 }, { "epoch": 0.30053557765876054, "grad_norm": 2.473431576610012, "learning_rate": 1.9251664034735258e-05, "loss": 0.8798, "step": 1964 }, { "epoch": 0.3006885998469778, "grad_norm": 2.1385680671542278, "learning_rate": 1.9250723127146362e-05, "loss": 0.7429, "step": 1965 }, { "epoch": 0.3008416220351951, "grad_norm": 2.5981076958560956, "learning_rate": 1.9249781651434372e-05, "loss": 0.7934, "step": 1966 }, { "epoch": 0.3009946442234124, "grad_norm": 2.455515129458916, "learning_rate": 1.92488396076571e-05, "loss": 0.905, "step": 1967 }, { "epoch": 0.3011476664116297, "grad_norm": 2.8389366912830303, "learning_rate": 1.9247896995872413e-05, "loss": 0.843, "step": 1968 }, { "epoch": 0.301300688599847, "grad_norm": 2.420967448885132, "learning_rate": 1.924695381613819e-05, "loss": 0.8977, "step": 1969 }, { "epoch": 0.30145371078806427, "grad_norm": 2.2695379545380314, "learning_rate": 1.924601006851236e-05, "loss": 0.6859, "step": 1970 }, { "epoch": 0.30160673297628154, "grad_norm": 2.2636926091187144, "learning_rate": 1.924506575305288e-05, "loss": 0.7773, "step": 1971 }, { "epoch": 0.3017597551644989, "grad_norm": 2.7898273917009755, "learning_rate": 1.9244120869817746e-05, "loss": 0.8155, "step": 1972 }, { "epoch": 0.30191277735271616, "grad_norm": 2.4340034177552856, "learning_rate": 1.924317541886499e-05, "loss": 0.8266, "step": 1973 }, { "epoch": 0.30206579954093343, "grad_norm": 2.2766416109438583, "learning_rate": 1.9242229400252666e-05, "loss": 0.8102, "step": 1974 }, { "epoch": 0.3022188217291507, "grad_norm": 2.5561178004217955, "learning_rate": 1.924128281403888e-05, "loss": 0.8036, "step": 1975 }, { "epoch": 0.30237184391736804, "grad_norm": 2.379553828425003, "learning_rate": 1.9240335660281764e-05, "loss": 0.7768, "step": 1976 }, { "epoch": 0.3025248661055853, "grad_norm": 2.5252891466757292, "learning_rate": 1.923938793903949e-05, "loss": 0.7895, "step": 1977 }, { "epoch": 0.3026778882938026, "grad_norm": 2.2715040776578252, "learning_rate": 1.9238439650370256e-05, "loss": 0.8965, "step": 1978 }, { "epoch": 0.3028309104820199, "grad_norm": 2.465898560897087, "learning_rate": 1.92374907943323e-05, "loss": 0.7972, "step": 1979 }, { "epoch": 0.3029839326702372, "grad_norm": 2.628192379084707, "learning_rate": 1.92365413709839e-05, "loss": 0.8772, "step": 1980 }, { "epoch": 0.3031369548584545, "grad_norm": 2.59756485098828, "learning_rate": 1.9235591380383362e-05, "loss": 0.8435, "step": 1981 }, { "epoch": 0.30328997704667177, "grad_norm": 2.3284828288876116, "learning_rate": 1.9234640822589023e-05, "loss": 0.7544, "step": 1982 }, { "epoch": 0.30344299923488904, "grad_norm": 2.603774040163367, "learning_rate": 1.9233689697659268e-05, "loss": 0.8142, "step": 1983 }, { "epoch": 0.3035960214231064, "grad_norm": 2.4923640409532393, "learning_rate": 1.923273800565251e-05, "loss": 0.9354, "step": 1984 }, { "epoch": 0.30374904361132365, "grad_norm": 2.6499802758692765, "learning_rate": 1.9231785746627186e-05, "loss": 0.8094, "step": 1985 }, { "epoch": 0.30390206579954093, "grad_norm": 2.2595162035840697, "learning_rate": 1.9230832920641784e-05, "loss": 0.8003, "step": 1986 }, { "epoch": 0.3040550879877582, "grad_norm": 2.4192559742903246, "learning_rate": 1.922987952775482e-05, "loss": 0.804, "step": 1987 }, { "epoch": 0.30420811017597554, "grad_norm": 2.316981550914932, "learning_rate": 1.9228925568024855e-05, "loss": 0.754, "step": 1988 }, { "epoch": 0.3043611323641928, "grad_norm": 7.151782661755483, "learning_rate": 1.9227971041510463e-05, "loss": 0.8844, "step": 1989 }, { "epoch": 0.3045141545524101, "grad_norm": 2.734107657629692, "learning_rate": 1.9227015948270266e-05, "loss": 0.7944, "step": 1990 }, { "epoch": 0.3046671767406274, "grad_norm": 2.529668625783798, "learning_rate": 1.9226060288362927e-05, "loss": 0.8836, "step": 1991 }, { "epoch": 0.3048201989288447, "grad_norm": 2.546234327275014, "learning_rate": 1.922510406184713e-05, "loss": 0.7898, "step": 1992 }, { "epoch": 0.304973221117062, "grad_norm": 2.395162331895941, "learning_rate": 1.922414726878161e-05, "loss": 0.7806, "step": 1993 }, { "epoch": 0.30512624330527927, "grad_norm": 2.55658252083043, "learning_rate": 1.9223189909225114e-05, "loss": 0.8855, "step": 1994 }, { "epoch": 0.30527926549349654, "grad_norm": 2.4501674002626976, "learning_rate": 1.9222231983236447e-05, "loss": 0.8802, "step": 1995 }, { "epoch": 0.3054322876817138, "grad_norm": 2.4889603134355314, "learning_rate": 1.9221273490874436e-05, "loss": 0.7977, "step": 1996 }, { "epoch": 0.30558530986993115, "grad_norm": 2.3944327612655725, "learning_rate": 1.922031443219795e-05, "loss": 0.832, "step": 1997 }, { "epoch": 0.30573833205814843, "grad_norm": 2.995473969176262, "learning_rate": 1.921935480726588e-05, "loss": 0.7878, "step": 1998 }, { "epoch": 0.3058913542463657, "grad_norm": 2.7168338445829474, "learning_rate": 1.9218394616137166e-05, "loss": 0.8424, "step": 1999 }, { "epoch": 0.306044376434583, "grad_norm": 2.5441476948697854, "learning_rate": 1.921743385887078e-05, "loss": 0.8708, "step": 2000 }, { "epoch": 0.3061973986228003, "grad_norm": 2.4494392535414096, "learning_rate": 1.9216472535525717e-05, "loss": 0.8481, "step": 2001 }, { "epoch": 0.3063504208110176, "grad_norm": 2.6112325729047923, "learning_rate": 1.921551064616102e-05, "loss": 0.8245, "step": 2002 }, { "epoch": 0.3065034429992349, "grad_norm": 2.3444443429432416, "learning_rate": 1.9214548190835766e-05, "loss": 0.7449, "step": 2003 }, { "epoch": 0.30665646518745215, "grad_norm": 2.446188822684606, "learning_rate": 1.9213585169609058e-05, "loss": 0.831, "step": 2004 }, { "epoch": 0.3068094873756695, "grad_norm": 2.2145856338125602, "learning_rate": 1.9212621582540042e-05, "loss": 0.7718, "step": 2005 }, { "epoch": 0.30696250956388677, "grad_norm": 2.5819277402890526, "learning_rate": 1.9211657429687895e-05, "loss": 0.8231, "step": 2006 }, { "epoch": 0.30711553175210404, "grad_norm": 2.0650856048224617, "learning_rate": 1.9210692711111827e-05, "loss": 0.8361, "step": 2007 }, { "epoch": 0.3072685539403213, "grad_norm": 2.6944527828836615, "learning_rate": 1.920972742687109e-05, "loss": 0.8581, "step": 2008 }, { "epoch": 0.30742157612853865, "grad_norm": 2.1937400834532825, "learning_rate": 1.920876157702496e-05, "loss": 0.7833, "step": 2009 }, { "epoch": 0.30757459831675593, "grad_norm": 2.5777698912582205, "learning_rate": 1.920779516163276e-05, "loss": 0.8232, "step": 2010 }, { "epoch": 0.3077276205049732, "grad_norm": 2.2556884773655463, "learning_rate": 1.920682818075384e-05, "loss": 0.7792, "step": 2011 }, { "epoch": 0.3078806426931905, "grad_norm": 2.682048126514191, "learning_rate": 1.920586063444758e-05, "loss": 0.8919, "step": 2012 }, { "epoch": 0.3080336648814078, "grad_norm": 2.570180102807476, "learning_rate": 1.9204892522773405e-05, "loss": 0.8524, "step": 2013 }, { "epoch": 0.3081866870696251, "grad_norm": 2.563307171833568, "learning_rate": 1.9203923845790767e-05, "loss": 0.9107, "step": 2014 }, { "epoch": 0.3083397092578424, "grad_norm": 2.2104842092303336, "learning_rate": 1.9202954603559164e-05, "loss": 0.843, "step": 2015 }, { "epoch": 0.30849273144605965, "grad_norm": 2.5360024853513417, "learning_rate": 1.920198479613812e-05, "loss": 0.7056, "step": 2016 }, { "epoch": 0.308645753634277, "grad_norm": 2.7399669049444255, "learning_rate": 1.9201014423587187e-05, "loss": 0.8708, "step": 2017 }, { "epoch": 0.30879877582249426, "grad_norm": 2.4553097150606447, "learning_rate": 1.9200043485965963e-05, "loss": 0.8834, "step": 2018 }, { "epoch": 0.30895179801071154, "grad_norm": 2.8655692998053697, "learning_rate": 1.919907198333408e-05, "loss": 0.9893, "step": 2019 }, { "epoch": 0.3091048201989288, "grad_norm": 2.2561640988275546, "learning_rate": 1.9198099915751198e-05, "loss": 0.8193, "step": 2020 }, { "epoch": 0.30925784238714615, "grad_norm": 2.4836616435229, "learning_rate": 1.9197127283277017e-05, "loss": 0.8635, "step": 2021 }, { "epoch": 0.30941086457536343, "grad_norm": 2.5715982339637065, "learning_rate": 1.919615408597127e-05, "loss": 0.8791, "step": 2022 }, { "epoch": 0.3095638867635807, "grad_norm": 2.5943107227567848, "learning_rate": 1.9195180323893728e-05, "loss": 0.8121, "step": 2023 }, { "epoch": 0.309716908951798, "grad_norm": 2.2801541193319492, "learning_rate": 1.9194205997104187e-05, "loss": 0.7939, "step": 2024 }, { "epoch": 0.3098699311400153, "grad_norm": 2.459839184463675, "learning_rate": 1.919323110566249e-05, "loss": 0.9424, "step": 2025 }, { "epoch": 0.3100229533282326, "grad_norm": 2.1911119079151993, "learning_rate": 1.9192255649628503e-05, "loss": 0.8289, "step": 2026 }, { "epoch": 0.3101759755164499, "grad_norm": 2.4644860950022722, "learning_rate": 1.919127962906214e-05, "loss": 0.8372, "step": 2027 }, { "epoch": 0.31032899770466715, "grad_norm": 2.2356742468806026, "learning_rate": 1.919030304402334e-05, "loss": 0.7998, "step": 2028 }, { "epoch": 0.3104820198928845, "grad_norm": 2.5171160782594795, "learning_rate": 1.9189325894572076e-05, "loss": 0.8296, "step": 2029 }, { "epoch": 0.31063504208110176, "grad_norm": 2.7014831046380934, "learning_rate": 1.9188348180768356e-05, "loss": 0.9374, "step": 2030 }, { "epoch": 0.31078806426931904, "grad_norm": 2.734471374350287, "learning_rate": 1.918736990267223e-05, "loss": 0.7857, "step": 2031 }, { "epoch": 0.3109410864575363, "grad_norm": 2.7131784176496487, "learning_rate": 1.918639106034378e-05, "loss": 0.8601, "step": 2032 }, { "epoch": 0.31109410864575365, "grad_norm": 2.513092380080759, "learning_rate": 1.918541165384312e-05, "loss": 0.7936, "step": 2033 }, { "epoch": 0.31124713083397093, "grad_norm": 2.927728563084092, "learning_rate": 1.9184431683230392e-05, "loss": 1.0046, "step": 2034 }, { "epoch": 0.3114001530221882, "grad_norm": 2.404772452838863, "learning_rate": 1.9183451148565787e-05, "loss": 0.7403, "step": 2035 }, { "epoch": 0.3115531752104055, "grad_norm": 2.516603840983081, "learning_rate": 1.918247004990952e-05, "loss": 0.7745, "step": 2036 }, { "epoch": 0.3117061973986228, "grad_norm": 2.386770013882607, "learning_rate": 1.9181488387321848e-05, "loss": 0.8298, "step": 2037 }, { "epoch": 0.3118592195868401, "grad_norm": 2.6428767252854555, "learning_rate": 1.918050616086305e-05, "loss": 0.8006, "step": 2038 }, { "epoch": 0.3120122417750574, "grad_norm": 3.1631302400947825, "learning_rate": 1.917952337059346e-05, "loss": 0.8649, "step": 2039 }, { "epoch": 0.31216526396327465, "grad_norm": 2.442779992040568, "learning_rate": 1.9178540016573425e-05, "loss": 0.6726, "step": 2040 }, { "epoch": 0.312318286151492, "grad_norm": 2.44707016093417, "learning_rate": 1.9177556098863345e-05, "loss": 0.8137, "step": 2041 }, { "epoch": 0.31247130833970926, "grad_norm": 2.525282513459153, "learning_rate": 1.9176571617523637e-05, "loss": 0.686, "step": 2042 }, { "epoch": 0.31262433052792654, "grad_norm": 2.5982517000505405, "learning_rate": 1.9175586572614772e-05, "loss": 0.7722, "step": 2043 }, { "epoch": 0.3127773527161438, "grad_norm": 2.452454622090508, "learning_rate": 1.9174600964197243e-05, "loss": 0.8334, "step": 2044 }, { "epoch": 0.31293037490436115, "grad_norm": 2.618928484678063, "learning_rate": 1.917361479233157e-05, "loss": 0.9241, "step": 2045 }, { "epoch": 0.31308339709257843, "grad_norm": 2.435076329407817, "learning_rate": 1.9172628057078328e-05, "loss": 0.7933, "step": 2046 }, { "epoch": 0.3132364192807957, "grad_norm": 2.533700576320131, "learning_rate": 1.9171640758498117e-05, "loss": 0.7797, "step": 2047 }, { "epoch": 0.313389441469013, "grad_norm": 2.4696336812088546, "learning_rate": 1.917065289665156e-05, "loss": 0.7301, "step": 2048 }, { "epoch": 0.3135424636572303, "grad_norm": 2.4661352148431868, "learning_rate": 1.9169664471599338e-05, "loss": 0.8389, "step": 2049 }, { "epoch": 0.3136954858454476, "grad_norm": 2.3361325316370767, "learning_rate": 1.916867548340215e-05, "loss": 0.7848, "step": 2050 }, { "epoch": 0.3138485080336649, "grad_norm": 2.7039151927382106, "learning_rate": 1.916768593212073e-05, "loss": 0.7517, "step": 2051 }, { "epoch": 0.31400153022188215, "grad_norm": 2.302389279273721, "learning_rate": 1.9166695817815854e-05, "loss": 0.816, "step": 2052 }, { "epoch": 0.3141545524100995, "grad_norm": 2.33484318748721, "learning_rate": 1.9165705140548325e-05, "loss": 0.7547, "step": 2053 }, { "epoch": 0.31430757459831676, "grad_norm": 2.4695189957920336, "learning_rate": 1.916471390037899e-05, "loss": 0.8419, "step": 2054 }, { "epoch": 0.31446059678653404, "grad_norm": 2.6640125173002573, "learning_rate": 1.916372209736872e-05, "loss": 0.8851, "step": 2055 }, { "epoch": 0.3146136189747513, "grad_norm": 2.5342638647426154, "learning_rate": 1.9162729731578428e-05, "loss": 0.7527, "step": 2056 }, { "epoch": 0.31476664116296865, "grad_norm": 2.534955417695161, "learning_rate": 1.916173680306906e-05, "loss": 0.8765, "step": 2057 }, { "epoch": 0.31491966335118593, "grad_norm": 2.6830494811488, "learning_rate": 1.9160743311901592e-05, "loss": 0.7909, "step": 2058 }, { "epoch": 0.3150726855394032, "grad_norm": 2.550549374015315, "learning_rate": 1.915974925813704e-05, "loss": 0.7659, "step": 2059 }, { "epoch": 0.3152257077276205, "grad_norm": 2.3928412556709624, "learning_rate": 1.9158754641836452e-05, "loss": 0.7826, "step": 2060 }, { "epoch": 0.3153787299158378, "grad_norm": 2.9224489475145274, "learning_rate": 1.9157759463060914e-05, "loss": 1.0251, "step": 2061 }, { "epoch": 0.3155317521040551, "grad_norm": 2.5484811016781466, "learning_rate": 1.9156763721871542e-05, "loss": 0.8764, "step": 2062 }, { "epoch": 0.3156847742922724, "grad_norm": 2.446852756534984, "learning_rate": 1.9155767418329488e-05, "loss": 0.8835, "step": 2063 }, { "epoch": 0.31583779648048965, "grad_norm": 2.72442600547987, "learning_rate": 1.915477055249594e-05, "loss": 0.8934, "step": 2064 }, { "epoch": 0.315990818668707, "grad_norm": 2.4582101077154195, "learning_rate": 1.9153773124432117e-05, "loss": 0.7802, "step": 2065 }, { "epoch": 0.31614384085692426, "grad_norm": 2.5396120010948358, "learning_rate": 1.9152775134199278e-05, "loss": 0.867, "step": 2066 }, { "epoch": 0.31629686304514154, "grad_norm": 2.3762790446329487, "learning_rate": 1.9151776581858708e-05, "loss": 0.7767, "step": 2067 }, { "epoch": 0.3164498852333588, "grad_norm": 2.433257226366239, "learning_rate": 1.915077746747174e-05, "loss": 0.791, "step": 2068 }, { "epoch": 0.31660290742157615, "grad_norm": 2.4692823299271867, "learning_rate": 1.914977779109973e-05, "loss": 0.8087, "step": 2069 }, { "epoch": 0.31675592960979343, "grad_norm": 2.131604538235996, "learning_rate": 1.9148777552804075e-05, "loss": 0.7566, "step": 2070 }, { "epoch": 0.3169089517980107, "grad_norm": 2.4472979026288533, "learning_rate": 1.9147776752646193e-05, "loss": 0.9065, "step": 2071 }, { "epoch": 0.317061973986228, "grad_norm": 2.484643272652111, "learning_rate": 1.914677539068756e-05, "loss": 0.8602, "step": 2072 }, { "epoch": 0.3172149961744453, "grad_norm": 2.5281446531443112, "learning_rate": 1.9145773466989665e-05, "loss": 0.8182, "step": 2073 }, { "epoch": 0.3173680183626626, "grad_norm": 2.5908840665780755, "learning_rate": 1.9144770981614043e-05, "loss": 0.7987, "step": 2074 }, { "epoch": 0.3175210405508799, "grad_norm": 2.0941908773437943, "learning_rate": 1.914376793462226e-05, "loss": 0.896, "step": 2075 }, { "epoch": 0.31767406273909715, "grad_norm": 2.2421817228071705, "learning_rate": 1.914276432607592e-05, "loss": 0.6662, "step": 2076 }, { "epoch": 0.3178270849273145, "grad_norm": 2.4922409133172394, "learning_rate": 1.9141760156036656e-05, "loss": 0.7889, "step": 2077 }, { "epoch": 0.31798010711553176, "grad_norm": 2.3788296857697695, "learning_rate": 1.9140755424566135e-05, "loss": 0.7257, "step": 2078 }, { "epoch": 0.31813312930374904, "grad_norm": 2.3379923933589017, "learning_rate": 1.9139750131726067e-05, "loss": 0.7745, "step": 2079 }, { "epoch": 0.3182861514919663, "grad_norm": 2.873252027227465, "learning_rate": 1.9138744277578186e-05, "loss": 0.8744, "step": 2080 }, { "epoch": 0.31843917368018365, "grad_norm": 2.4216698005628503, "learning_rate": 1.9137737862184272e-05, "loss": 0.8559, "step": 2081 }, { "epoch": 0.31859219586840093, "grad_norm": 2.4418905760060454, "learning_rate": 1.9136730885606124e-05, "loss": 0.7539, "step": 2082 }, { "epoch": 0.3187452180566182, "grad_norm": 2.4020958445199936, "learning_rate": 1.9135723347905593e-05, "loss": 0.827, "step": 2083 }, { "epoch": 0.3188982402448355, "grad_norm": 2.5406101984226908, "learning_rate": 1.913471524914455e-05, "loss": 0.8508, "step": 2084 }, { "epoch": 0.3190512624330528, "grad_norm": 2.2918266846443736, "learning_rate": 1.913370658938491e-05, "loss": 0.7455, "step": 2085 }, { "epoch": 0.3192042846212701, "grad_norm": 2.480568139223291, "learning_rate": 1.9132697368688616e-05, "loss": 0.8116, "step": 2086 }, { "epoch": 0.3193573068094874, "grad_norm": 2.854465154555059, "learning_rate": 1.913168758711765e-05, "loss": 0.9847, "step": 2087 }, { "epoch": 0.31951032899770465, "grad_norm": 2.572434816180969, "learning_rate": 1.9130677244734026e-05, "loss": 0.8183, "step": 2088 }, { "epoch": 0.319663351185922, "grad_norm": 3.003205537027778, "learning_rate": 1.9129666341599793e-05, "loss": 0.853, "step": 2089 }, { "epoch": 0.31981637337413926, "grad_norm": 2.4337824036824953, "learning_rate": 1.9128654877777034e-05, "loss": 0.7883, "step": 2090 }, { "epoch": 0.31996939556235654, "grad_norm": 2.5391007309566302, "learning_rate": 1.9127642853327867e-05, "loss": 0.7448, "step": 2091 }, { "epoch": 0.3201224177505738, "grad_norm": 2.5479704491291986, "learning_rate": 1.9126630268314447e-05, "loss": 0.7959, "step": 2092 }, { "epoch": 0.32027543993879115, "grad_norm": 2.7326391324081016, "learning_rate": 1.9125617122798952e-05, "loss": 0.8832, "step": 2093 }, { "epoch": 0.32042846212700843, "grad_norm": 2.1860153989983298, "learning_rate": 1.9124603416843617e-05, "loss": 0.8377, "step": 2094 }, { "epoch": 0.3205814843152257, "grad_norm": 2.503472081997963, "learning_rate": 1.912358915051069e-05, "loss": 0.7138, "step": 2095 }, { "epoch": 0.320734506503443, "grad_norm": 2.2062061908006845, "learning_rate": 1.912257432386246e-05, "loss": 0.7035, "step": 2096 }, { "epoch": 0.3208875286916603, "grad_norm": 2.077975827552243, "learning_rate": 1.912155893696125e-05, "loss": 0.71, "step": 2097 }, { "epoch": 0.3210405508798776, "grad_norm": 2.4162267436191307, "learning_rate": 1.9120542989869427e-05, "loss": 0.8393, "step": 2098 }, { "epoch": 0.3211935730680949, "grad_norm": 2.5988343283265154, "learning_rate": 1.911952648264938e-05, "loss": 0.8368, "step": 2099 }, { "epoch": 0.32134659525631215, "grad_norm": 2.2723363777964662, "learning_rate": 1.911850941536353e-05, "loss": 0.7547, "step": 2100 }, { "epoch": 0.3214996174445295, "grad_norm": 2.413632966823172, "learning_rate": 1.9117491788074348e-05, "loss": 0.774, "step": 2101 }, { "epoch": 0.32165263963274676, "grad_norm": 2.439605514523114, "learning_rate": 1.9116473600844327e-05, "loss": 0.7549, "step": 2102 }, { "epoch": 0.32180566182096404, "grad_norm": 2.2988123659235473, "learning_rate": 1.9115454853736e-05, "loss": 0.7875, "step": 2103 }, { "epoch": 0.3219586840091813, "grad_norm": 2.3470597989174697, "learning_rate": 1.9114435546811928e-05, "loss": 0.7903, "step": 2104 }, { "epoch": 0.3221117061973986, "grad_norm": 2.562614663596328, "learning_rate": 1.9113415680134717e-05, "loss": 0.8583, "step": 2105 }, { "epoch": 0.32226472838561593, "grad_norm": 2.6369756053615907, "learning_rate": 1.9112395253766995e-05, "loss": 0.9672, "step": 2106 }, { "epoch": 0.3224177505738332, "grad_norm": 2.914943083424258, "learning_rate": 1.911137426777143e-05, "loss": 0.827, "step": 2107 }, { "epoch": 0.3225707727620505, "grad_norm": 2.3463201198738863, "learning_rate": 1.911035272221073e-05, "loss": 0.8843, "step": 2108 }, { "epoch": 0.32272379495026776, "grad_norm": 2.5753832932412637, "learning_rate": 1.910933061714763e-05, "loss": 0.7845, "step": 2109 }, { "epoch": 0.3228768171384851, "grad_norm": 2.677331183762575, "learning_rate": 1.91083079526449e-05, "loss": 0.8315, "step": 2110 }, { "epoch": 0.3230298393267024, "grad_norm": 2.619466419949009, "learning_rate": 1.9107284728765347e-05, "loss": 0.9082, "step": 2111 }, { "epoch": 0.32318286151491965, "grad_norm": 2.318593309042997, "learning_rate": 1.910626094557181e-05, "loss": 0.7603, "step": 2112 }, { "epoch": 0.32333588370313693, "grad_norm": 2.5023945668186625, "learning_rate": 1.9105236603127167e-05, "loss": 0.8891, "step": 2113 }, { "epoch": 0.32348890589135426, "grad_norm": 2.6435011702222755, "learning_rate": 1.910421170149432e-05, "loss": 0.8287, "step": 2114 }, { "epoch": 0.32364192807957154, "grad_norm": 2.3861826759572966, "learning_rate": 1.910318624073622e-05, "loss": 0.7984, "step": 2115 }, { "epoch": 0.3237949502677888, "grad_norm": 2.5742746321849106, "learning_rate": 1.910216022091584e-05, "loss": 0.9795, "step": 2116 }, { "epoch": 0.3239479724560061, "grad_norm": 2.471229350103612, "learning_rate": 1.910113364209619e-05, "loss": 0.7576, "step": 2117 }, { "epoch": 0.32410099464422343, "grad_norm": 2.4970893298757546, "learning_rate": 1.9100106504340324e-05, "loss": 0.7732, "step": 2118 }, { "epoch": 0.3242540168324407, "grad_norm": 2.268656403612154, "learning_rate": 1.9099078807711318e-05, "loss": 0.7906, "step": 2119 }, { "epoch": 0.324407039020658, "grad_norm": 2.4314592035006557, "learning_rate": 1.9098050552272284e-05, "loss": 0.9111, "step": 2120 }, { "epoch": 0.32456006120887526, "grad_norm": 2.4708369395051046, "learning_rate": 1.909702173808637e-05, "loss": 0.8278, "step": 2121 }, { "epoch": 0.3247130833970926, "grad_norm": 2.6888386351450113, "learning_rate": 1.909599236521677e-05, "loss": 0.785, "step": 2122 }, { "epoch": 0.3248661055853099, "grad_norm": 2.530033705647798, "learning_rate": 1.9094962433726692e-05, "loss": 0.7601, "step": 2123 }, { "epoch": 0.32501912777352715, "grad_norm": 2.577311046308443, "learning_rate": 1.9093931943679395e-05, "loss": 0.8827, "step": 2124 }, { "epoch": 0.32517214996174443, "grad_norm": 2.682264853957065, "learning_rate": 1.909290089513816e-05, "loss": 0.8972, "step": 2125 }, { "epoch": 0.32532517214996176, "grad_norm": 2.292095938273781, "learning_rate": 1.9091869288166305e-05, "loss": 0.8293, "step": 2126 }, { "epoch": 0.32547819433817904, "grad_norm": 2.5223280792400704, "learning_rate": 1.9090837122827195e-05, "loss": 0.8509, "step": 2127 }, { "epoch": 0.3256312165263963, "grad_norm": 2.447119418811232, "learning_rate": 1.908980439918421e-05, "loss": 0.7685, "step": 2128 }, { "epoch": 0.3257842387146136, "grad_norm": 2.6601569588663048, "learning_rate": 1.9088771117300778e-05, "loss": 0.9052, "step": 2129 }, { "epoch": 0.3259372609028309, "grad_norm": 2.536018852055662, "learning_rate": 1.908773727724036e-05, "loss": 0.8709, "step": 2130 }, { "epoch": 0.3260902830910482, "grad_norm": 2.330125546657301, "learning_rate": 1.908670287906644e-05, "loss": 0.8163, "step": 2131 }, { "epoch": 0.3262433052792655, "grad_norm": 2.368910748289313, "learning_rate": 1.9085667922842547e-05, "loss": 0.8446, "step": 2132 }, { "epoch": 0.32639632746748276, "grad_norm": 2.460618804621041, "learning_rate": 1.908463240863225e-05, "loss": 0.8018, "step": 2133 }, { "epoch": 0.3265493496557001, "grad_norm": 2.5082175611570063, "learning_rate": 1.9083596336499133e-05, "loss": 0.7482, "step": 2134 }, { "epoch": 0.32670237184391737, "grad_norm": 2.8829890496898294, "learning_rate": 1.908255970650683e-05, "loss": 0.7766, "step": 2135 }, { "epoch": 0.32685539403213465, "grad_norm": 2.6191018339121346, "learning_rate": 1.9081522518719006e-05, "loss": 0.8283, "step": 2136 }, { "epoch": 0.3270084162203519, "grad_norm": 2.675185201207612, "learning_rate": 1.9080484773199356e-05, "loss": 1.0072, "step": 2137 }, { "epoch": 0.32716143840856926, "grad_norm": 2.5802166600723293, "learning_rate": 1.9079446470011615e-05, "loss": 0.789, "step": 2138 }, { "epoch": 0.32731446059678654, "grad_norm": 2.7923015418179107, "learning_rate": 1.9078407609219543e-05, "loss": 0.7883, "step": 2139 }, { "epoch": 0.3274674827850038, "grad_norm": 2.418163628175102, "learning_rate": 1.9077368190886946e-05, "loss": 0.7245, "step": 2140 }, { "epoch": 0.3276205049732211, "grad_norm": 2.598496284037849, "learning_rate": 1.907632821507766e-05, "loss": 0.7575, "step": 2141 }, { "epoch": 0.3277735271614384, "grad_norm": 2.7343117555473846, "learning_rate": 1.907528768185555e-05, "loss": 0.874, "step": 2142 }, { "epoch": 0.3279265493496557, "grad_norm": 2.642891739247081, "learning_rate": 1.9074246591284522e-05, "loss": 0.8379, "step": 2143 }, { "epoch": 0.328079571537873, "grad_norm": 2.608322969653126, "learning_rate": 1.9073204943428512e-05, "loss": 0.8056, "step": 2144 }, { "epoch": 0.32823259372609026, "grad_norm": 2.6873976836473767, "learning_rate": 1.907216273835149e-05, "loss": 0.7994, "step": 2145 }, { "epoch": 0.3283856159143076, "grad_norm": 2.4514711844365658, "learning_rate": 1.9071119976117465e-05, "loss": 0.8881, "step": 2146 }, { "epoch": 0.32853863810252487, "grad_norm": 2.5267854318495515, "learning_rate": 1.9070076656790475e-05, "loss": 0.8897, "step": 2147 }, { "epoch": 0.32869166029074215, "grad_norm": 2.1105651231627647, "learning_rate": 1.9069032780434596e-05, "loss": 0.7618, "step": 2148 }, { "epoch": 0.3288446824789594, "grad_norm": 2.3346993598677765, "learning_rate": 1.9067988347113934e-05, "loss": 0.785, "step": 2149 }, { "epoch": 0.32899770466717676, "grad_norm": 2.4214920221821203, "learning_rate": 1.9066943356892636e-05, "loss": 0.8432, "step": 2150 }, { "epoch": 0.32915072685539404, "grad_norm": 2.558431202242363, "learning_rate": 1.9065897809834877e-05, "loss": 0.9265, "step": 2151 }, { "epoch": 0.3293037490436113, "grad_norm": 2.411042024636327, "learning_rate": 1.9064851706004862e-05, "loss": 0.8417, "step": 2152 }, { "epoch": 0.3294567712318286, "grad_norm": 2.832038379528674, "learning_rate": 1.9063805045466847e-05, "loss": 0.8054, "step": 2153 }, { "epoch": 0.3296097934200459, "grad_norm": 2.6592576047547123, "learning_rate": 1.9062757828285105e-05, "loss": 0.9434, "step": 2154 }, { "epoch": 0.3297628156082632, "grad_norm": 2.352342451790287, "learning_rate": 1.9061710054523953e-05, "loss": 0.7586, "step": 2155 }, { "epoch": 0.3299158377964805, "grad_norm": 2.544209759964932, "learning_rate": 1.9060661724247734e-05, "loss": 0.8578, "step": 2156 }, { "epoch": 0.33006885998469776, "grad_norm": 2.276504238923556, "learning_rate": 1.9059612837520834e-05, "loss": 0.8868, "step": 2157 }, { "epoch": 0.3302218821729151, "grad_norm": 2.5733234300853938, "learning_rate": 1.9058563394407668e-05, "loss": 0.9171, "step": 2158 }, { "epoch": 0.33037490436113237, "grad_norm": 2.431640470434858, "learning_rate": 1.9057513394972687e-05, "loss": 0.6812, "step": 2159 }, { "epoch": 0.33052792654934965, "grad_norm": 2.603271164903475, "learning_rate": 1.9056462839280376e-05, "loss": 0.8457, "step": 2160 }, { "epoch": 0.3306809487375669, "grad_norm": 2.731126948671394, "learning_rate": 1.905541172739525e-05, "loss": 0.7824, "step": 2161 }, { "epoch": 0.33083397092578426, "grad_norm": 2.753119967092687, "learning_rate": 1.905436005938187e-05, "loss": 0.8542, "step": 2162 }, { "epoch": 0.33098699311400154, "grad_norm": 2.337685299070537, "learning_rate": 1.9053307835304812e-05, "loss": 0.8189, "step": 2163 }, { "epoch": 0.3311400153022188, "grad_norm": 2.320976989090415, "learning_rate": 1.9052255055228707e-05, "loss": 0.6544, "step": 2164 }, { "epoch": 0.3312930374904361, "grad_norm": 2.5843477334717653, "learning_rate": 1.9051201719218207e-05, "loss": 0.8549, "step": 2165 }, { "epoch": 0.3314460596786534, "grad_norm": 2.129202487167354, "learning_rate": 1.9050147827337996e-05, "loss": 0.7141, "step": 2166 }, { "epoch": 0.3315990818668707, "grad_norm": 2.5652483228964242, "learning_rate": 1.904909337965281e-05, "loss": 0.7735, "step": 2167 }, { "epoch": 0.331752104055088, "grad_norm": 2.3273524046603633, "learning_rate": 1.9048038376227392e-05, "loss": 0.8203, "step": 2168 }, { "epoch": 0.33190512624330526, "grad_norm": 2.731759408744939, "learning_rate": 1.9046982817126545e-05, "loss": 0.7929, "step": 2169 }, { "epoch": 0.3320581484315226, "grad_norm": 2.395296587410217, "learning_rate": 1.904592670241509e-05, "loss": 0.9106, "step": 2170 }, { "epoch": 0.33221117061973987, "grad_norm": 2.6281801579811566, "learning_rate": 1.904487003215789e-05, "loss": 0.8377, "step": 2171 }, { "epoch": 0.33236419280795715, "grad_norm": 2.496937430980088, "learning_rate": 1.9043812806419833e-05, "loss": 0.7716, "step": 2172 }, { "epoch": 0.3325172149961744, "grad_norm": 2.335221367154538, "learning_rate": 1.9042755025265858e-05, "loss": 0.9313, "step": 2173 }, { "epoch": 0.33267023718439176, "grad_norm": 2.4705985458013457, "learning_rate": 1.904169668876092e-05, "loss": 0.8674, "step": 2174 }, { "epoch": 0.33282325937260904, "grad_norm": 2.470941759164986, "learning_rate": 1.9040637796970013e-05, "loss": 0.8822, "step": 2175 }, { "epoch": 0.3329762815608263, "grad_norm": 2.7471317200021725, "learning_rate": 1.9039578349958177e-05, "loss": 0.9441, "step": 2176 }, { "epoch": 0.3331293037490436, "grad_norm": 2.561583712652535, "learning_rate": 1.9038518347790468e-05, "loss": 0.8602, "step": 2177 }, { "epoch": 0.3332823259372609, "grad_norm": 2.636580914055904, "learning_rate": 1.9037457790531988e-05, "loss": 0.8297, "step": 2178 }, { "epoch": 0.3334353481254782, "grad_norm": 2.874894906101151, "learning_rate": 1.9036396678247872e-05, "loss": 0.7911, "step": 2179 }, { "epoch": 0.3335883703136955, "grad_norm": 2.449895925283834, "learning_rate": 1.9035335011003286e-05, "loss": 0.8366, "step": 2180 }, { "epoch": 0.33374139250191276, "grad_norm": 2.294977029076781, "learning_rate": 1.903427278886343e-05, "loss": 0.7386, "step": 2181 }, { "epoch": 0.3338944146901301, "grad_norm": 3.231281267707513, "learning_rate": 1.903321001189354e-05, "loss": 0.8684, "step": 2182 }, { "epoch": 0.33404743687834737, "grad_norm": 2.787124174288599, "learning_rate": 1.9032146680158884e-05, "loss": 0.8729, "step": 2183 }, { "epoch": 0.33420045906656465, "grad_norm": 2.636855875087689, "learning_rate": 1.903108279372477e-05, "loss": 0.8681, "step": 2184 }, { "epoch": 0.3343534812547819, "grad_norm": 2.3818211247344054, "learning_rate": 1.9030018352656525e-05, "loss": 0.9681, "step": 2185 }, { "epoch": 0.33450650344299926, "grad_norm": 2.6499490500959655, "learning_rate": 1.9028953357019534e-05, "loss": 0.7594, "step": 2186 }, { "epoch": 0.33465952563121654, "grad_norm": 2.803284880960482, "learning_rate": 1.902788780687919e-05, "loss": 0.8759, "step": 2187 }, { "epoch": 0.3348125478194338, "grad_norm": 2.6200835706423558, "learning_rate": 1.9026821702300942e-05, "loss": 0.8095, "step": 2188 }, { "epoch": 0.3349655700076511, "grad_norm": 2.5244225371364255, "learning_rate": 1.902575504335026e-05, "loss": 0.8257, "step": 2189 }, { "epoch": 0.3351185921958684, "grad_norm": 2.272322663580629, "learning_rate": 1.9024687830092653e-05, "loss": 0.8683, "step": 2190 }, { "epoch": 0.3352716143840857, "grad_norm": 2.3507940993495855, "learning_rate": 1.902362006259366e-05, "loss": 0.7174, "step": 2191 }, { "epoch": 0.335424636572303, "grad_norm": 2.3419236603411786, "learning_rate": 1.9022551740918858e-05, "loss": 0.8454, "step": 2192 }, { "epoch": 0.33557765876052026, "grad_norm": 2.2407962986631427, "learning_rate": 1.9021482865133857e-05, "loss": 0.8129, "step": 2193 }, { "epoch": 0.3357306809487376, "grad_norm": 2.829969199508418, "learning_rate": 1.90204134353043e-05, "loss": 0.9139, "step": 2194 }, { "epoch": 0.33588370313695487, "grad_norm": 2.570297450213411, "learning_rate": 1.901934345149587e-05, "loss": 0.8215, "step": 2195 }, { "epoch": 0.33603672532517215, "grad_norm": 2.1742508587047356, "learning_rate": 1.9018272913774272e-05, "loss": 0.7921, "step": 2196 }, { "epoch": 0.3361897475133894, "grad_norm": 2.5516210884648354, "learning_rate": 1.9017201822205257e-05, "loss": 0.777, "step": 2197 }, { "epoch": 0.33634276970160676, "grad_norm": 2.5965934611015524, "learning_rate": 1.9016130176854595e-05, "loss": 0.8242, "step": 2198 }, { "epoch": 0.33649579188982404, "grad_norm": 2.633683261887388, "learning_rate": 1.9015057977788115e-05, "loss": 0.8029, "step": 2199 }, { "epoch": 0.3366488140780413, "grad_norm": 2.6200741185136533, "learning_rate": 1.9013985225071657e-05, "loss": 0.8098, "step": 2200 }, { "epoch": 0.3368018362662586, "grad_norm": 2.385459716174837, "learning_rate": 1.90129119187711e-05, "loss": 0.9182, "step": 2201 }, { "epoch": 0.3369548584544759, "grad_norm": 2.8745143161019677, "learning_rate": 1.901183805895237e-05, "loss": 0.833, "step": 2202 }, { "epoch": 0.3371078806426932, "grad_norm": 2.316707026669433, "learning_rate": 1.90107636456814e-05, "loss": 0.8279, "step": 2203 }, { "epoch": 0.3372609028309105, "grad_norm": 2.7035829713270187, "learning_rate": 1.900968867902419e-05, "loss": 0.8465, "step": 2204 }, { "epoch": 0.33741392501912776, "grad_norm": 2.43981629724686, "learning_rate": 1.9008613159046755e-05, "loss": 0.8019, "step": 2205 }, { "epoch": 0.3375669472073451, "grad_norm": 2.2730341237566107, "learning_rate": 1.900753708581514e-05, "loss": 0.8162, "step": 2206 }, { "epoch": 0.33771996939556237, "grad_norm": 2.2234217115960955, "learning_rate": 1.9006460459395436e-05, "loss": 0.7875, "step": 2207 }, { "epoch": 0.33787299158377965, "grad_norm": 2.8015510519189264, "learning_rate": 1.9005383279853763e-05, "loss": 0.9112, "step": 2208 }, { "epoch": 0.3380260137719969, "grad_norm": 2.5704163897021255, "learning_rate": 1.900430554725627e-05, "loss": 0.904, "step": 2209 }, { "epoch": 0.33817903596021426, "grad_norm": 2.305524302766331, "learning_rate": 1.9003227261669154e-05, "loss": 0.8197, "step": 2210 }, { "epoch": 0.33833205814843154, "grad_norm": 2.5338540294728613, "learning_rate": 1.9002148423158626e-05, "loss": 0.7963, "step": 2211 }, { "epoch": 0.3384850803366488, "grad_norm": 2.767802849237396, "learning_rate": 1.9001069031790948e-05, "loss": 0.8008, "step": 2212 }, { "epoch": 0.3386381025248661, "grad_norm": 2.6297059670318785, "learning_rate": 1.899998908763241e-05, "loss": 0.8164, "step": 2213 }, { "epoch": 0.3387911247130834, "grad_norm": 2.5239777756138095, "learning_rate": 1.8998908590749333e-05, "loss": 0.7567, "step": 2214 }, { "epoch": 0.3389441469013007, "grad_norm": 2.6121503690567582, "learning_rate": 1.8997827541208073e-05, "loss": 0.7942, "step": 2215 }, { "epoch": 0.339097169089518, "grad_norm": 2.513509488316226, "learning_rate": 1.8996745939075024e-05, "loss": 0.8487, "step": 2216 }, { "epoch": 0.33925019127773526, "grad_norm": 2.406167309519931, "learning_rate": 1.8995663784416613e-05, "loss": 0.8935, "step": 2217 }, { "epoch": 0.33940321346595254, "grad_norm": 2.2466325362978314, "learning_rate": 1.8994581077299303e-05, "loss": 0.764, "step": 2218 }, { "epoch": 0.33955623565416987, "grad_norm": 2.3061195054332155, "learning_rate": 1.8993497817789574e-05, "loss": 0.7542, "step": 2219 }, { "epoch": 0.33970925784238715, "grad_norm": 2.8382048184352957, "learning_rate": 1.8992414005953964e-05, "loss": 0.9037, "step": 2220 }, { "epoch": 0.3398622800306044, "grad_norm": 2.3287211280392777, "learning_rate": 1.8991329641859035e-05, "loss": 0.8013, "step": 2221 }, { "epoch": 0.3400153022188217, "grad_norm": 2.299742943168751, "learning_rate": 1.8990244725571374e-05, "loss": 0.7663, "step": 2222 }, { "epoch": 0.34016832440703904, "grad_norm": 2.6471103760984764, "learning_rate": 1.8989159257157616e-05, "loss": 0.9033, "step": 2223 }, { "epoch": 0.3403213465952563, "grad_norm": 2.280805425315999, "learning_rate": 1.8988073236684422e-05, "loss": 0.8295, "step": 2224 }, { "epoch": 0.3404743687834736, "grad_norm": 2.141508514611218, "learning_rate": 1.898698666421849e-05, "loss": 0.7833, "step": 2225 }, { "epoch": 0.34062739097169087, "grad_norm": 2.7267105114369685, "learning_rate": 1.8985899539826547e-05, "loss": 0.7954, "step": 2226 }, { "epoch": 0.3407804131599082, "grad_norm": 2.2761348371968584, "learning_rate": 1.898481186357536e-05, "loss": 0.7791, "step": 2227 }, { "epoch": 0.3409334353481255, "grad_norm": 2.311320182693125, "learning_rate": 1.8983723635531733e-05, "loss": 0.7803, "step": 2228 }, { "epoch": 0.34108645753634276, "grad_norm": 2.416674743767647, "learning_rate": 1.898263485576249e-05, "loss": 0.7712, "step": 2229 }, { "epoch": 0.34123947972456004, "grad_norm": 2.41675063992554, "learning_rate": 1.8981545524334497e-05, "loss": 0.8321, "step": 2230 }, { "epoch": 0.34139250191277737, "grad_norm": 2.374216831787798, "learning_rate": 1.8980455641314658e-05, "loss": 0.759, "step": 2231 }, { "epoch": 0.34154552410099465, "grad_norm": 2.5464606847654414, "learning_rate": 1.897936520676991e-05, "loss": 0.8562, "step": 2232 }, { "epoch": 0.3416985462892119, "grad_norm": 2.138460875090068, "learning_rate": 1.8978274220767218e-05, "loss": 0.7936, "step": 2233 }, { "epoch": 0.3418515684774292, "grad_norm": 2.430427227076344, "learning_rate": 1.8977182683373577e-05, "loss": 0.7254, "step": 2234 }, { "epoch": 0.34200459066564654, "grad_norm": 2.4379531574708335, "learning_rate": 1.8976090594656034e-05, "loss": 0.7694, "step": 2235 }, { "epoch": 0.3421576128538638, "grad_norm": 2.3778128523783413, "learning_rate": 1.8974997954681652e-05, "loss": 0.7247, "step": 2236 }, { "epoch": 0.3423106350420811, "grad_norm": 2.6390102931039183, "learning_rate": 1.8973904763517534e-05, "loss": 0.8236, "step": 2237 }, { "epoch": 0.34246365723029837, "grad_norm": 2.298857942777125, "learning_rate": 1.8972811021230816e-05, "loss": 0.7822, "step": 2238 }, { "epoch": 0.3426166794185157, "grad_norm": 2.1998620792842702, "learning_rate": 1.8971716727888677e-05, "loss": 0.7806, "step": 2239 }, { "epoch": 0.342769701606733, "grad_norm": 2.656918161912584, "learning_rate": 1.897062188355831e-05, "loss": 0.9258, "step": 2240 }, { "epoch": 0.34292272379495026, "grad_norm": 2.4514776639915032, "learning_rate": 1.8969526488306965e-05, "loss": 0.771, "step": 2241 }, { "epoch": 0.34307574598316753, "grad_norm": 2.404064889745198, "learning_rate": 1.8968430542201905e-05, "loss": 0.7684, "step": 2242 }, { "epoch": 0.34322876817138487, "grad_norm": 2.289169317066614, "learning_rate": 1.8967334045310445e-05, "loss": 0.9253, "step": 2243 }, { "epoch": 0.34338179035960215, "grad_norm": 2.3330229020319337, "learning_rate": 1.896623699769992e-05, "loss": 0.8232, "step": 2244 }, { "epoch": 0.3435348125478194, "grad_norm": 2.513086642418052, "learning_rate": 1.89651393994377e-05, "loss": 0.8884, "step": 2245 }, { "epoch": 0.3436878347360367, "grad_norm": 2.8654016636145743, "learning_rate": 1.8964041250591203e-05, "loss": 0.873, "step": 2246 }, { "epoch": 0.34384085692425403, "grad_norm": 2.565973612887949, "learning_rate": 1.8962942551227862e-05, "loss": 0.7987, "step": 2247 }, { "epoch": 0.3439938791124713, "grad_norm": 2.36055185125566, "learning_rate": 1.896184330141516e-05, "loss": 0.7303, "step": 2248 }, { "epoch": 0.3441469013006886, "grad_norm": 2.3238221779857984, "learning_rate": 1.8960743501220597e-05, "loss": 0.797, "step": 2249 }, { "epoch": 0.34429992348890587, "grad_norm": 2.455737367611592, "learning_rate": 1.8959643150711723e-05, "loss": 0.759, "step": 2250 }, { "epoch": 0.3444529456771232, "grad_norm": 2.3782474528258106, "learning_rate": 1.8958542249956116e-05, "loss": 0.8592, "step": 2251 }, { "epoch": 0.3446059678653405, "grad_norm": 2.278193327077733, "learning_rate": 1.8957440799021377e-05, "loss": 0.7989, "step": 2252 }, { "epoch": 0.34475899005355776, "grad_norm": 2.299702774333914, "learning_rate": 1.895633879797516e-05, "loss": 0.8959, "step": 2253 }, { "epoch": 0.34491201224177503, "grad_norm": 2.050241850848599, "learning_rate": 1.8955236246885143e-05, "loss": 0.6608, "step": 2254 }, { "epoch": 0.34506503442999237, "grad_norm": 2.1911195420404725, "learning_rate": 1.8954133145819028e-05, "loss": 0.696, "step": 2255 }, { "epoch": 0.34521805661820965, "grad_norm": 2.505315912925584, "learning_rate": 1.8953029494844575e-05, "loss": 0.8803, "step": 2256 }, { "epoch": 0.3453710788064269, "grad_norm": 2.3649033111770716, "learning_rate": 1.8951925294029553e-05, "loss": 0.7616, "step": 2257 }, { "epoch": 0.3455241009946442, "grad_norm": 2.3759627706360256, "learning_rate": 1.895082054344178e-05, "loss": 0.8723, "step": 2258 }, { "epoch": 0.34567712318286153, "grad_norm": 2.4769807539083124, "learning_rate": 1.89497152431491e-05, "loss": 0.8755, "step": 2259 }, { "epoch": 0.3458301453710788, "grad_norm": 2.372527770894994, "learning_rate": 1.8948609393219394e-05, "loss": 0.8229, "step": 2260 }, { "epoch": 0.3459831675592961, "grad_norm": 2.30265390487159, "learning_rate": 1.894750299372058e-05, "loss": 0.7476, "step": 2261 }, { "epoch": 0.34613618974751337, "grad_norm": 2.355500546095671, "learning_rate": 1.8946396044720607e-05, "loss": 0.732, "step": 2262 }, { "epoch": 0.3462892119357307, "grad_norm": 2.324299123570443, "learning_rate": 1.8945288546287452e-05, "loss": 0.803, "step": 2263 }, { "epoch": 0.346442234123948, "grad_norm": 2.4044763714157695, "learning_rate": 1.894418049848913e-05, "loss": 0.7983, "step": 2264 }, { "epoch": 0.34659525631216526, "grad_norm": 2.6759518579838932, "learning_rate": 1.8943071901393698e-05, "loss": 0.8131, "step": 2265 }, { "epoch": 0.34674827850038253, "grad_norm": 2.3991278259603535, "learning_rate": 1.894196275506923e-05, "loss": 0.8602, "step": 2266 }, { "epoch": 0.34690130068859987, "grad_norm": 2.2733901741363542, "learning_rate": 1.8940853059583853e-05, "loss": 0.8241, "step": 2267 }, { "epoch": 0.34705432287681715, "grad_norm": 3.8759891438201906, "learning_rate": 1.8939742815005716e-05, "loss": 0.8396, "step": 2268 }, { "epoch": 0.3472073450650344, "grad_norm": 2.3324798219158316, "learning_rate": 1.8938632021402994e-05, "loss": 0.676, "step": 2269 }, { "epoch": 0.3473603672532517, "grad_norm": 2.2829117471511373, "learning_rate": 1.893752067884391e-05, "loss": 0.7266, "step": 2270 }, { "epoch": 0.34751338944146903, "grad_norm": 2.544061089313835, "learning_rate": 1.8936408787396724e-05, "loss": 0.7567, "step": 2271 }, { "epoch": 0.3476664116296863, "grad_norm": 2.8639141424306698, "learning_rate": 1.893529634712971e-05, "loss": 0.9786, "step": 2272 }, { "epoch": 0.3478194338179036, "grad_norm": 2.2951994146776244, "learning_rate": 1.8934183358111194e-05, "loss": 0.7286, "step": 2273 }, { "epoch": 0.34797245600612087, "grad_norm": 2.514494070203749, "learning_rate": 1.8933069820409528e-05, "loss": 0.8458, "step": 2274 }, { "epoch": 0.3481254781943382, "grad_norm": 2.3009159384379516, "learning_rate": 1.8931955734093096e-05, "loss": 0.809, "step": 2275 }, { "epoch": 0.3482785003825555, "grad_norm": 2.6169208483293187, "learning_rate": 1.893084109923032e-05, "loss": 0.7312, "step": 2276 }, { "epoch": 0.34843152257077276, "grad_norm": 2.318572642711375, "learning_rate": 1.8929725915889656e-05, "loss": 0.8057, "step": 2277 }, { "epoch": 0.34858454475899003, "grad_norm": 2.4384080072672605, "learning_rate": 1.892861018413959e-05, "loss": 0.7845, "step": 2278 }, { "epoch": 0.34873756694720737, "grad_norm": 2.233659076191945, "learning_rate": 1.8927493904048645e-05, "loss": 0.7987, "step": 2279 }, { "epoch": 0.34889058913542464, "grad_norm": 2.8132353615209555, "learning_rate": 1.8926377075685375e-05, "loss": 0.792, "step": 2280 }, { "epoch": 0.3490436113236419, "grad_norm": 2.745124612350017, "learning_rate": 1.8925259699118363e-05, "loss": 0.8503, "step": 2281 }, { "epoch": 0.3491966335118592, "grad_norm": 2.3941958793093483, "learning_rate": 1.8924141774416243e-05, "loss": 0.7677, "step": 2282 }, { "epoch": 0.34934965570007653, "grad_norm": 2.3379365924269493, "learning_rate": 1.892302330164766e-05, "loss": 0.8122, "step": 2283 }, { "epoch": 0.3495026778882938, "grad_norm": 2.533816103656603, "learning_rate": 1.8921904280881316e-05, "loss": 0.7815, "step": 2284 }, { "epoch": 0.3496557000765111, "grad_norm": 2.6931588712567947, "learning_rate": 1.8920784712185925e-05, "loss": 0.9097, "step": 2285 }, { "epoch": 0.34980872226472837, "grad_norm": 2.270706751809004, "learning_rate": 1.8919664595630244e-05, "loss": 0.8721, "step": 2286 }, { "epoch": 0.3499617444529457, "grad_norm": 2.645794975569509, "learning_rate": 1.8918543931283065e-05, "loss": 0.8858, "step": 2287 }, { "epoch": 0.350114766641163, "grad_norm": 2.747285576887863, "learning_rate": 1.891742271921322e-05, "loss": 0.87, "step": 2288 }, { "epoch": 0.35026778882938026, "grad_norm": 2.363498122115401, "learning_rate": 1.8916300959489555e-05, "loss": 0.7907, "step": 2289 }, { "epoch": 0.35042081101759753, "grad_norm": 2.4364836344114784, "learning_rate": 1.8915178652180968e-05, "loss": 0.8606, "step": 2290 }, { "epoch": 0.35057383320581487, "grad_norm": 2.353223078511656, "learning_rate": 1.8914055797356385e-05, "loss": 0.7986, "step": 2291 }, { "epoch": 0.35072685539403214, "grad_norm": 2.7270978738275513, "learning_rate": 1.8912932395084763e-05, "loss": 0.7931, "step": 2292 }, { "epoch": 0.3508798775822494, "grad_norm": 2.537289091735032, "learning_rate": 1.8911808445435097e-05, "loss": 0.9002, "step": 2293 }, { "epoch": 0.3510328997704667, "grad_norm": 2.6426827278918488, "learning_rate": 1.8910683948476407e-05, "loss": 0.7942, "step": 2294 }, { "epoch": 0.35118592195868403, "grad_norm": 2.2600513381334273, "learning_rate": 1.890955890427776e-05, "loss": 0.7864, "step": 2295 }, { "epoch": 0.3513389441469013, "grad_norm": 2.1081811704121085, "learning_rate": 1.8908433312908245e-05, "loss": 0.6954, "step": 2296 }, { "epoch": 0.3514919663351186, "grad_norm": 2.75647258716163, "learning_rate": 1.8907307174436993e-05, "loss": 0.833, "step": 2297 }, { "epoch": 0.35164498852333587, "grad_norm": 2.4321268972579246, "learning_rate": 1.8906180488933157e-05, "loss": 0.8105, "step": 2298 }, { "epoch": 0.3517980107115532, "grad_norm": 2.1495453410926504, "learning_rate": 1.8905053256465938e-05, "loss": 0.664, "step": 2299 }, { "epoch": 0.3519510328997705, "grad_norm": 2.6676960585373695, "learning_rate": 1.8903925477104564e-05, "loss": 0.8758, "step": 2300 }, { "epoch": 0.35210405508798776, "grad_norm": 2.7198945884747974, "learning_rate": 1.8902797150918296e-05, "loss": 0.7659, "step": 2301 }, { "epoch": 0.35225707727620503, "grad_norm": 2.6559777133855365, "learning_rate": 1.8901668277976418e-05, "loss": 0.8424, "step": 2302 }, { "epoch": 0.35241009946442237, "grad_norm": 2.434513815774383, "learning_rate": 1.8900538858348278e-05, "loss": 0.8141, "step": 2303 }, { "epoch": 0.35256312165263964, "grad_norm": 2.484738285350647, "learning_rate": 1.889940889210322e-05, "loss": 0.735, "step": 2304 }, { "epoch": 0.3527161438408569, "grad_norm": 2.2508751836804537, "learning_rate": 1.889827837931065e-05, "loss": 0.7989, "step": 2305 }, { "epoch": 0.3528691660290742, "grad_norm": 2.640756293380206, "learning_rate": 1.8897147320039996e-05, "loss": 0.881, "step": 2306 }, { "epoch": 0.35302218821729153, "grad_norm": 2.5263231329934652, "learning_rate": 1.8896015714360717e-05, "loss": 0.8671, "step": 2307 }, { "epoch": 0.3531752104055088, "grad_norm": 2.085279323673372, "learning_rate": 1.8894883562342312e-05, "loss": 0.6073, "step": 2308 }, { "epoch": 0.3533282325937261, "grad_norm": 2.691825024482245, "learning_rate": 1.8893750864054313e-05, "loss": 0.8882, "step": 2309 }, { "epoch": 0.35348125478194337, "grad_norm": 2.4796532361562282, "learning_rate": 1.8892617619566277e-05, "loss": 0.859, "step": 2310 }, { "epoch": 0.3536342769701607, "grad_norm": 2.498491204421089, "learning_rate": 1.8891483828947805e-05, "loss": 0.8075, "step": 2311 }, { "epoch": 0.353787299158378, "grad_norm": 2.4618340934724237, "learning_rate": 1.889034949226853e-05, "loss": 0.8162, "step": 2312 }, { "epoch": 0.35394032134659525, "grad_norm": 2.3787322282171117, "learning_rate": 1.8889214609598113e-05, "loss": 0.7916, "step": 2313 }, { "epoch": 0.35409334353481253, "grad_norm": 2.331498599753699, "learning_rate": 1.888807918100625e-05, "loss": 0.8632, "step": 2314 }, { "epoch": 0.35424636572302987, "grad_norm": 2.221565693142028, "learning_rate": 1.8886943206562677e-05, "loss": 0.7716, "step": 2315 }, { "epoch": 0.35439938791124714, "grad_norm": 2.7336828188903874, "learning_rate": 1.8885806686337154e-05, "loss": 0.8538, "step": 2316 }, { "epoch": 0.3545524100994644, "grad_norm": 2.3932636463702948, "learning_rate": 1.888466962039948e-05, "loss": 0.8395, "step": 2317 }, { "epoch": 0.3547054322876817, "grad_norm": 2.2898696428789775, "learning_rate": 1.888353200881949e-05, "loss": 0.7866, "step": 2318 }, { "epoch": 0.35485845447589903, "grad_norm": 2.2783308963748494, "learning_rate": 1.8882393851667046e-05, "loss": 0.7497, "step": 2319 }, { "epoch": 0.3550114766641163, "grad_norm": 2.3945871355573454, "learning_rate": 1.8881255149012047e-05, "loss": 0.8037, "step": 2320 }, { "epoch": 0.3551644988523336, "grad_norm": 2.4599488406377965, "learning_rate": 1.8880115900924426e-05, "loss": 0.7507, "step": 2321 }, { "epoch": 0.35531752104055087, "grad_norm": 2.4065925989291492, "learning_rate": 1.8878976107474147e-05, "loss": 0.7945, "step": 2322 }, { "epoch": 0.3554705432287682, "grad_norm": 2.4302388243500834, "learning_rate": 1.8877835768731212e-05, "loss": 0.8201, "step": 2323 }, { "epoch": 0.3556235654169855, "grad_norm": 2.446682716888137, "learning_rate": 1.8876694884765648e-05, "loss": 0.8339, "step": 2324 }, { "epoch": 0.35577658760520275, "grad_norm": 2.656460918351645, "learning_rate": 1.887555345564753e-05, "loss": 0.8646, "step": 2325 }, { "epoch": 0.35592960979342003, "grad_norm": 2.349546610049587, "learning_rate": 1.887441148144695e-05, "loss": 0.7222, "step": 2326 }, { "epoch": 0.3560826319816373, "grad_norm": 2.404762586158525, "learning_rate": 1.887326896223405e-05, "loss": 0.8084, "step": 2327 }, { "epoch": 0.35623565416985464, "grad_norm": 2.283494276451758, "learning_rate": 1.8872125898078985e-05, "loss": 0.841, "step": 2328 }, { "epoch": 0.3563886763580719, "grad_norm": 2.448512269421924, "learning_rate": 1.8870982289051962e-05, "loss": 0.7663, "step": 2329 }, { "epoch": 0.3565416985462892, "grad_norm": 2.457783473444631, "learning_rate": 1.8869838135223214e-05, "loss": 0.763, "step": 2330 }, { "epoch": 0.3566947207345065, "grad_norm": 2.5155960596034785, "learning_rate": 1.8868693436663006e-05, "loss": 0.8754, "step": 2331 }, { "epoch": 0.3568477429227238, "grad_norm": 2.4245646747227085, "learning_rate": 1.886754819344164e-05, "loss": 0.7733, "step": 2332 }, { "epoch": 0.3570007651109411, "grad_norm": 2.2376909756083623, "learning_rate": 1.8866402405629447e-05, "loss": 0.7319, "step": 2333 }, { "epoch": 0.35715378729915837, "grad_norm": 2.4584252279555714, "learning_rate": 1.88652560732968e-05, "loss": 0.8691, "step": 2334 }, { "epoch": 0.35730680948737564, "grad_norm": 2.541807107424281, "learning_rate": 1.8864109196514096e-05, "loss": 0.8073, "step": 2335 }, { "epoch": 0.357459831675593, "grad_norm": 2.359184716636419, "learning_rate": 1.886296177535177e-05, "loss": 0.7375, "step": 2336 }, { "epoch": 0.35761285386381025, "grad_norm": 2.3320992648966636, "learning_rate": 1.8861813809880288e-05, "loss": 0.7953, "step": 2337 }, { "epoch": 0.35776587605202753, "grad_norm": 2.304009796345526, "learning_rate": 1.8860665300170155e-05, "loss": 0.7176, "step": 2338 }, { "epoch": 0.3579188982402448, "grad_norm": 2.3503014104713094, "learning_rate": 1.8859516246291897e-05, "loss": 0.8045, "step": 2339 }, { "epoch": 0.35807192042846214, "grad_norm": 2.619508152477919, "learning_rate": 1.8858366648316093e-05, "loss": 0.908, "step": 2340 }, { "epoch": 0.3582249426166794, "grad_norm": 2.4429041886742264, "learning_rate": 1.8857216506313334e-05, "loss": 0.7526, "step": 2341 }, { "epoch": 0.3583779648048967, "grad_norm": 2.543918354148765, "learning_rate": 1.8856065820354267e-05, "loss": 0.9139, "step": 2342 }, { "epoch": 0.358530986993114, "grad_norm": 2.6126350593443632, "learning_rate": 1.8854914590509547e-05, "loss": 0.8782, "step": 2343 }, { "epoch": 0.3586840091813313, "grad_norm": 2.6110295147807583, "learning_rate": 1.8853762816849882e-05, "loss": 0.8034, "step": 2344 }, { "epoch": 0.3588370313695486, "grad_norm": 2.3093166827506866, "learning_rate": 1.885261049944601e-05, "loss": 0.8519, "step": 2345 }, { "epoch": 0.35899005355776586, "grad_norm": 2.355282542968486, "learning_rate": 1.885145763836869e-05, "loss": 0.8291, "step": 2346 }, { "epoch": 0.35914307574598314, "grad_norm": 2.347727191149938, "learning_rate": 1.8850304233688735e-05, "loss": 0.8327, "step": 2347 }, { "epoch": 0.3592960979342005, "grad_norm": 2.7381027335841504, "learning_rate": 1.884915028547697e-05, "loss": 0.8587, "step": 2348 }, { "epoch": 0.35944912012241775, "grad_norm": 2.2608152358156977, "learning_rate": 1.884799579380427e-05, "loss": 0.7851, "step": 2349 }, { "epoch": 0.35960214231063503, "grad_norm": 2.5461028673938064, "learning_rate": 1.8846840758741533e-05, "loss": 0.8457, "step": 2350 }, { "epoch": 0.3597551644988523, "grad_norm": 2.197749699940499, "learning_rate": 1.88456851803597e-05, "loss": 0.8232, "step": 2351 }, { "epoch": 0.35990818668706964, "grad_norm": 2.2081723097087247, "learning_rate": 1.8844529058729732e-05, "loss": 0.8364, "step": 2352 }, { "epoch": 0.3600612088752869, "grad_norm": 2.2167658053229014, "learning_rate": 1.8843372393922633e-05, "loss": 0.7587, "step": 2353 }, { "epoch": 0.3602142310635042, "grad_norm": 2.5104046616375006, "learning_rate": 1.8842215186009447e-05, "loss": 0.8437, "step": 2354 }, { "epoch": 0.3603672532517215, "grad_norm": 2.4725723695877857, "learning_rate": 1.884105743506123e-05, "loss": 0.7279, "step": 2355 }, { "epoch": 0.3605202754399388, "grad_norm": 2.457372115979769, "learning_rate": 1.883989914114909e-05, "loss": 0.827, "step": 2356 }, { "epoch": 0.3606732976281561, "grad_norm": 2.410585651428132, "learning_rate": 1.883874030434416e-05, "loss": 0.7795, "step": 2357 }, { "epoch": 0.36082631981637336, "grad_norm": 2.4141145834331135, "learning_rate": 1.8837580924717614e-05, "loss": 0.777, "step": 2358 }, { "epoch": 0.36097934200459064, "grad_norm": 2.2617813201912673, "learning_rate": 1.883642100234065e-05, "loss": 0.7614, "step": 2359 }, { "epoch": 0.361132364192808, "grad_norm": 2.2966321276793735, "learning_rate": 1.88352605372845e-05, "loss": 0.7941, "step": 2360 }, { "epoch": 0.36128538638102525, "grad_norm": 2.110102263249786, "learning_rate": 1.8834099529620442e-05, "loss": 0.6854, "step": 2361 }, { "epoch": 0.36143840856924253, "grad_norm": 2.7367322503438754, "learning_rate": 1.883293797941977e-05, "loss": 0.8223, "step": 2362 }, { "epoch": 0.3615914307574598, "grad_norm": 2.709621371396735, "learning_rate": 1.8831775886753822e-05, "loss": 0.7787, "step": 2363 }, { "epoch": 0.36174445294567714, "grad_norm": 2.440509867786845, "learning_rate": 1.883061325169397e-05, "loss": 0.8569, "step": 2364 }, { "epoch": 0.3618974751338944, "grad_norm": 2.504540793639551, "learning_rate": 1.882945007431161e-05, "loss": 0.7796, "step": 2365 }, { "epoch": 0.3620504973221117, "grad_norm": 2.6996923030735096, "learning_rate": 1.882828635467818e-05, "loss": 0.8674, "step": 2366 }, { "epoch": 0.362203519510329, "grad_norm": 2.8062029268635267, "learning_rate": 1.8827122092865147e-05, "loss": 0.9018, "step": 2367 }, { "epoch": 0.3623565416985463, "grad_norm": 2.393990682979781, "learning_rate": 1.8825957288944017e-05, "loss": 0.8371, "step": 2368 }, { "epoch": 0.3625095638867636, "grad_norm": 2.302645835173265, "learning_rate": 1.882479194298632e-05, "loss": 0.7554, "step": 2369 }, { "epoch": 0.36266258607498086, "grad_norm": 2.42744947114573, "learning_rate": 1.882362605506363e-05, "loss": 0.8365, "step": 2370 }, { "epoch": 0.36281560826319814, "grad_norm": 2.541019733868751, "learning_rate": 1.8822459625247544e-05, "loss": 0.9123, "step": 2371 }, { "epoch": 0.3629686304514155, "grad_norm": 1.9423446059365113, "learning_rate": 1.8821292653609698e-05, "loss": 0.6642, "step": 2372 }, { "epoch": 0.36312165263963275, "grad_norm": 2.662428175077465, "learning_rate": 1.8820125140221762e-05, "loss": 0.934, "step": 2373 }, { "epoch": 0.36327467482785003, "grad_norm": 2.3608324759387105, "learning_rate": 1.8818957085155437e-05, "loss": 0.8484, "step": 2374 }, { "epoch": 0.3634276970160673, "grad_norm": 2.43897927593089, "learning_rate": 1.881778848848246e-05, "loss": 0.9507, "step": 2375 }, { "epoch": 0.36358071920428464, "grad_norm": 2.2402163894990617, "learning_rate": 1.881661935027459e-05, "loss": 0.7647, "step": 2376 }, { "epoch": 0.3637337413925019, "grad_norm": 2.4107917616959322, "learning_rate": 1.881544967060364e-05, "loss": 0.8487, "step": 2377 }, { "epoch": 0.3638867635807192, "grad_norm": 2.291052028819227, "learning_rate": 1.881427944954144e-05, "loss": 0.765, "step": 2378 }, { "epoch": 0.3640397857689365, "grad_norm": 2.33261212765457, "learning_rate": 1.8813108687159852e-05, "loss": 0.7599, "step": 2379 }, { "epoch": 0.3641928079571538, "grad_norm": 2.3741799543583517, "learning_rate": 1.8811937383530786e-05, "loss": 0.9039, "step": 2380 }, { "epoch": 0.3643458301453711, "grad_norm": 2.697621817440523, "learning_rate": 1.8810765538726175e-05, "loss": 0.8248, "step": 2381 }, { "epoch": 0.36449885233358836, "grad_norm": 2.317964315825029, "learning_rate": 1.880959315281798e-05, "loss": 0.6964, "step": 2382 }, { "epoch": 0.36465187452180564, "grad_norm": 2.2431665552546427, "learning_rate": 1.8808420225878207e-05, "loss": 0.8107, "step": 2383 }, { "epoch": 0.364804896710023, "grad_norm": 2.4004243356517687, "learning_rate": 1.8807246757978892e-05, "loss": 0.7904, "step": 2384 }, { "epoch": 0.36495791889824025, "grad_norm": 2.7187226403241267, "learning_rate": 1.8806072749192096e-05, "loss": 0.9686, "step": 2385 }, { "epoch": 0.36511094108645753, "grad_norm": 2.8316217465961344, "learning_rate": 1.8804898199589925e-05, "loss": 0.8722, "step": 2386 }, { "epoch": 0.3652639632746748, "grad_norm": 2.3925054684622915, "learning_rate": 1.8803723109244513e-05, "loss": 0.9331, "step": 2387 }, { "epoch": 0.36541698546289214, "grad_norm": 2.4682270072369636, "learning_rate": 1.880254747822802e-05, "loss": 0.7587, "step": 2388 }, { "epoch": 0.3655700076511094, "grad_norm": 2.3194225175157093, "learning_rate": 1.8801371306612652e-05, "loss": 0.8076, "step": 2389 }, { "epoch": 0.3657230298393267, "grad_norm": 2.4399649171572486, "learning_rate": 1.880019459447064e-05, "loss": 0.8232, "step": 2390 }, { "epoch": 0.365876052027544, "grad_norm": 2.3831569144484126, "learning_rate": 1.8799017341874255e-05, "loss": 0.7088, "step": 2391 }, { "epoch": 0.3660290742157613, "grad_norm": 2.3319903377417854, "learning_rate": 1.879783954889579e-05, "loss": 0.7227, "step": 2392 }, { "epoch": 0.3661820964039786, "grad_norm": 2.495408648387555, "learning_rate": 1.879666121560758e-05, "loss": 0.8645, "step": 2393 }, { "epoch": 0.36633511859219586, "grad_norm": 2.008295423288152, "learning_rate": 1.8795482342081995e-05, "loss": 0.7434, "step": 2394 }, { "epoch": 0.36648814078041314, "grad_norm": 2.3226281552138484, "learning_rate": 1.879430292839143e-05, "loss": 0.7134, "step": 2395 }, { "epoch": 0.3666411629686305, "grad_norm": 2.7280664670536074, "learning_rate": 1.8793122974608318e-05, "loss": 0.8971, "step": 2396 }, { "epoch": 0.36679418515684775, "grad_norm": 2.417968916079334, "learning_rate": 1.8791942480805127e-05, "loss": 0.9469, "step": 2397 }, { "epoch": 0.36694720734506503, "grad_norm": 2.3321600957081703, "learning_rate": 1.8790761447054353e-05, "loss": 0.8373, "step": 2398 }, { "epoch": 0.3671002295332823, "grad_norm": 2.9661860123961263, "learning_rate": 1.8789579873428526e-05, "loss": 0.8054, "step": 2399 }, { "epoch": 0.36725325172149964, "grad_norm": 2.8557791816349822, "learning_rate": 1.878839776000022e-05, "loss": 0.8268, "step": 2400 }, { "epoch": 0.3674062739097169, "grad_norm": 2.5578308951105853, "learning_rate": 1.8787215106842022e-05, "loss": 0.7479, "step": 2401 }, { "epoch": 0.3675592960979342, "grad_norm": 2.9152984669056323, "learning_rate": 1.878603191402657e-05, "loss": 0.8661, "step": 2402 }, { "epoch": 0.3677123182861515, "grad_norm": 2.3207274090171843, "learning_rate": 1.878484818162653e-05, "loss": 0.805, "step": 2403 }, { "epoch": 0.3678653404743688, "grad_norm": 2.3006454257014557, "learning_rate": 1.878366390971459e-05, "loss": 0.7455, "step": 2404 }, { "epoch": 0.3680183626625861, "grad_norm": 2.3525946228739545, "learning_rate": 1.8782479098363494e-05, "loss": 0.8276, "step": 2405 }, { "epoch": 0.36817138485080336, "grad_norm": 2.4311958649745504, "learning_rate": 1.8781293747645998e-05, "loss": 0.8442, "step": 2406 }, { "epoch": 0.36832440703902064, "grad_norm": 2.2341059242635364, "learning_rate": 1.87801078576349e-05, "loss": 0.7892, "step": 2407 }, { "epoch": 0.368477429227238, "grad_norm": 2.4789478015956288, "learning_rate": 1.877892142840303e-05, "loss": 0.7717, "step": 2408 }, { "epoch": 0.36863045141545525, "grad_norm": 2.3251520271339743, "learning_rate": 1.877773446002325e-05, "loss": 0.9049, "step": 2409 }, { "epoch": 0.36878347360367253, "grad_norm": 2.4284067911124843, "learning_rate": 1.8776546952568456e-05, "loss": 0.8632, "step": 2410 }, { "epoch": 0.3689364957918898, "grad_norm": 2.407972854722055, "learning_rate": 1.8775358906111586e-05, "loss": 0.6936, "step": 2411 }, { "epoch": 0.36908951798010714, "grad_norm": 2.2389103408646136, "learning_rate": 1.8774170320725592e-05, "loss": 0.6321, "step": 2412 }, { "epoch": 0.3692425401683244, "grad_norm": 2.5869540597200396, "learning_rate": 1.8772981196483474e-05, "loss": 0.9377, "step": 2413 }, { "epoch": 0.3693955623565417, "grad_norm": 2.752160756736885, "learning_rate": 1.8771791533458264e-05, "loss": 0.8435, "step": 2414 }, { "epoch": 0.369548584544759, "grad_norm": 2.522539089724177, "learning_rate": 1.8770601331723017e-05, "loss": 0.8243, "step": 2415 }, { "epoch": 0.3697016067329763, "grad_norm": 2.810550793971596, "learning_rate": 1.8769410591350832e-05, "loss": 0.9468, "step": 2416 }, { "epoch": 0.3698546289211936, "grad_norm": 2.469864839731524, "learning_rate": 1.8768219312414838e-05, "loss": 0.7879, "step": 2417 }, { "epoch": 0.37000765110941086, "grad_norm": 2.271877626883605, "learning_rate": 1.8767027494988194e-05, "loss": 0.8244, "step": 2418 }, { "epoch": 0.37016067329762814, "grad_norm": 2.6112175112151044, "learning_rate": 1.8765835139144093e-05, "loss": 0.775, "step": 2419 }, { "epoch": 0.3703136954858455, "grad_norm": 2.266093145167797, "learning_rate": 1.8764642244955767e-05, "loss": 0.7782, "step": 2420 }, { "epoch": 0.37046671767406275, "grad_norm": 2.3075593915599724, "learning_rate": 1.876344881249647e-05, "loss": 0.8186, "step": 2421 }, { "epoch": 0.37061973986228003, "grad_norm": 2.560441628674782, "learning_rate": 1.8762254841839503e-05, "loss": 0.9051, "step": 2422 }, { "epoch": 0.3707727620504973, "grad_norm": 2.542019562348531, "learning_rate": 1.8761060333058183e-05, "loss": 0.8128, "step": 2423 }, { "epoch": 0.37092578423871464, "grad_norm": 2.6372207763697197, "learning_rate": 1.8759865286225878e-05, "loss": 0.8735, "step": 2424 }, { "epoch": 0.3710788064269319, "grad_norm": 2.5326515666594593, "learning_rate": 1.8758669701415975e-05, "loss": 0.7602, "step": 2425 }, { "epoch": 0.3712318286151492, "grad_norm": 2.368763311753748, "learning_rate": 1.8757473578701902e-05, "loss": 0.7612, "step": 2426 }, { "epoch": 0.3713848508033665, "grad_norm": 2.128504255109629, "learning_rate": 1.8756276918157118e-05, "loss": 0.7579, "step": 2427 }, { "epoch": 0.3715378729915838, "grad_norm": 2.471878320157333, "learning_rate": 1.8755079719855113e-05, "loss": 0.8326, "step": 2428 }, { "epoch": 0.3716908951798011, "grad_norm": 2.7949857804868428, "learning_rate": 1.8753881983869414e-05, "loss": 0.854, "step": 2429 }, { "epoch": 0.37184391736801836, "grad_norm": 2.7751068465784123, "learning_rate": 1.875268371027357e-05, "loss": 0.8798, "step": 2430 }, { "epoch": 0.37199693955623564, "grad_norm": 2.7780322221768863, "learning_rate": 1.8751484899141185e-05, "loss": 0.8468, "step": 2431 }, { "epoch": 0.372149961744453, "grad_norm": 2.4139073729391702, "learning_rate": 1.8750285550545877e-05, "loss": 0.807, "step": 2432 }, { "epoch": 0.37230298393267025, "grad_norm": 2.5798622524914503, "learning_rate": 1.87490856645613e-05, "loss": 0.7849, "step": 2433 }, { "epoch": 0.37245600612088753, "grad_norm": 2.3845029258074173, "learning_rate": 1.874788524126115e-05, "loss": 0.7316, "step": 2434 }, { "epoch": 0.3726090283091048, "grad_norm": 2.2918954995904763, "learning_rate": 1.8746684280719137e-05, "loss": 0.7754, "step": 2435 }, { "epoch": 0.37276205049732214, "grad_norm": 2.8944457285794725, "learning_rate": 1.874548278300903e-05, "loss": 0.8968, "step": 2436 }, { "epoch": 0.3729150726855394, "grad_norm": 2.512484041846354, "learning_rate": 1.8744280748204614e-05, "loss": 0.8099, "step": 2437 }, { "epoch": 0.3730680948737567, "grad_norm": 2.652834197290464, "learning_rate": 1.8743078176379707e-05, "loss": 0.841, "step": 2438 }, { "epoch": 0.373221117061974, "grad_norm": 2.784629393063227, "learning_rate": 1.8741875067608167e-05, "loss": 0.8983, "step": 2439 }, { "epoch": 0.37337413925019125, "grad_norm": 2.676099241658654, "learning_rate": 1.874067142196388e-05, "loss": 0.8388, "step": 2440 }, { "epoch": 0.3735271614384086, "grad_norm": 2.5217409667731534, "learning_rate": 1.8739467239520767e-05, "loss": 0.818, "step": 2441 }, { "epoch": 0.37368018362662586, "grad_norm": 2.1378603895356583, "learning_rate": 1.8738262520352783e-05, "loss": 0.7787, "step": 2442 }, { "epoch": 0.37383320581484314, "grad_norm": 2.5527568920487917, "learning_rate": 1.8737057264533915e-05, "loss": 0.8348, "step": 2443 }, { "epoch": 0.3739862280030604, "grad_norm": 2.624247786380471, "learning_rate": 1.873585147213818e-05, "loss": 0.7936, "step": 2444 }, { "epoch": 0.37413925019127775, "grad_norm": 2.3965554595283836, "learning_rate": 1.873464514323963e-05, "loss": 0.7522, "step": 2445 }, { "epoch": 0.37429227237949503, "grad_norm": 2.5187732475788396, "learning_rate": 1.873343827791235e-05, "loss": 0.753, "step": 2446 }, { "epoch": 0.3744452945677123, "grad_norm": 2.415007932936088, "learning_rate": 1.8732230876230463e-05, "loss": 0.865, "step": 2447 }, { "epoch": 0.3745983167559296, "grad_norm": 2.411623125230876, "learning_rate": 1.8731022938268114e-05, "loss": 0.7445, "step": 2448 }, { "epoch": 0.3747513389441469, "grad_norm": 2.504678058017117, "learning_rate": 1.8729814464099492e-05, "loss": 0.8019, "step": 2449 }, { "epoch": 0.3749043611323642, "grad_norm": 2.4121077808768683, "learning_rate": 1.8728605453798816e-05, "loss": 0.8243, "step": 2450 }, { "epoch": 0.37505738332058147, "grad_norm": 2.3792207835192927, "learning_rate": 1.8727395907440328e-05, "loss": 0.7216, "step": 2451 }, { "epoch": 0.37521040550879875, "grad_norm": 2.7011902930310323, "learning_rate": 1.8726185825098317e-05, "loss": 0.834, "step": 2452 }, { "epoch": 0.3753634276970161, "grad_norm": 2.37612362801207, "learning_rate": 1.87249752068471e-05, "loss": 0.7309, "step": 2453 }, { "epoch": 0.37551644988523336, "grad_norm": 2.5429947552774097, "learning_rate": 1.8723764052761018e-05, "loss": 0.7774, "step": 2454 }, { "epoch": 0.37566947207345064, "grad_norm": 2.399185893707355, "learning_rate": 1.8722552362914463e-05, "loss": 0.8234, "step": 2455 }, { "epoch": 0.3758224942616679, "grad_norm": 2.495485499967576, "learning_rate": 1.872134013738184e-05, "loss": 0.7609, "step": 2456 }, { "epoch": 0.37597551644988525, "grad_norm": 2.352447545297215, "learning_rate": 1.8720127376237606e-05, "loss": 0.7429, "step": 2457 }, { "epoch": 0.3761285386381025, "grad_norm": 2.040482423517943, "learning_rate": 1.871891407955623e-05, "loss": 0.7081, "step": 2458 }, { "epoch": 0.3762815608263198, "grad_norm": 2.4635143143098253, "learning_rate": 1.8717700247412237e-05, "loss": 0.8347, "step": 2459 }, { "epoch": 0.3764345830145371, "grad_norm": 2.4888516381763712, "learning_rate": 1.8716485879880165e-05, "loss": 0.7666, "step": 2460 }, { "epoch": 0.3765876052027544, "grad_norm": 2.3853061470198953, "learning_rate": 1.87152709770346e-05, "loss": 0.7858, "step": 2461 }, { "epoch": 0.3767406273909717, "grad_norm": 2.8397222753085583, "learning_rate": 1.8714055538950145e-05, "loss": 0.7969, "step": 2462 }, { "epoch": 0.37689364957918897, "grad_norm": 2.2192558442628387, "learning_rate": 1.8712839565701455e-05, "loss": 0.893, "step": 2463 }, { "epoch": 0.37704667176740625, "grad_norm": 2.1752035742885645, "learning_rate": 1.87116230573632e-05, "loss": 0.7395, "step": 2464 }, { "epoch": 0.3771996939556236, "grad_norm": 2.4877185407030167, "learning_rate": 1.8710406014010094e-05, "loss": 0.8267, "step": 2465 }, { "epoch": 0.37735271614384086, "grad_norm": 2.5339436958201067, "learning_rate": 1.870918843571688e-05, "loss": 0.7972, "step": 2466 }, { "epoch": 0.37750573833205814, "grad_norm": 2.557231748291859, "learning_rate": 1.8707970322558328e-05, "loss": 0.8761, "step": 2467 }, { "epoch": 0.3776587605202754, "grad_norm": 2.2694866860882574, "learning_rate": 1.8706751674609258e-05, "loss": 0.8071, "step": 2468 }, { "epoch": 0.37781178270849275, "grad_norm": 2.3669762986337104, "learning_rate": 1.8705532491944505e-05, "loss": 0.8245, "step": 2469 }, { "epoch": 0.37796480489671, "grad_norm": 2.3039821470771846, "learning_rate": 1.8704312774638945e-05, "loss": 0.6956, "step": 2470 }, { "epoch": 0.3781178270849273, "grad_norm": 2.438094533399992, "learning_rate": 1.8703092522767487e-05, "loss": 0.8558, "step": 2471 }, { "epoch": 0.3782708492731446, "grad_norm": 2.690744376860686, "learning_rate": 1.870187173640507e-05, "loss": 0.988, "step": 2472 }, { "epoch": 0.3784238714613619, "grad_norm": 2.410059836592625, "learning_rate": 1.870065041562667e-05, "loss": 0.7595, "step": 2473 }, { "epoch": 0.3785768936495792, "grad_norm": 2.3714407252038523, "learning_rate": 1.8699428560507288e-05, "loss": 0.8068, "step": 2474 }, { "epoch": 0.37872991583779647, "grad_norm": 2.87103136320842, "learning_rate": 1.8698206171121963e-05, "loss": 0.7212, "step": 2475 }, { "epoch": 0.37888293802601375, "grad_norm": 2.419031011584517, "learning_rate": 1.8696983247545776e-05, "loss": 0.8506, "step": 2476 }, { "epoch": 0.3790359602142311, "grad_norm": 2.3590944560091667, "learning_rate": 1.869575978985382e-05, "loss": 0.9662, "step": 2477 }, { "epoch": 0.37918898240244836, "grad_norm": 2.4656630507450314, "learning_rate": 1.8694535798121244e-05, "loss": 0.8289, "step": 2478 }, { "epoch": 0.37934200459066564, "grad_norm": 2.295792371436799, "learning_rate": 1.8693311272423204e-05, "loss": 0.8399, "step": 2479 }, { "epoch": 0.3794950267788829, "grad_norm": 2.961244599837281, "learning_rate": 1.8692086212834912e-05, "loss": 0.8367, "step": 2480 }, { "epoch": 0.37964804896710025, "grad_norm": 2.6693969074114365, "learning_rate": 1.8690860619431604e-05, "loss": 0.9139, "step": 2481 }, { "epoch": 0.3798010711553175, "grad_norm": 2.635077494495987, "learning_rate": 1.8689634492288547e-05, "loss": 0.7848, "step": 2482 }, { "epoch": 0.3799540933435348, "grad_norm": 2.561238780281626, "learning_rate": 1.8688407831481037e-05, "loss": 0.9398, "step": 2483 }, { "epoch": 0.3801071155317521, "grad_norm": 2.3136951444719007, "learning_rate": 1.8687180637084418e-05, "loss": 0.7758, "step": 2484 }, { "epoch": 0.3802601377199694, "grad_norm": 2.504907279331964, "learning_rate": 1.868595290917405e-05, "loss": 0.8735, "step": 2485 }, { "epoch": 0.3804131599081867, "grad_norm": 2.2085626782374854, "learning_rate": 1.8684724647825333e-05, "loss": 0.74, "step": 2486 }, { "epoch": 0.38056618209640397, "grad_norm": 2.6191916388341125, "learning_rate": 1.8683495853113703e-05, "loss": 0.787, "step": 2487 }, { "epoch": 0.38071920428462125, "grad_norm": 2.602426306454107, "learning_rate": 1.868226652511462e-05, "loss": 0.7568, "step": 2488 }, { "epoch": 0.3808722264728386, "grad_norm": 2.3846879215375076, "learning_rate": 1.8681036663903585e-05, "loss": 0.7223, "step": 2489 }, { "epoch": 0.38102524866105586, "grad_norm": 2.2431261468268686, "learning_rate": 1.867980626955613e-05, "loss": 0.7147, "step": 2490 }, { "epoch": 0.38117827084927314, "grad_norm": 2.5618430976248265, "learning_rate": 1.8678575342147815e-05, "loss": 0.8167, "step": 2491 }, { "epoch": 0.3813312930374904, "grad_norm": 2.7244295536027194, "learning_rate": 1.867734388175424e-05, "loss": 0.7683, "step": 2492 }, { "epoch": 0.38148431522570775, "grad_norm": 2.6472413539846857, "learning_rate": 1.8676111888451028e-05, "loss": 0.8227, "step": 2493 }, { "epoch": 0.381637337413925, "grad_norm": 2.308898420544816, "learning_rate": 1.8674879362313843e-05, "loss": 0.8609, "step": 2494 }, { "epoch": 0.3817903596021423, "grad_norm": 2.563563794604606, "learning_rate": 1.8673646303418382e-05, "loss": 0.8591, "step": 2495 }, { "epoch": 0.3819433817903596, "grad_norm": 2.327587598177729, "learning_rate": 1.867241271184037e-05, "loss": 0.7396, "step": 2496 }, { "epoch": 0.3820964039785769, "grad_norm": 2.2452666295710717, "learning_rate": 1.8671178587655567e-05, "loss": 0.8333, "step": 2497 }, { "epoch": 0.3822494261667942, "grad_norm": 2.358122242033259, "learning_rate": 1.8669943930939763e-05, "loss": 0.715, "step": 2498 }, { "epoch": 0.38240244835501147, "grad_norm": 2.242928896018524, "learning_rate": 1.866870874176879e-05, "loss": 0.8568, "step": 2499 }, { "epoch": 0.38255547054322875, "grad_norm": 2.393411774568154, "learning_rate": 1.8667473020218497e-05, "loss": 0.8591, "step": 2500 }, { "epoch": 0.3827084927314461, "grad_norm": 2.508761300085195, "learning_rate": 1.8666236766364778e-05, "loss": 0.6883, "step": 2501 }, { "epoch": 0.38286151491966336, "grad_norm": 2.273285381962697, "learning_rate": 1.8664999980283558e-05, "loss": 0.7873, "step": 2502 }, { "epoch": 0.38301453710788064, "grad_norm": 2.5456143107904405, "learning_rate": 1.8663762662050793e-05, "loss": 0.7534, "step": 2503 }, { "epoch": 0.3831675592960979, "grad_norm": 2.5340046766612954, "learning_rate": 1.8662524811742467e-05, "loss": 0.9405, "step": 2504 }, { "epoch": 0.38332058148431525, "grad_norm": 2.2030346255099857, "learning_rate": 1.8661286429434607e-05, "loss": 0.8104, "step": 2505 }, { "epoch": 0.3834736036725325, "grad_norm": 2.0899223105089977, "learning_rate": 1.8660047515203263e-05, "loss": 0.7298, "step": 2506 }, { "epoch": 0.3836266258607498, "grad_norm": 2.091576745913007, "learning_rate": 1.8658808069124523e-05, "loss": 0.7127, "step": 2507 }, { "epoch": 0.3837796480489671, "grad_norm": 2.442967562778881, "learning_rate": 1.8657568091274503e-05, "loss": 0.928, "step": 2508 }, { "epoch": 0.3839326702371844, "grad_norm": 2.6676064813396496, "learning_rate": 1.8656327581729364e-05, "loss": 0.8753, "step": 2509 }, { "epoch": 0.3840856924254017, "grad_norm": 2.9341603959582567, "learning_rate": 1.865508654056528e-05, "loss": 0.9654, "step": 2510 }, { "epoch": 0.38423871461361897, "grad_norm": 2.4344257952073156, "learning_rate": 1.8653844967858475e-05, "loss": 0.7526, "step": 2511 }, { "epoch": 0.38439173680183625, "grad_norm": 2.361905617278103, "learning_rate": 1.8652602863685195e-05, "loss": 0.8353, "step": 2512 }, { "epoch": 0.3845447589900536, "grad_norm": 2.218282200611466, "learning_rate": 1.8651360228121724e-05, "loss": 0.6705, "step": 2513 }, { "epoch": 0.38469778117827086, "grad_norm": 2.5345150182492953, "learning_rate": 1.8650117061244378e-05, "loss": 0.8431, "step": 2514 }, { "epoch": 0.38485080336648814, "grad_norm": 2.2846439326524712, "learning_rate": 1.8648873363129502e-05, "loss": 0.7474, "step": 2515 }, { "epoch": 0.3850038255547054, "grad_norm": 2.3803085886744992, "learning_rate": 1.864762913385348e-05, "loss": 0.7584, "step": 2516 }, { "epoch": 0.38515684774292275, "grad_norm": 2.257800923753178, "learning_rate": 1.864638437349272e-05, "loss": 0.751, "step": 2517 }, { "epoch": 0.38530986993114, "grad_norm": 2.4758559918625016, "learning_rate": 1.8645139082123675e-05, "loss": 0.8829, "step": 2518 }, { "epoch": 0.3854628921193573, "grad_norm": 2.508363591784869, "learning_rate": 1.8643893259822817e-05, "loss": 0.8091, "step": 2519 }, { "epoch": 0.3856159143075746, "grad_norm": 2.344123531418125, "learning_rate": 1.864264690666666e-05, "loss": 0.8086, "step": 2520 }, { "epoch": 0.3857689364957919, "grad_norm": 2.6911838867682327, "learning_rate": 1.8641400022731746e-05, "loss": 0.8829, "step": 2521 }, { "epoch": 0.3859219586840092, "grad_norm": 2.454881184279251, "learning_rate": 1.864015260809465e-05, "loss": 0.8059, "step": 2522 }, { "epoch": 0.38607498087222647, "grad_norm": 2.3972818653488353, "learning_rate": 1.8638904662831985e-05, "loss": 0.7309, "step": 2523 }, { "epoch": 0.38622800306044375, "grad_norm": 2.2703191459881573, "learning_rate": 1.8637656187020385e-05, "loss": 0.7365, "step": 2524 }, { "epoch": 0.3863810252486611, "grad_norm": 2.480521094293675, "learning_rate": 1.863640718073653e-05, "loss": 0.7481, "step": 2525 }, { "epoch": 0.38653404743687836, "grad_norm": 2.32410916843462, "learning_rate": 1.8635157644057124e-05, "loss": 0.7864, "step": 2526 }, { "epoch": 0.38668706962509564, "grad_norm": 2.1555637796345617, "learning_rate": 1.8633907577058905e-05, "loss": 0.7126, "step": 2527 }, { "epoch": 0.3868400918133129, "grad_norm": 2.0035595797324537, "learning_rate": 1.8632656979818645e-05, "loss": 0.7262, "step": 2528 }, { "epoch": 0.38699311400153025, "grad_norm": 2.500240065738295, "learning_rate": 1.863140585241315e-05, "loss": 0.8232, "step": 2529 }, { "epoch": 0.3871461361897475, "grad_norm": 2.182399464955277, "learning_rate": 1.8630154194919256e-05, "loss": 0.7309, "step": 2530 }, { "epoch": 0.3872991583779648, "grad_norm": 2.547552862072076, "learning_rate": 1.8628902007413835e-05, "loss": 0.9328, "step": 2531 }, { "epoch": 0.3874521805661821, "grad_norm": 2.4185769090684404, "learning_rate": 1.8627649289973776e-05, "loss": 0.8189, "step": 2532 }, { "epoch": 0.3876052027543994, "grad_norm": 2.220438695870478, "learning_rate": 1.862639604267603e-05, "loss": 0.7749, "step": 2533 }, { "epoch": 0.3877582249426167, "grad_norm": 2.578841434734547, "learning_rate": 1.8625142265597556e-05, "loss": 0.8771, "step": 2534 }, { "epoch": 0.38791124713083397, "grad_norm": 2.2401417785438382, "learning_rate": 1.862388795881535e-05, "loss": 0.7575, "step": 2535 }, { "epoch": 0.38806426931905125, "grad_norm": 2.478671799326241, "learning_rate": 1.862263312240645e-05, "loss": 0.7846, "step": 2536 }, { "epoch": 0.3882172915072686, "grad_norm": 2.2303927085740565, "learning_rate": 1.8621377756447918e-05, "loss": 0.8156, "step": 2537 }, { "epoch": 0.38837031369548586, "grad_norm": 2.247486841768642, "learning_rate": 1.8620121861016854e-05, "loss": 0.8024, "step": 2538 }, { "epoch": 0.38852333588370314, "grad_norm": 2.41891025833812, "learning_rate": 1.861886543619038e-05, "loss": 0.9377, "step": 2539 }, { "epoch": 0.3886763580719204, "grad_norm": 2.490839576225125, "learning_rate": 1.8617608482045662e-05, "loss": 0.7237, "step": 2540 }, { "epoch": 0.38882938026013775, "grad_norm": 2.204949493052091, "learning_rate": 1.8616350998659895e-05, "loss": 0.7152, "step": 2541 }, { "epoch": 0.388982402448355, "grad_norm": 2.520927297137216, "learning_rate": 1.8615092986110308e-05, "loss": 0.5829, "step": 2542 }, { "epoch": 0.3891354246365723, "grad_norm": 2.0925039293849834, "learning_rate": 1.861383444447416e-05, "loss": 0.6984, "step": 2543 }, { "epoch": 0.3892884468247896, "grad_norm": 2.425641347817572, "learning_rate": 1.8612575373828735e-05, "loss": 0.8305, "step": 2544 }, { "epoch": 0.3894414690130069, "grad_norm": 2.573051475202199, "learning_rate": 1.8611315774251367e-05, "loss": 0.7923, "step": 2545 }, { "epoch": 0.3895944912012242, "grad_norm": 2.5068821947821625, "learning_rate": 1.861005564581941e-05, "loss": 0.9357, "step": 2546 }, { "epoch": 0.38974751338944147, "grad_norm": 2.324998200221432, "learning_rate": 1.8608794988610256e-05, "loss": 0.8629, "step": 2547 }, { "epoch": 0.38990053557765875, "grad_norm": 2.52960849031247, "learning_rate": 1.8607533802701318e-05, "loss": 0.7974, "step": 2548 }, { "epoch": 0.390053557765876, "grad_norm": 2.4815636587464494, "learning_rate": 1.860627208817006e-05, "loss": 0.8583, "step": 2549 }, { "epoch": 0.39020657995409336, "grad_norm": 2.2653029816484582, "learning_rate": 1.8605009845093964e-05, "loss": 0.7958, "step": 2550 }, { "epoch": 0.39035960214231064, "grad_norm": 2.3990609889203807, "learning_rate": 1.8603747073550552e-05, "loss": 0.81, "step": 2551 }, { "epoch": 0.3905126243305279, "grad_norm": 2.32455958546504, "learning_rate": 1.8602483773617373e-05, "loss": 0.7526, "step": 2552 }, { "epoch": 0.3906656465187452, "grad_norm": 2.3427643871420774, "learning_rate": 1.8601219945372015e-05, "loss": 0.8194, "step": 2553 }, { "epoch": 0.3908186687069625, "grad_norm": 2.2513504889406226, "learning_rate": 1.8599955588892086e-05, "loss": 0.7728, "step": 2554 }, { "epoch": 0.3909716908951798, "grad_norm": 2.338401329885293, "learning_rate": 1.8598690704255245e-05, "loss": 0.8088, "step": 2555 }, { "epoch": 0.3911247130833971, "grad_norm": 2.3014418358202473, "learning_rate": 1.859742529153917e-05, "loss": 0.7908, "step": 2556 }, { "epoch": 0.39127773527161436, "grad_norm": 2.6220498441628957, "learning_rate": 1.8596159350821573e-05, "loss": 0.851, "step": 2557 }, { "epoch": 0.3914307574598317, "grad_norm": 2.5560864630883477, "learning_rate": 1.8594892882180202e-05, "loss": 0.7645, "step": 2558 }, { "epoch": 0.39158377964804897, "grad_norm": 2.4441604741310794, "learning_rate": 1.8593625885692835e-05, "loss": 0.8209, "step": 2559 }, { "epoch": 0.39173680183626625, "grad_norm": 2.394828726374545, "learning_rate": 1.8592358361437287e-05, "loss": 0.7607, "step": 2560 }, { "epoch": 0.3918898240244835, "grad_norm": 2.7066788691327055, "learning_rate": 1.8591090309491397e-05, "loss": 0.8225, "step": 2561 }, { "epoch": 0.39204284621270086, "grad_norm": 2.466903893172232, "learning_rate": 1.858982172993304e-05, "loss": 0.7865, "step": 2562 }, { "epoch": 0.39219586840091814, "grad_norm": 2.673740227798785, "learning_rate": 1.858855262284013e-05, "loss": 0.9003, "step": 2563 }, { "epoch": 0.3923488905891354, "grad_norm": 2.655379391443632, "learning_rate": 1.8587282988290604e-05, "loss": 0.8515, "step": 2564 }, { "epoch": 0.3925019127773527, "grad_norm": 2.387993715416807, "learning_rate": 1.8586012826362437e-05, "loss": 0.8344, "step": 2565 }, { "epoch": 0.39265493496557, "grad_norm": 2.122389308766076, "learning_rate": 1.8584742137133635e-05, "loss": 0.7624, "step": 2566 }, { "epoch": 0.3928079571537873, "grad_norm": 2.4675672480936175, "learning_rate": 1.8583470920682232e-05, "loss": 0.7786, "step": 2567 }, { "epoch": 0.3929609793420046, "grad_norm": 2.1616293938457827, "learning_rate": 1.8582199177086302e-05, "loss": 0.682, "step": 2568 }, { "epoch": 0.39311400153022186, "grad_norm": 2.424779733933877, "learning_rate": 1.8580926906423944e-05, "loss": 0.7539, "step": 2569 }, { "epoch": 0.3932670237184392, "grad_norm": 2.468301808496714, "learning_rate": 1.8579654108773296e-05, "loss": 0.7523, "step": 2570 }, { "epoch": 0.39342004590665647, "grad_norm": 2.5668439800269223, "learning_rate": 1.857838078421253e-05, "loss": 0.9001, "step": 2571 }, { "epoch": 0.39357306809487375, "grad_norm": 2.6861943370437285, "learning_rate": 1.857710693281984e-05, "loss": 1.0394, "step": 2572 }, { "epoch": 0.393726090283091, "grad_norm": 2.45276904000766, "learning_rate": 1.8575832554673457e-05, "loss": 0.7552, "step": 2573 }, { "epoch": 0.39387911247130836, "grad_norm": 2.355317126126015, "learning_rate": 1.857455764985165e-05, "loss": 0.723, "step": 2574 }, { "epoch": 0.39403213465952563, "grad_norm": 2.303305717824098, "learning_rate": 1.8573282218432712e-05, "loss": 0.8183, "step": 2575 }, { "epoch": 0.3941851568477429, "grad_norm": 2.274204248618728, "learning_rate": 1.8572006260494975e-05, "loss": 0.7922, "step": 2576 }, { "epoch": 0.3943381790359602, "grad_norm": 2.6013486170787905, "learning_rate": 1.85707297761168e-05, "loss": 0.8621, "step": 2577 }, { "epoch": 0.3944912012241775, "grad_norm": 2.6464460011195183, "learning_rate": 1.856945276537658e-05, "loss": 1.0042, "step": 2578 }, { "epoch": 0.3946442234123948, "grad_norm": 2.071644475187324, "learning_rate": 1.856817522835274e-05, "loss": 0.7901, "step": 2579 }, { "epoch": 0.3947972456006121, "grad_norm": 2.1078648872166936, "learning_rate": 1.8566897165123742e-05, "loss": 0.6951, "step": 2580 }, { "epoch": 0.39495026778882936, "grad_norm": 2.390748321191043, "learning_rate": 1.8565618575768078e-05, "loss": 0.7759, "step": 2581 }, { "epoch": 0.3951032899770467, "grad_norm": 2.3383337718182724, "learning_rate": 1.8564339460364268e-05, "loss": 0.7706, "step": 2582 }, { "epoch": 0.39525631216526397, "grad_norm": 2.2256538171713944, "learning_rate": 1.8563059818990864e-05, "loss": 0.756, "step": 2583 }, { "epoch": 0.39540933435348125, "grad_norm": 2.35477670073321, "learning_rate": 1.856177965172646e-05, "loss": 0.8469, "step": 2584 }, { "epoch": 0.3955623565416985, "grad_norm": 2.394424568314491, "learning_rate": 1.8560498958649675e-05, "loss": 0.8801, "step": 2585 }, { "epoch": 0.39571537872991586, "grad_norm": 2.200020298735911, "learning_rate": 1.8559217739839156e-05, "loss": 0.7756, "step": 2586 }, { "epoch": 0.39586840091813313, "grad_norm": 2.5743268374527686, "learning_rate": 1.8557935995373593e-05, "loss": 0.8344, "step": 2587 }, { "epoch": 0.3960214231063504, "grad_norm": 2.5434499031469753, "learning_rate": 1.8556653725331703e-05, "loss": 0.8237, "step": 2588 }, { "epoch": 0.3961744452945677, "grad_norm": 2.2710097319079243, "learning_rate": 1.8555370929792237e-05, "loss": 0.7575, "step": 2589 }, { "epoch": 0.396327467482785, "grad_norm": 2.1784171165885837, "learning_rate": 1.8554087608833967e-05, "loss": 0.7892, "step": 2590 }, { "epoch": 0.3964804896710023, "grad_norm": 2.4856988681527072, "learning_rate": 1.8552803762535717e-05, "loss": 0.9424, "step": 2591 }, { "epoch": 0.3966335118592196, "grad_norm": 2.7840026809571716, "learning_rate": 1.855151939097633e-05, "loss": 0.8897, "step": 2592 }, { "epoch": 0.39678653404743686, "grad_norm": 2.5930974653909384, "learning_rate": 1.855023449423468e-05, "loss": 0.8272, "step": 2593 }, { "epoch": 0.3969395562356542, "grad_norm": 2.3916332977591277, "learning_rate": 1.8548949072389684e-05, "loss": 0.8303, "step": 2594 }, { "epoch": 0.39709257842387147, "grad_norm": 2.465859918701186, "learning_rate": 1.8547663125520282e-05, "loss": 0.8453, "step": 2595 }, { "epoch": 0.39724560061208875, "grad_norm": 2.5220271459852586, "learning_rate": 1.8546376653705446e-05, "loss": 0.9473, "step": 2596 }, { "epoch": 0.397398622800306, "grad_norm": 2.2621031004903163, "learning_rate": 1.8545089657024185e-05, "loss": 0.7321, "step": 2597 }, { "epoch": 0.39755164498852336, "grad_norm": 2.21402172022864, "learning_rate": 1.8543802135555544e-05, "loss": 0.7898, "step": 2598 }, { "epoch": 0.39770466717674063, "grad_norm": 2.5160554926745142, "learning_rate": 1.854251408937859e-05, "loss": 0.8523, "step": 2599 }, { "epoch": 0.3978576893649579, "grad_norm": 2.472537617150156, "learning_rate": 1.8541225518572425e-05, "loss": 0.9072, "step": 2600 }, { "epoch": 0.3980107115531752, "grad_norm": 2.43355365679199, "learning_rate": 1.8539936423216187e-05, "loss": 0.8955, "step": 2601 }, { "epoch": 0.3981637337413925, "grad_norm": 2.352095084295794, "learning_rate": 1.8538646803389048e-05, "loss": 0.8204, "step": 2602 }, { "epoch": 0.3983167559296098, "grad_norm": 2.4611685288216885, "learning_rate": 1.8537356659170204e-05, "loss": 0.8142, "step": 2603 }, { "epoch": 0.3984697781178271, "grad_norm": 2.186102788808551, "learning_rate": 1.8536065990638884e-05, "loss": 0.7595, "step": 2604 }, { "epoch": 0.39862280030604436, "grad_norm": 2.202120235361913, "learning_rate": 1.8534774797874363e-05, "loss": 0.7505, "step": 2605 }, { "epoch": 0.3987758224942617, "grad_norm": 2.362986852327419, "learning_rate": 1.8533483080955938e-05, "loss": 0.7464, "step": 2606 }, { "epoch": 0.39892884468247897, "grad_norm": 2.200352843576885, "learning_rate": 1.8532190839962927e-05, "loss": 0.696, "step": 2607 }, { "epoch": 0.39908186687069624, "grad_norm": 2.255800540526346, "learning_rate": 1.85308980749747e-05, "loss": 0.8086, "step": 2608 }, { "epoch": 0.3992348890589135, "grad_norm": 2.469916004202286, "learning_rate": 1.852960478607065e-05, "loss": 0.8218, "step": 2609 }, { "epoch": 0.39938791124713086, "grad_norm": 2.2232561093923064, "learning_rate": 1.8528310973330202e-05, "loss": 0.7753, "step": 2610 }, { "epoch": 0.39954093343534813, "grad_norm": 2.465870849875202, "learning_rate": 1.8527016636832812e-05, "loss": 0.8572, "step": 2611 }, { "epoch": 0.3996939556235654, "grad_norm": 2.5829792855369487, "learning_rate": 1.8525721776657976e-05, "loss": 0.8348, "step": 2612 }, { "epoch": 0.3998469778117827, "grad_norm": 2.3325205099538198, "learning_rate": 1.8524426392885214e-05, "loss": 0.8031, "step": 2613 }, { "epoch": 0.4, "grad_norm": 2.276647860896121, "learning_rate": 1.8523130485594073e-05, "loss": 0.8267, "step": 2614 }, { "epoch": 0.4001530221882173, "grad_norm": 2.3928052935724744, "learning_rate": 1.8521834054864153e-05, "loss": 0.8535, "step": 2615 }, { "epoch": 0.4003060443764346, "grad_norm": 2.511577722772227, "learning_rate": 1.852053710077506e-05, "loss": 0.7447, "step": 2616 }, { "epoch": 0.40045906656465186, "grad_norm": 2.110248564429131, "learning_rate": 1.8519239623406458e-05, "loss": 0.7667, "step": 2617 }, { "epoch": 0.4006120887528692, "grad_norm": 2.488627880598713, "learning_rate": 1.8517941622838017e-05, "loss": 0.7103, "step": 2618 }, { "epoch": 0.40076511094108647, "grad_norm": 2.2009676567345653, "learning_rate": 1.851664309914946e-05, "loss": 0.7582, "step": 2619 }, { "epoch": 0.40091813312930374, "grad_norm": 2.1310514990703013, "learning_rate": 1.8515344052420537e-05, "loss": 0.7643, "step": 2620 }, { "epoch": 0.401071155317521, "grad_norm": 2.6259759882870988, "learning_rate": 1.851404448273102e-05, "loss": 0.9047, "step": 2621 }, { "epoch": 0.40122417750573836, "grad_norm": 2.2943155620505915, "learning_rate": 1.8512744390160723e-05, "loss": 0.8079, "step": 2622 }, { "epoch": 0.40137719969395563, "grad_norm": 2.4544127384323327, "learning_rate": 1.851144377478949e-05, "loss": 0.8562, "step": 2623 }, { "epoch": 0.4015302218821729, "grad_norm": 2.3951768753685965, "learning_rate": 1.8510142636697206e-05, "loss": 0.7953, "step": 2624 }, { "epoch": 0.4016832440703902, "grad_norm": 2.3766515796882186, "learning_rate": 1.8508840975963763e-05, "loss": 0.8557, "step": 2625 }, { "epoch": 0.4018362662586075, "grad_norm": 2.5023847534931067, "learning_rate": 1.850753879266911e-05, "loss": 0.812, "step": 2626 }, { "epoch": 0.4019892884468248, "grad_norm": 2.509151267955778, "learning_rate": 1.850623608689322e-05, "loss": 0.7625, "step": 2627 }, { "epoch": 0.4021423106350421, "grad_norm": 2.4184906288540073, "learning_rate": 1.8504932858716097e-05, "loss": 0.8336, "step": 2628 }, { "epoch": 0.40229533282325936, "grad_norm": 2.3168938864394515, "learning_rate": 1.8503629108217772e-05, "loss": 0.7943, "step": 2629 }, { "epoch": 0.4024483550114767, "grad_norm": 2.3915931853920256, "learning_rate": 1.8502324835478316e-05, "loss": 0.8279, "step": 2630 }, { "epoch": 0.40260137719969397, "grad_norm": 2.2349446691616706, "learning_rate": 1.8501020040577838e-05, "loss": 0.7348, "step": 2631 }, { "epoch": 0.40275439938791124, "grad_norm": 2.0151409908112594, "learning_rate": 1.8499714723596455e-05, "loss": 0.7296, "step": 2632 }, { "epoch": 0.4029074215761285, "grad_norm": 2.5360099818874646, "learning_rate": 1.8498408884614343e-05, "loss": 0.8572, "step": 2633 }, { "epoch": 0.40306044376434585, "grad_norm": 2.164183952890459, "learning_rate": 1.8497102523711698e-05, "loss": 0.6298, "step": 2634 }, { "epoch": 0.40321346595256313, "grad_norm": 2.4630834283639, "learning_rate": 1.849579564096874e-05, "loss": 0.7419, "step": 2635 }, { "epoch": 0.4033664881407804, "grad_norm": 2.350449227312488, "learning_rate": 1.849448823646574e-05, "loss": 0.8939, "step": 2636 }, { "epoch": 0.4035195103289977, "grad_norm": 2.3427406354106886, "learning_rate": 1.8493180310282985e-05, "loss": 0.9118, "step": 2637 }, { "epoch": 0.403672532517215, "grad_norm": 2.578044045167987, "learning_rate": 1.8491871862500805e-05, "loss": 0.8345, "step": 2638 }, { "epoch": 0.4038255547054323, "grad_norm": 2.5999091900545612, "learning_rate": 1.849056289319955e-05, "loss": 0.9699, "step": 2639 }, { "epoch": 0.4039785768936496, "grad_norm": 2.4447201013727304, "learning_rate": 1.8489253402459615e-05, "loss": 0.9392, "step": 2640 }, { "epoch": 0.40413159908186685, "grad_norm": 2.232214493716397, "learning_rate": 1.848794339036142e-05, "loss": 0.819, "step": 2641 }, { "epoch": 0.4042846212700842, "grad_norm": 2.3284688831273765, "learning_rate": 1.8486632856985413e-05, "loss": 0.7519, "step": 2642 }, { "epoch": 0.40443764345830147, "grad_norm": 2.363636249635232, "learning_rate": 1.848532180241208e-05, "loss": 0.8482, "step": 2643 }, { "epoch": 0.40459066564651874, "grad_norm": 2.2295062565276074, "learning_rate": 1.8484010226721943e-05, "loss": 0.8025, "step": 2644 }, { "epoch": 0.404743687834736, "grad_norm": 2.358853456902877, "learning_rate": 1.8482698129995552e-05, "loss": 0.7455, "step": 2645 }, { "epoch": 0.40489671002295335, "grad_norm": 2.150877684604761, "learning_rate": 1.848138551231348e-05, "loss": 0.7321, "step": 2646 }, { "epoch": 0.40504973221117063, "grad_norm": 2.4426513307013624, "learning_rate": 1.8480072373756344e-05, "loss": 0.8077, "step": 2647 }, { "epoch": 0.4052027543993879, "grad_norm": 2.2088244537437114, "learning_rate": 1.847875871440479e-05, "loss": 0.7363, "step": 2648 }, { "epoch": 0.4053557765876052, "grad_norm": 2.4812489057806553, "learning_rate": 1.8477444534339494e-05, "loss": 0.8045, "step": 2649 }, { "epoch": 0.4055087987758225, "grad_norm": 2.540939066788791, "learning_rate": 1.8476129833641167e-05, "loss": 0.8186, "step": 2650 }, { "epoch": 0.4056618209640398, "grad_norm": 2.212331345125525, "learning_rate": 1.847481461239055e-05, "loss": 0.809, "step": 2651 }, { "epoch": 0.4058148431522571, "grad_norm": 2.401064578558019, "learning_rate": 1.847349887066841e-05, "loss": 0.7607, "step": 2652 }, { "epoch": 0.40596786534047435, "grad_norm": 2.3926401890762485, "learning_rate": 1.8472182608555554e-05, "loss": 0.8287, "step": 2653 }, { "epoch": 0.4061208875286917, "grad_norm": 2.4175965911225785, "learning_rate": 1.8470865826132823e-05, "loss": 0.9406, "step": 2654 }, { "epoch": 0.40627390971690897, "grad_norm": 2.4696218970242025, "learning_rate": 1.8469548523481084e-05, "loss": 0.8894, "step": 2655 }, { "epoch": 0.40642693190512624, "grad_norm": 2.6511801074082726, "learning_rate": 1.846823070068124e-05, "loss": 0.8922, "step": 2656 }, { "epoch": 0.4065799540933435, "grad_norm": 2.314650500785454, "learning_rate": 1.846691235781422e-05, "loss": 0.7068, "step": 2657 }, { "epoch": 0.40673297628156085, "grad_norm": 2.4866883666060637, "learning_rate": 1.8465593494960984e-05, "loss": 0.8283, "step": 2658 }, { "epoch": 0.40688599846977813, "grad_norm": 2.1590639354930192, "learning_rate": 1.846427411220254e-05, "loss": 0.7625, "step": 2659 }, { "epoch": 0.4070390206579954, "grad_norm": 2.3846505274108063, "learning_rate": 1.8462954209619906e-05, "loss": 0.9031, "step": 2660 }, { "epoch": 0.4071920428462127, "grad_norm": 2.2267645532444775, "learning_rate": 1.846163378729415e-05, "loss": 0.7436, "step": 2661 }, { "epoch": 0.40734506503442997, "grad_norm": 2.398986928697425, "learning_rate": 1.8460312845306355e-05, "loss": 0.8195, "step": 2662 }, { "epoch": 0.4074980872226473, "grad_norm": 2.61225008925308, "learning_rate": 1.8458991383737658e-05, "loss": 0.7914, "step": 2663 }, { "epoch": 0.4076511094108646, "grad_norm": 2.356902302425031, "learning_rate": 1.8457669402669204e-05, "loss": 0.8502, "step": 2664 }, { "epoch": 0.40780413159908185, "grad_norm": 2.560117797837266, "learning_rate": 1.8456346902182186e-05, "loss": 0.781, "step": 2665 }, { "epoch": 0.40795715378729913, "grad_norm": 2.4632168985463303, "learning_rate": 1.8455023882357828e-05, "loss": 0.8861, "step": 2666 }, { "epoch": 0.40811017597551646, "grad_norm": 2.3380920013005424, "learning_rate": 1.845370034327737e-05, "loss": 0.7989, "step": 2667 }, { "epoch": 0.40826319816373374, "grad_norm": 2.718768158973965, "learning_rate": 1.845237628502211e-05, "loss": 0.8724, "step": 2668 }, { "epoch": 0.408416220351951, "grad_norm": 2.2335921822951366, "learning_rate": 1.8451051707673354e-05, "loss": 0.786, "step": 2669 }, { "epoch": 0.4085692425401683, "grad_norm": 2.2166311232529865, "learning_rate": 1.8449726611312448e-05, "loss": 0.7835, "step": 2670 }, { "epoch": 0.40872226472838563, "grad_norm": 2.46656392359808, "learning_rate": 1.844840099602078e-05, "loss": 0.8221, "step": 2671 }, { "epoch": 0.4088752869166029, "grad_norm": 2.222395842583903, "learning_rate": 1.8447074861879755e-05, "loss": 0.8084, "step": 2672 }, { "epoch": 0.4090283091048202, "grad_norm": 2.4012567726291816, "learning_rate": 1.8445748208970817e-05, "loss": 0.8552, "step": 2673 }, { "epoch": 0.40918133129303746, "grad_norm": 2.5844110968126883, "learning_rate": 1.844442103737544e-05, "loss": 0.8058, "step": 2674 }, { "epoch": 0.4093343534812548, "grad_norm": 2.4874765360529185, "learning_rate": 1.8443093347175136e-05, "loss": 0.8288, "step": 2675 }, { "epoch": 0.4094873756694721, "grad_norm": 2.3078682180408956, "learning_rate": 1.8441765138451436e-05, "loss": 0.929, "step": 2676 }, { "epoch": 0.40964039785768935, "grad_norm": 2.2629872570756553, "learning_rate": 1.8440436411285917e-05, "loss": 0.7427, "step": 2677 }, { "epoch": 0.40979342004590663, "grad_norm": 2.4094935497777974, "learning_rate": 1.843910716576018e-05, "loss": 0.7792, "step": 2678 }, { "epoch": 0.40994644223412396, "grad_norm": 2.2940908840934267, "learning_rate": 1.8437777401955855e-05, "loss": 0.7945, "step": 2679 }, { "epoch": 0.41009946442234124, "grad_norm": 2.4885993708595526, "learning_rate": 1.8436447119954614e-05, "loss": 0.8593, "step": 2680 }, { "epoch": 0.4102524866105585, "grad_norm": 2.193032443148415, "learning_rate": 1.843511631983815e-05, "loss": 0.7174, "step": 2681 }, { "epoch": 0.4104055087987758, "grad_norm": 2.3278540581719143, "learning_rate": 1.8433785001688193e-05, "loss": 0.7327, "step": 2682 }, { "epoch": 0.41055853098699313, "grad_norm": 3.241194420703757, "learning_rate": 1.843245316558651e-05, "loss": 0.6707, "step": 2683 }, { "epoch": 0.4107115531752104, "grad_norm": 2.4359746230564747, "learning_rate": 1.843112081161489e-05, "loss": 0.8612, "step": 2684 }, { "epoch": 0.4108645753634277, "grad_norm": 2.3943737897219366, "learning_rate": 1.8429787939855153e-05, "loss": 0.695, "step": 2685 }, { "epoch": 0.41101759755164496, "grad_norm": 2.4256681273927287, "learning_rate": 1.8428454550389167e-05, "loss": 0.7882, "step": 2686 }, { "epoch": 0.4111706197398623, "grad_norm": 3.3705908991777704, "learning_rate": 1.842712064329881e-05, "loss": 0.8501, "step": 2687 }, { "epoch": 0.4113236419280796, "grad_norm": 2.419989046073004, "learning_rate": 1.8425786218666008e-05, "loss": 0.9159, "step": 2688 }, { "epoch": 0.41147666411629685, "grad_norm": 2.4530844333412904, "learning_rate": 1.8424451276572712e-05, "loss": 0.7439, "step": 2689 }, { "epoch": 0.41162968630451413, "grad_norm": 2.7154613196392163, "learning_rate": 1.8423115817100908e-05, "loss": 0.7944, "step": 2690 }, { "epoch": 0.41178270849273146, "grad_norm": 2.0993592442625464, "learning_rate": 1.842177984033261e-05, "loss": 0.6573, "step": 2691 }, { "epoch": 0.41193573068094874, "grad_norm": 2.251977839855377, "learning_rate": 1.8420443346349866e-05, "loss": 0.7723, "step": 2692 }, { "epoch": 0.412088752869166, "grad_norm": 2.4973317883727355, "learning_rate": 1.8419106335234757e-05, "loss": 0.8563, "step": 2693 }, { "epoch": 0.4122417750573833, "grad_norm": 2.16198116036908, "learning_rate": 1.8417768807069388e-05, "loss": 0.7834, "step": 2694 }, { "epoch": 0.41239479724560063, "grad_norm": 2.2356466035192177, "learning_rate": 1.8416430761935912e-05, "loss": 0.8431, "step": 2695 }, { "epoch": 0.4125478194338179, "grad_norm": 2.3601155991963574, "learning_rate": 1.8415092199916493e-05, "loss": 0.7505, "step": 2696 }, { "epoch": 0.4127008416220352, "grad_norm": 2.4839307976282297, "learning_rate": 1.8413753121093342e-05, "loss": 0.834, "step": 2697 }, { "epoch": 0.41285386381025246, "grad_norm": 2.5380141603149733, "learning_rate": 1.84124135255487e-05, "loss": 0.8884, "step": 2698 }, { "epoch": 0.4130068859984698, "grad_norm": 2.359684411575416, "learning_rate": 1.841107341336483e-05, "loss": 0.8136, "step": 2699 }, { "epoch": 0.4131599081866871, "grad_norm": 2.246915060713202, "learning_rate": 1.840973278462404e-05, "loss": 0.853, "step": 2700 }, { "epoch": 0.41331293037490435, "grad_norm": 2.743278992870679, "learning_rate": 1.8408391639408662e-05, "loss": 0.9538, "step": 2701 }, { "epoch": 0.41346595256312163, "grad_norm": 2.3414868128353445, "learning_rate": 1.8407049977801057e-05, "loss": 0.7642, "step": 2702 }, { "epoch": 0.41361897475133896, "grad_norm": 2.2150403808055095, "learning_rate": 1.8405707799883625e-05, "loss": 0.8568, "step": 2703 }, { "epoch": 0.41377199693955624, "grad_norm": 2.1600067523897595, "learning_rate": 1.8404365105738793e-05, "loss": 0.7698, "step": 2704 }, { "epoch": 0.4139250191277735, "grad_norm": 2.5630873870166027, "learning_rate": 1.8403021895449022e-05, "loss": 0.8071, "step": 2705 }, { "epoch": 0.4140780413159908, "grad_norm": 2.2015946582709724, "learning_rate": 1.8401678169096803e-05, "loss": 0.7721, "step": 2706 }, { "epoch": 0.41423106350420813, "grad_norm": 2.3404124500020616, "learning_rate": 1.840033392676466e-05, "loss": 0.7017, "step": 2707 }, { "epoch": 0.4143840856924254, "grad_norm": 2.5603216912077142, "learning_rate": 1.8398989168535153e-05, "loss": 0.7995, "step": 2708 }, { "epoch": 0.4145371078806427, "grad_norm": 2.588436369527036, "learning_rate": 1.8397643894490857e-05, "loss": 0.8638, "step": 2709 }, { "epoch": 0.41469013006885996, "grad_norm": 2.2405037181742418, "learning_rate": 1.8396298104714404e-05, "loss": 0.7631, "step": 2710 }, { "epoch": 0.4148431522570773, "grad_norm": 2.4837864680181805, "learning_rate": 1.8394951799288433e-05, "loss": 0.8381, "step": 2711 }, { "epoch": 0.4149961744452946, "grad_norm": 2.434315227598334, "learning_rate": 1.8393604978295634e-05, "loss": 0.7599, "step": 2712 }, { "epoch": 0.41514919663351185, "grad_norm": 2.3929964810627875, "learning_rate": 1.8392257641818717e-05, "loss": 0.8392, "step": 2713 }, { "epoch": 0.41530221882172913, "grad_norm": 2.3769240010713784, "learning_rate": 1.8390909789940424e-05, "loss": 0.914, "step": 2714 }, { "epoch": 0.41545524100994646, "grad_norm": 2.4156353290483996, "learning_rate": 1.8389561422743537e-05, "loss": 0.8067, "step": 2715 }, { "epoch": 0.41560826319816374, "grad_norm": 2.3762181100367727, "learning_rate": 1.838821254031087e-05, "loss": 0.8145, "step": 2716 }, { "epoch": 0.415761285386381, "grad_norm": 2.7032932387797994, "learning_rate": 1.8386863142725245e-05, "loss": 0.7724, "step": 2717 }, { "epoch": 0.4159143075745983, "grad_norm": 2.4199311872817675, "learning_rate": 1.8385513230069552e-05, "loss": 0.796, "step": 2718 }, { "epoch": 0.41606732976281563, "grad_norm": 2.611915068916634, "learning_rate": 1.8384162802426683e-05, "loss": 0.8, "step": 2719 }, { "epoch": 0.4162203519510329, "grad_norm": 2.4719883493550663, "learning_rate": 1.838281185987958e-05, "loss": 0.7554, "step": 2720 }, { "epoch": 0.4163733741392502, "grad_norm": 6.780799641945747, "learning_rate": 1.8381460402511206e-05, "loss": 0.8375, "step": 2721 }, { "epoch": 0.41652639632746746, "grad_norm": 2.2876882146123765, "learning_rate": 1.8380108430404558e-05, "loss": 0.8064, "step": 2722 }, { "epoch": 0.4166794185156848, "grad_norm": 2.451624523809711, "learning_rate": 1.8378755943642674e-05, "loss": 0.7935, "step": 2723 }, { "epoch": 0.4168324407039021, "grad_norm": 2.8942584708078827, "learning_rate": 1.8377402942308606e-05, "loss": 0.7752, "step": 2724 }, { "epoch": 0.41698546289211935, "grad_norm": 2.4380212312342024, "learning_rate": 1.837604942648545e-05, "loss": 0.8369, "step": 2725 }, { "epoch": 0.41713848508033663, "grad_norm": 2.42312875758766, "learning_rate": 1.8374695396256333e-05, "loss": 0.8675, "step": 2726 }, { "epoch": 0.41729150726855396, "grad_norm": 2.822238056417803, "learning_rate": 1.837334085170441e-05, "loss": 0.8107, "step": 2727 }, { "epoch": 0.41744452945677124, "grad_norm": 2.7255201999810397, "learning_rate": 1.8371985792912867e-05, "loss": 0.8395, "step": 2728 }, { "epoch": 0.4175975516449885, "grad_norm": 2.7738793587791295, "learning_rate": 1.8370630219964924e-05, "loss": 0.7883, "step": 2729 }, { "epoch": 0.4177505738332058, "grad_norm": 3.0113058281040987, "learning_rate": 1.8369274132943836e-05, "loss": 0.928, "step": 2730 }, { "epoch": 0.41790359602142313, "grad_norm": 2.3332835609228626, "learning_rate": 1.8367917531932883e-05, "loss": 0.9316, "step": 2731 }, { "epoch": 0.4180566182096404, "grad_norm": 2.52833709344253, "learning_rate": 1.8366560417015376e-05, "loss": 0.7773, "step": 2732 }, { "epoch": 0.4182096403978577, "grad_norm": 2.776855292461947, "learning_rate": 1.8365202788274665e-05, "loss": 0.9908, "step": 2733 }, { "epoch": 0.41836266258607496, "grad_norm": 3.1038722511338848, "learning_rate": 1.8363844645794127e-05, "loss": 0.7926, "step": 2734 }, { "epoch": 0.4185156847742923, "grad_norm": 2.6755227443722966, "learning_rate": 1.836248598965717e-05, "loss": 0.7397, "step": 2735 }, { "epoch": 0.4186687069625096, "grad_norm": 2.193456846703546, "learning_rate": 1.836112681994723e-05, "loss": 0.7661, "step": 2736 }, { "epoch": 0.41882172915072685, "grad_norm": 2.4328277091982704, "learning_rate": 1.8359767136747787e-05, "loss": 0.8149, "step": 2737 }, { "epoch": 0.41897475133894413, "grad_norm": 2.5640952513553996, "learning_rate": 1.835840694014234e-05, "loss": 0.8672, "step": 2738 }, { "epoch": 0.41912777352716146, "grad_norm": 2.372258279722657, "learning_rate": 1.8357046230214424e-05, "loss": 0.7194, "step": 2739 }, { "epoch": 0.41928079571537874, "grad_norm": 2.694024313287626, "learning_rate": 1.8355685007047602e-05, "loss": 0.8198, "step": 2740 }, { "epoch": 0.419433817903596, "grad_norm": 2.717663032912964, "learning_rate": 1.8354323270725483e-05, "loss": 0.754, "step": 2741 }, { "epoch": 0.4195868400918133, "grad_norm": 2.1220023192705466, "learning_rate": 1.8352961021331686e-05, "loss": 0.7195, "step": 2742 }, { "epoch": 0.41973986228003063, "grad_norm": 2.5635730910519428, "learning_rate": 1.8351598258949878e-05, "loss": 0.6638, "step": 2743 }, { "epoch": 0.4198928844682479, "grad_norm": 2.307124143928578, "learning_rate": 1.8350234983663745e-05, "loss": 0.779, "step": 2744 }, { "epoch": 0.4200459066564652, "grad_norm": 2.581024604321476, "learning_rate": 1.834887119555702e-05, "loss": 0.9084, "step": 2745 }, { "epoch": 0.42019892884468246, "grad_norm": 2.486303856122351, "learning_rate": 1.834750689471345e-05, "loss": 0.8191, "step": 2746 }, { "epoch": 0.4203519510328998, "grad_norm": 2.794251757152545, "learning_rate": 1.8346142081216828e-05, "loss": 0.7747, "step": 2747 }, { "epoch": 0.4205049732211171, "grad_norm": 2.5361298102326923, "learning_rate": 1.8344776755150972e-05, "loss": 0.8419, "step": 2748 }, { "epoch": 0.42065799540933435, "grad_norm": 2.453075570214805, "learning_rate": 1.8343410916599728e-05, "loss": 0.7913, "step": 2749 }, { "epoch": 0.42081101759755163, "grad_norm": 2.658949348190422, "learning_rate": 1.8342044565646985e-05, "loss": 0.8207, "step": 2750 }, { "epoch": 0.42096403978576896, "grad_norm": 2.6080813836023204, "learning_rate": 1.8340677702376646e-05, "loss": 0.9163, "step": 2751 }, { "epoch": 0.42111706197398624, "grad_norm": 2.4580878290478765, "learning_rate": 1.833931032687266e-05, "loss": 0.7212, "step": 2752 }, { "epoch": 0.4212700841622035, "grad_norm": 2.6866209108407992, "learning_rate": 1.8337942439219002e-05, "loss": 0.9074, "step": 2753 }, { "epoch": 0.4214231063504208, "grad_norm": 2.5069055372513565, "learning_rate": 1.8336574039499686e-05, "loss": 0.8655, "step": 2754 }, { "epoch": 0.42157612853863813, "grad_norm": 2.8233674919495506, "learning_rate": 1.8335205127798743e-05, "loss": 0.8258, "step": 2755 }, { "epoch": 0.4217291507268554, "grad_norm": 2.455363331998748, "learning_rate": 1.8333835704200244e-05, "loss": 0.7243, "step": 2756 }, { "epoch": 0.4218821729150727, "grad_norm": 2.6976338544948764, "learning_rate": 1.8332465768788294e-05, "loss": 0.8472, "step": 2757 }, { "epoch": 0.42203519510328996, "grad_norm": 2.2772022318054317, "learning_rate": 1.8331095321647024e-05, "loss": 0.7334, "step": 2758 }, { "epoch": 0.4221882172915073, "grad_norm": 2.332527092805009, "learning_rate": 1.83297243628606e-05, "loss": 0.8488, "step": 2759 }, { "epoch": 0.4223412394797246, "grad_norm": 2.65635386373047, "learning_rate": 1.8328352892513213e-05, "loss": 0.8725, "step": 2760 }, { "epoch": 0.42249426166794185, "grad_norm": 2.595147960241488, "learning_rate": 1.8326980910689097e-05, "loss": 0.8825, "step": 2761 }, { "epoch": 0.42264728385615913, "grad_norm": 2.4590965620170357, "learning_rate": 1.832560841747251e-05, "loss": 0.7872, "step": 2762 }, { "epoch": 0.42280030604437646, "grad_norm": 2.105599760748347, "learning_rate": 1.8324235412947734e-05, "loss": 0.7948, "step": 2763 }, { "epoch": 0.42295332823259374, "grad_norm": 2.1926721287195607, "learning_rate": 1.83228618971991e-05, "loss": 0.6939, "step": 2764 }, { "epoch": 0.423106350420811, "grad_norm": 2.3227283833432444, "learning_rate": 1.832148787031096e-05, "loss": 0.7924, "step": 2765 }, { "epoch": 0.4232593726090283, "grad_norm": 2.439579480687344, "learning_rate": 1.8320113332367695e-05, "loss": 0.798, "step": 2766 }, { "epoch": 0.42341239479724563, "grad_norm": 2.4905389517694707, "learning_rate": 1.831873828345372e-05, "loss": 0.9101, "step": 2767 }, { "epoch": 0.4235654169854629, "grad_norm": 1.9986218789081267, "learning_rate": 1.8317362723653486e-05, "loss": 0.6391, "step": 2768 }, { "epoch": 0.4237184391736802, "grad_norm": 2.265857474114456, "learning_rate": 1.8315986653051466e-05, "loss": 0.8241, "step": 2769 }, { "epoch": 0.42387146136189746, "grad_norm": 2.4080058443982058, "learning_rate": 1.8314610071732178e-05, "loss": 0.7932, "step": 2770 }, { "epoch": 0.42402448355011474, "grad_norm": 2.3055478158698404, "learning_rate": 1.8313232979780154e-05, "loss": 0.7345, "step": 2771 }, { "epoch": 0.4241775057383321, "grad_norm": 2.5461071258196064, "learning_rate": 1.8311855377279974e-05, "loss": 0.6186, "step": 2772 }, { "epoch": 0.42433052792654935, "grad_norm": 2.4759293912836555, "learning_rate": 1.831047726431624e-05, "loss": 0.8608, "step": 2773 }, { "epoch": 0.42448355011476663, "grad_norm": 2.228142862655671, "learning_rate": 1.8309098640973584e-05, "loss": 0.7126, "step": 2774 }, { "epoch": 0.4246365723029839, "grad_norm": 2.5373975946691507, "learning_rate": 1.8307719507336675e-05, "loss": 0.7881, "step": 2775 }, { "epoch": 0.42478959449120124, "grad_norm": 2.33822674451866, "learning_rate": 1.8306339863490216e-05, "loss": 0.8431, "step": 2776 }, { "epoch": 0.4249426166794185, "grad_norm": 2.7554536395279507, "learning_rate": 1.8304959709518923e-05, "loss": 0.8821, "step": 2777 }, { "epoch": 0.4250956388676358, "grad_norm": 2.5830841255768244, "learning_rate": 1.830357904550757e-05, "loss": 0.8288, "step": 2778 }, { "epoch": 0.42524866105585307, "grad_norm": 2.5029825041231204, "learning_rate": 1.8302197871540945e-05, "loss": 0.8809, "step": 2779 }, { "epoch": 0.4254016832440704, "grad_norm": 2.575665600700266, "learning_rate": 1.830081618770387e-05, "loss": 0.8533, "step": 2780 }, { "epoch": 0.4255547054322877, "grad_norm": 2.5700770047121124, "learning_rate": 1.8299433994081197e-05, "loss": 0.8074, "step": 2781 }, { "epoch": 0.42570772762050496, "grad_norm": 2.0623106933950304, "learning_rate": 1.8298051290757818e-05, "loss": 0.7773, "step": 2782 }, { "epoch": 0.42586074980872224, "grad_norm": 2.18537150872912, "learning_rate": 1.8296668077818644e-05, "loss": 0.8344, "step": 2783 }, { "epoch": 0.42601377199693957, "grad_norm": 2.3942580809651477, "learning_rate": 1.8295284355348628e-05, "loss": 0.7428, "step": 2784 }, { "epoch": 0.42616679418515685, "grad_norm": 2.780737215240603, "learning_rate": 1.829390012343275e-05, "loss": 0.8756, "step": 2785 }, { "epoch": 0.4263198163733741, "grad_norm": 2.2278400641722795, "learning_rate": 1.8292515382156017e-05, "loss": 0.8758, "step": 2786 }, { "epoch": 0.4264728385615914, "grad_norm": 2.3121894916116164, "learning_rate": 1.8291130131603476e-05, "loss": 0.7272, "step": 2787 }, { "epoch": 0.42662586074980874, "grad_norm": 2.69045702826044, "learning_rate": 1.8289744371860196e-05, "loss": 0.7238, "step": 2788 }, { "epoch": 0.426778882938026, "grad_norm": 2.315130684005548, "learning_rate": 1.8288358103011286e-05, "loss": 0.7843, "step": 2789 }, { "epoch": 0.4269319051262433, "grad_norm": 2.18678882527759, "learning_rate": 1.828697132514188e-05, "loss": 0.8087, "step": 2790 }, { "epoch": 0.42708492731446057, "grad_norm": 2.381064095904689, "learning_rate": 1.828558403833715e-05, "loss": 0.8279, "step": 2791 }, { "epoch": 0.4272379495026779, "grad_norm": 2.577619698682901, "learning_rate": 1.8284196242682287e-05, "loss": 0.8842, "step": 2792 }, { "epoch": 0.4273909716908952, "grad_norm": 2.4459585233481103, "learning_rate": 1.8282807938262525e-05, "loss": 0.7657, "step": 2793 }, { "epoch": 0.42754399387911246, "grad_norm": 2.4999096948175894, "learning_rate": 1.8281419125163124e-05, "loss": 0.8371, "step": 2794 }, { "epoch": 0.42769701606732974, "grad_norm": 2.10695554914119, "learning_rate": 1.8280029803469383e-05, "loss": 0.7368, "step": 2795 }, { "epoch": 0.42785003825554707, "grad_norm": 2.568702428809719, "learning_rate": 1.8278639973266614e-05, "loss": 0.8826, "step": 2796 }, { "epoch": 0.42800306044376435, "grad_norm": 2.4571418370595373, "learning_rate": 1.827724963464018e-05, "loss": 0.7719, "step": 2797 }, { "epoch": 0.4281560826319816, "grad_norm": 2.321507335931533, "learning_rate": 1.8275858787675464e-05, "loss": 0.801, "step": 2798 }, { "epoch": 0.4283091048201989, "grad_norm": 1.9921891114138417, "learning_rate": 1.8274467432457888e-05, "loss": 0.6825, "step": 2799 }, { "epoch": 0.42846212700841624, "grad_norm": 2.4680977264565325, "learning_rate": 1.8273075569072893e-05, "loss": 0.7475, "step": 2800 }, { "epoch": 0.4286151491966335, "grad_norm": 2.3974672040643776, "learning_rate": 1.8271683197605966e-05, "loss": 0.79, "step": 2801 }, { "epoch": 0.4287681713848508, "grad_norm": 2.448891743243999, "learning_rate": 1.8270290318142613e-05, "loss": 0.8843, "step": 2802 }, { "epoch": 0.42892119357306807, "grad_norm": 2.9211845208427603, "learning_rate": 1.8268896930768378e-05, "loss": 0.9263, "step": 2803 }, { "epoch": 0.4290742157612854, "grad_norm": 2.2479016422193916, "learning_rate": 1.8267503035568834e-05, "loss": 0.7761, "step": 2804 }, { "epoch": 0.4292272379495027, "grad_norm": 2.256379799241374, "learning_rate": 1.8266108632629585e-05, "loss": 0.8842, "step": 2805 }, { "epoch": 0.42938026013771996, "grad_norm": 2.230328900853721, "learning_rate": 1.8264713722036272e-05, "loss": 0.7677, "step": 2806 }, { "epoch": 0.42953328232593724, "grad_norm": 2.4734719612301808, "learning_rate": 1.8263318303874555e-05, "loss": 0.8867, "step": 2807 }, { "epoch": 0.42968630451415457, "grad_norm": 2.4024715893301445, "learning_rate": 1.8261922378230134e-05, "loss": 0.7662, "step": 2808 }, { "epoch": 0.42983932670237185, "grad_norm": 2.3019580177421495, "learning_rate": 1.8260525945188742e-05, "loss": 0.7525, "step": 2809 }, { "epoch": 0.4299923488905891, "grad_norm": 2.268610624691826, "learning_rate": 1.825912900483613e-05, "loss": 0.8512, "step": 2810 }, { "epoch": 0.4301453710788064, "grad_norm": 2.50834037125462, "learning_rate": 1.82577315572581e-05, "loss": 0.7428, "step": 2811 }, { "epoch": 0.43029839326702374, "grad_norm": 1.9801505295055397, "learning_rate": 1.8256333602540472e-05, "loss": 0.6802, "step": 2812 }, { "epoch": 0.430451415455241, "grad_norm": 2.2031680211345988, "learning_rate": 1.82549351407691e-05, "loss": 0.7427, "step": 2813 }, { "epoch": 0.4306044376434583, "grad_norm": 2.07270281399312, "learning_rate": 1.8253536172029863e-05, "loss": 0.7541, "step": 2814 }, { "epoch": 0.43075745983167557, "grad_norm": 2.4736090185078616, "learning_rate": 1.8252136696408683e-05, "loss": 0.8778, "step": 2815 }, { "epoch": 0.4309104820198929, "grad_norm": 2.1084099756052574, "learning_rate": 1.8250736713991505e-05, "loss": 0.7589, "step": 2816 }, { "epoch": 0.4310635042081102, "grad_norm": 2.377251552697672, "learning_rate": 1.824933622486431e-05, "loss": 0.8503, "step": 2817 }, { "epoch": 0.43121652639632746, "grad_norm": 2.5075890326248964, "learning_rate": 1.8247935229113106e-05, "loss": 0.8015, "step": 2818 }, { "epoch": 0.43136954858454474, "grad_norm": 2.4737690463297124, "learning_rate": 1.824653372682393e-05, "loss": 0.9242, "step": 2819 }, { "epoch": 0.43152257077276207, "grad_norm": 2.5168772301247424, "learning_rate": 1.824513171808286e-05, "loss": 0.6882, "step": 2820 }, { "epoch": 0.43167559296097935, "grad_norm": 2.4850955909543324, "learning_rate": 1.8243729202975998e-05, "loss": 0.8031, "step": 2821 }, { "epoch": 0.4318286151491966, "grad_norm": 2.4310792570377364, "learning_rate": 1.8242326181589472e-05, "loss": 0.7786, "step": 2822 }, { "epoch": 0.4319816373374139, "grad_norm": 2.2162562659795566, "learning_rate": 1.8240922654009453e-05, "loss": 0.8593, "step": 2823 }, { "epoch": 0.43213465952563124, "grad_norm": 2.1604512997876615, "learning_rate": 1.823951862032214e-05, "loss": 0.7262, "step": 2824 }, { "epoch": 0.4322876817138485, "grad_norm": 2.2860096253234334, "learning_rate": 1.8238114080613748e-05, "loss": 0.8108, "step": 2825 }, { "epoch": 0.4324407039020658, "grad_norm": 2.1896817856146926, "learning_rate": 1.8236709034970547e-05, "loss": 0.7637, "step": 2826 }, { "epoch": 0.43259372609028307, "grad_norm": 2.5100011119270365, "learning_rate": 1.8235303483478816e-05, "loss": 0.8717, "step": 2827 }, { "epoch": 0.4327467482785004, "grad_norm": 2.2395244764068494, "learning_rate": 1.8233897426224885e-05, "loss": 0.8137, "step": 2828 }, { "epoch": 0.4328997704667177, "grad_norm": 2.454140156073576, "learning_rate": 1.8232490863295104e-05, "loss": 0.7323, "step": 2829 }, { "epoch": 0.43305279265493496, "grad_norm": 2.782749694102962, "learning_rate": 1.8231083794775855e-05, "loss": 0.8337, "step": 2830 }, { "epoch": 0.43320581484315224, "grad_norm": 2.4708319224270787, "learning_rate": 1.8229676220753544e-05, "loss": 0.7952, "step": 2831 }, { "epoch": 0.43335883703136957, "grad_norm": 2.450763340830767, "learning_rate": 1.8228268141314625e-05, "loss": 0.7822, "step": 2832 }, { "epoch": 0.43351185921958685, "grad_norm": 2.6846953429174265, "learning_rate": 1.8226859556545567e-05, "loss": 0.9821, "step": 2833 }, { "epoch": 0.4336648814078041, "grad_norm": 2.384263297031218, "learning_rate": 1.8225450466532886e-05, "loss": 0.7387, "step": 2834 }, { "epoch": 0.4338179035960214, "grad_norm": 2.5933253496031314, "learning_rate": 1.8224040871363108e-05, "loss": 0.8023, "step": 2835 }, { "epoch": 0.43397092578423874, "grad_norm": 2.1470594209931164, "learning_rate": 1.8222630771122813e-05, "loss": 0.7342, "step": 2836 }, { "epoch": 0.434123947972456, "grad_norm": 2.177838898350675, "learning_rate": 1.822122016589859e-05, "loss": 0.7216, "step": 2837 }, { "epoch": 0.4342769701606733, "grad_norm": 2.318251382225496, "learning_rate": 1.8219809055777078e-05, "loss": 0.8282, "step": 2838 }, { "epoch": 0.43442999234889057, "grad_norm": 2.485311148535998, "learning_rate": 1.8218397440844935e-05, "loss": 0.9077, "step": 2839 }, { "epoch": 0.4345830145371079, "grad_norm": 2.187991898973603, "learning_rate": 1.8216985321188857e-05, "loss": 0.6987, "step": 2840 }, { "epoch": 0.4347360367253252, "grad_norm": 2.954983399291798, "learning_rate": 1.821557269689556e-05, "loss": 0.8829, "step": 2841 }, { "epoch": 0.43488905891354246, "grad_norm": 2.3435852188557136, "learning_rate": 1.8214159568051808e-05, "loss": 0.7581, "step": 2842 }, { "epoch": 0.43504208110175974, "grad_norm": 2.506988531798837, "learning_rate": 1.821274593474438e-05, "loss": 0.9141, "step": 2843 }, { "epoch": 0.43519510328997707, "grad_norm": 2.376637972611891, "learning_rate": 1.82113317970601e-05, "loss": 0.7158, "step": 2844 }, { "epoch": 0.43534812547819435, "grad_norm": 2.426861380496052, "learning_rate": 1.8209917155085807e-05, "loss": 0.8952, "step": 2845 }, { "epoch": 0.4355011476664116, "grad_norm": 2.217284799803432, "learning_rate": 1.820850200890839e-05, "loss": 0.7568, "step": 2846 }, { "epoch": 0.4356541698546289, "grad_norm": 3.406641979935441, "learning_rate": 1.8207086358614748e-05, "loss": 0.7537, "step": 2847 }, { "epoch": 0.43580719204284624, "grad_norm": 2.2708735332971397, "learning_rate": 1.8205670204291827e-05, "loss": 0.7447, "step": 2848 }, { "epoch": 0.4359602142310635, "grad_norm": 2.4379444362854796, "learning_rate": 1.82042535460266e-05, "loss": 0.8063, "step": 2849 }, { "epoch": 0.4361132364192808, "grad_norm": 2.644713852715883, "learning_rate": 1.8202836383906065e-05, "loss": 0.8981, "step": 2850 }, { "epoch": 0.43626625860749807, "grad_norm": 2.3110592853637324, "learning_rate": 1.8201418718017257e-05, "loss": 0.828, "step": 2851 }, { "epoch": 0.4364192807957154, "grad_norm": 2.19094109374234, "learning_rate": 1.8200000548447242e-05, "loss": 0.7952, "step": 2852 }, { "epoch": 0.4365723029839327, "grad_norm": 2.4567016099138814, "learning_rate": 1.8198581875283116e-05, "loss": 0.8291, "step": 2853 }, { "epoch": 0.43672532517214996, "grad_norm": 2.5230151299441665, "learning_rate": 1.8197162698612008e-05, "loss": 0.8983, "step": 2854 }, { "epoch": 0.43687834736036724, "grad_norm": 2.2014429530314477, "learning_rate": 1.8195743018521067e-05, "loss": 0.8416, "step": 2855 }, { "epoch": 0.43703136954858457, "grad_norm": 2.798858313321146, "learning_rate": 1.8194322835097484e-05, "loss": 0.6999, "step": 2856 }, { "epoch": 0.43718439173680185, "grad_norm": 2.559519260396577, "learning_rate": 1.8192902148428482e-05, "loss": 1.0143, "step": 2857 }, { "epoch": 0.4373374139250191, "grad_norm": 2.3153587914032254, "learning_rate": 1.8191480958601307e-05, "loss": 0.8853, "step": 2858 }, { "epoch": 0.4374904361132364, "grad_norm": 2.6106845850972498, "learning_rate": 1.819005926570324e-05, "loss": 0.8967, "step": 2859 }, { "epoch": 0.43764345830145374, "grad_norm": 2.6529185075899244, "learning_rate": 1.8188637069821595e-05, "loss": 0.7725, "step": 2860 }, { "epoch": 0.437796480489671, "grad_norm": 2.4152913946558248, "learning_rate": 1.8187214371043717e-05, "loss": 0.7995, "step": 2861 }, { "epoch": 0.4379495026778883, "grad_norm": 2.7423652149993165, "learning_rate": 1.818579116945697e-05, "loss": 0.9248, "step": 2862 }, { "epoch": 0.43810252486610557, "grad_norm": 2.1886173607384904, "learning_rate": 1.8184367465148768e-05, "loss": 0.769, "step": 2863 }, { "epoch": 0.4382555470543229, "grad_norm": 2.2565851532333654, "learning_rate": 1.818294325820654e-05, "loss": 0.8638, "step": 2864 }, { "epoch": 0.4384085692425402, "grad_norm": 2.476506526281387, "learning_rate": 1.8181518548717757e-05, "loss": 0.8676, "step": 2865 }, { "epoch": 0.43856159143075746, "grad_norm": 2.4366507731535068, "learning_rate": 1.818009333676991e-05, "loss": 0.785, "step": 2866 }, { "epoch": 0.43871461361897474, "grad_norm": 2.4976448948903784, "learning_rate": 1.8178667622450536e-05, "loss": 0.8044, "step": 2867 }, { "epoch": 0.43886763580719207, "grad_norm": 2.2797810019467306, "learning_rate": 1.8177241405847185e-05, "loss": 0.7425, "step": 2868 }, { "epoch": 0.43902065799540935, "grad_norm": 2.189360272061294, "learning_rate": 1.817581468704745e-05, "loss": 0.8035, "step": 2869 }, { "epoch": 0.4391736801836266, "grad_norm": 2.189311640496233, "learning_rate": 1.817438746613895e-05, "loss": 0.7365, "step": 2870 }, { "epoch": 0.4393267023718439, "grad_norm": 2.35659633222008, "learning_rate": 1.817295974320934e-05, "loss": 0.8272, "step": 2871 }, { "epoch": 0.43947972456006124, "grad_norm": 2.093541114804993, "learning_rate": 1.8171531518346296e-05, "loss": 0.7011, "step": 2872 }, { "epoch": 0.4396327467482785, "grad_norm": 2.3219154246904203, "learning_rate": 1.8170102791637538e-05, "loss": 0.6946, "step": 2873 }, { "epoch": 0.4397857689364958, "grad_norm": 2.578669440132516, "learning_rate": 1.81686735631708e-05, "loss": 0.8617, "step": 2874 }, { "epoch": 0.43993879112471307, "grad_norm": 2.194341864294571, "learning_rate": 1.8167243833033866e-05, "loss": 0.7764, "step": 2875 }, { "epoch": 0.4400918133129304, "grad_norm": 2.543107156092695, "learning_rate": 1.8165813601314538e-05, "loss": 0.8232, "step": 2876 }, { "epoch": 0.4402448355011477, "grad_norm": 2.1321281234592964, "learning_rate": 1.8164382868100655e-05, "loss": 0.8494, "step": 2877 }, { "epoch": 0.44039785768936496, "grad_norm": 2.1116804083957996, "learning_rate": 1.8162951633480076e-05, "loss": 0.689, "step": 2878 }, { "epoch": 0.44055087987758224, "grad_norm": 2.6610632363025735, "learning_rate": 1.8161519897540706e-05, "loss": 0.9145, "step": 2879 }, { "epoch": 0.4407039020657995, "grad_norm": 2.4608039381193283, "learning_rate": 1.816008766037047e-05, "loss": 0.8626, "step": 2880 }, { "epoch": 0.44085692425401685, "grad_norm": 2.0608838256285527, "learning_rate": 1.815865492205733e-05, "loss": 0.7088, "step": 2881 }, { "epoch": 0.4410099464422341, "grad_norm": 2.266950103228107, "learning_rate": 1.8157221682689274e-05, "loss": 0.7986, "step": 2882 }, { "epoch": 0.4411629686304514, "grad_norm": 2.320138111880372, "learning_rate": 1.8155787942354323e-05, "loss": 0.7952, "step": 2883 }, { "epoch": 0.4413159908186687, "grad_norm": 2.030179057500993, "learning_rate": 1.815435370114053e-05, "loss": 0.7375, "step": 2884 }, { "epoch": 0.441469013006886, "grad_norm": 2.277446909757116, "learning_rate": 1.8152918959135972e-05, "loss": 0.7952, "step": 2885 }, { "epoch": 0.4416220351951033, "grad_norm": 2.336938364081513, "learning_rate": 1.815148371642877e-05, "loss": 0.7348, "step": 2886 }, { "epoch": 0.44177505738332057, "grad_norm": 2.068616185340274, "learning_rate": 1.8150047973107062e-05, "loss": 0.7344, "step": 2887 }, { "epoch": 0.44192807957153785, "grad_norm": 2.378424795705215, "learning_rate": 1.8148611729259028e-05, "loss": 0.8116, "step": 2888 }, { "epoch": 0.4420811017597552, "grad_norm": 2.315366839645358, "learning_rate": 1.8147174984972866e-05, "loss": 0.8843, "step": 2889 }, { "epoch": 0.44223412394797246, "grad_norm": 2.282370051057871, "learning_rate": 1.814573774033682e-05, "loss": 0.6782, "step": 2890 }, { "epoch": 0.44238714613618974, "grad_norm": 2.3368147120736054, "learning_rate": 1.814429999543915e-05, "loss": 0.7882, "step": 2891 }, { "epoch": 0.442540168324407, "grad_norm": 2.1649407313959386, "learning_rate": 1.8142861750368157e-05, "loss": 0.697, "step": 2892 }, { "epoch": 0.44269319051262435, "grad_norm": 2.5351682813389043, "learning_rate": 1.8141423005212168e-05, "loss": 0.8544, "step": 2893 }, { "epoch": 0.4428462127008416, "grad_norm": 2.4679098802704553, "learning_rate": 1.8139983760059546e-05, "loss": 0.8602, "step": 2894 }, { "epoch": 0.4429992348890589, "grad_norm": 2.4500507125200657, "learning_rate": 1.8138544014998676e-05, "loss": 0.8638, "step": 2895 }, { "epoch": 0.4431522570772762, "grad_norm": 2.907302704924948, "learning_rate": 1.813710377011798e-05, "loss": 0.8541, "step": 2896 }, { "epoch": 0.4433052792654935, "grad_norm": 2.668616754615131, "learning_rate": 1.813566302550591e-05, "loss": 0.8274, "step": 2897 }, { "epoch": 0.4434583014537108, "grad_norm": 2.3327329427740255, "learning_rate": 1.8134221781250946e-05, "loss": 0.785, "step": 2898 }, { "epoch": 0.44361132364192807, "grad_norm": 2.276328368333612, "learning_rate": 1.81327800374416e-05, "loss": 0.7404, "step": 2899 }, { "epoch": 0.44376434583014535, "grad_norm": 2.196279587222736, "learning_rate": 1.8131337794166417e-05, "loss": 0.7991, "step": 2900 }, { "epoch": 0.4439173680183627, "grad_norm": 2.4529440496173684, "learning_rate": 1.812989505151397e-05, "loss": 0.8399, "step": 2901 }, { "epoch": 0.44407039020657996, "grad_norm": 2.2537061718280325, "learning_rate": 1.8128451809572865e-05, "loss": 0.7269, "step": 2902 }, { "epoch": 0.44422341239479723, "grad_norm": 2.322994259587146, "learning_rate": 1.8127008068431734e-05, "loss": 0.7474, "step": 2903 }, { "epoch": 0.4443764345830145, "grad_norm": 2.3975452426239405, "learning_rate": 1.8125563828179244e-05, "loss": 0.8157, "step": 2904 }, { "epoch": 0.44452945677123185, "grad_norm": 2.0799712070502197, "learning_rate": 1.8124119088904094e-05, "loss": 0.7465, "step": 2905 }, { "epoch": 0.4446824789594491, "grad_norm": 2.445692299342647, "learning_rate": 1.8122673850695007e-05, "loss": 0.833, "step": 2906 }, { "epoch": 0.4448355011476664, "grad_norm": 2.347753895749705, "learning_rate": 1.8121228113640744e-05, "loss": 0.6599, "step": 2907 }, { "epoch": 0.4449885233358837, "grad_norm": 2.294931489132973, "learning_rate": 1.8119781877830093e-05, "loss": 0.901, "step": 2908 }, { "epoch": 0.445141545524101, "grad_norm": 2.2286810517605655, "learning_rate": 1.8118335143351868e-05, "loss": 0.7316, "step": 2909 }, { "epoch": 0.4452945677123183, "grad_norm": 2.4912510299578057, "learning_rate": 1.8116887910294925e-05, "loss": 0.8443, "step": 2910 }, { "epoch": 0.44544758990053557, "grad_norm": 2.459604830145821, "learning_rate": 1.8115440178748142e-05, "loss": 0.7226, "step": 2911 }, { "epoch": 0.44560061208875285, "grad_norm": 2.6383148521577873, "learning_rate": 1.811399194880043e-05, "loss": 0.8549, "step": 2912 }, { "epoch": 0.4457536342769702, "grad_norm": 2.280528485015101, "learning_rate": 1.8112543220540733e-05, "loss": 0.8502, "step": 2913 }, { "epoch": 0.44590665646518746, "grad_norm": 2.4402696603587426, "learning_rate": 1.8111093994058017e-05, "loss": 0.8576, "step": 2914 }, { "epoch": 0.44605967865340473, "grad_norm": 2.2925670057229977, "learning_rate": 1.810964426944129e-05, "loss": 0.7551, "step": 2915 }, { "epoch": 0.446212700841622, "grad_norm": 2.2902795548425723, "learning_rate": 1.8108194046779584e-05, "loss": 0.7498, "step": 2916 }, { "epoch": 0.44636572302983935, "grad_norm": 2.510625241344344, "learning_rate": 1.8106743326161957e-05, "loss": 0.826, "step": 2917 }, { "epoch": 0.4465187452180566, "grad_norm": 2.4701981018218793, "learning_rate": 1.810529210767751e-05, "loss": 0.8336, "step": 2918 }, { "epoch": 0.4466717674062739, "grad_norm": 2.5259770545154865, "learning_rate": 1.8103840391415372e-05, "loss": 0.7502, "step": 2919 }, { "epoch": 0.4468247895944912, "grad_norm": 2.5249562507172287, "learning_rate": 1.810238817746469e-05, "loss": 0.7999, "step": 2920 }, { "epoch": 0.4469778117827085, "grad_norm": 2.4396115738869444, "learning_rate": 1.8100935465914654e-05, "loss": 0.8055, "step": 2921 }, { "epoch": 0.4471308339709258, "grad_norm": 2.3065700824046282, "learning_rate": 1.8099482256854477e-05, "loss": 0.8083, "step": 2922 }, { "epoch": 0.44728385615914307, "grad_norm": 2.349949003979491, "learning_rate": 1.8098028550373415e-05, "loss": 0.8084, "step": 2923 }, { "epoch": 0.44743687834736035, "grad_norm": 2.389430202044748, "learning_rate": 1.8096574346560736e-05, "loss": 0.782, "step": 2924 }, { "epoch": 0.4475899005355777, "grad_norm": 2.210746221263167, "learning_rate": 1.809511964550575e-05, "loss": 0.6941, "step": 2925 }, { "epoch": 0.44774292272379496, "grad_norm": 2.1626760051564897, "learning_rate": 1.8093664447297802e-05, "loss": 0.796, "step": 2926 }, { "epoch": 0.44789594491201223, "grad_norm": 2.251030501988546, "learning_rate": 1.809220875202626e-05, "loss": 0.8277, "step": 2927 }, { "epoch": 0.4480489671002295, "grad_norm": 2.1946018916323435, "learning_rate": 1.8090752559780515e-05, "loss": 0.7804, "step": 2928 }, { "epoch": 0.44820198928844684, "grad_norm": 2.139406362787451, "learning_rate": 1.8089295870650007e-05, "loss": 0.7893, "step": 2929 }, { "epoch": 0.4483550114766641, "grad_norm": 2.648584053744639, "learning_rate": 1.8087838684724196e-05, "loss": 0.7844, "step": 2930 }, { "epoch": 0.4485080336648814, "grad_norm": 2.8523250188221625, "learning_rate": 1.8086381002092573e-05, "loss": 0.8421, "step": 2931 }, { "epoch": 0.4486610558530987, "grad_norm": 2.316275777085334, "learning_rate": 1.8084922822844655e-05, "loss": 0.8628, "step": 2932 }, { "epoch": 0.448814078041316, "grad_norm": 2.7344218627285857, "learning_rate": 1.8083464147069997e-05, "loss": 0.7968, "step": 2933 }, { "epoch": 0.4489671002295333, "grad_norm": 2.5515179425547747, "learning_rate": 1.8082004974858186e-05, "loss": 0.6988, "step": 2934 }, { "epoch": 0.44912012241775057, "grad_norm": 2.274009925043369, "learning_rate": 1.808054530629883e-05, "loss": 0.8237, "step": 2935 }, { "epoch": 0.44927314460596784, "grad_norm": 2.2219946969109006, "learning_rate": 1.8079085141481577e-05, "loss": 0.8523, "step": 2936 }, { "epoch": 0.4494261667941852, "grad_norm": 2.2150504476863295, "learning_rate": 1.8077624480496102e-05, "loss": 0.8053, "step": 2937 }, { "epoch": 0.44957918898240246, "grad_norm": 2.419606454998503, "learning_rate": 1.8076163323432104e-05, "loss": 0.8921, "step": 2938 }, { "epoch": 0.44973221117061973, "grad_norm": 2.36212827728317, "learning_rate": 1.8074701670379324e-05, "loss": 0.7622, "step": 2939 }, { "epoch": 0.449885233358837, "grad_norm": 2.3272707660273726, "learning_rate": 1.8073239521427524e-05, "loss": 0.8535, "step": 2940 }, { "epoch": 0.45003825554705434, "grad_norm": 2.516712688408001, "learning_rate": 1.8071776876666506e-05, "loss": 0.8504, "step": 2941 }, { "epoch": 0.4501912777352716, "grad_norm": 2.223349887585654, "learning_rate": 1.807031373618609e-05, "loss": 0.8479, "step": 2942 }, { "epoch": 0.4503442999234889, "grad_norm": 2.251986025412135, "learning_rate": 1.806885010007614e-05, "loss": 0.7571, "step": 2943 }, { "epoch": 0.4504973221117062, "grad_norm": 2.910578161536275, "learning_rate": 1.8067385968426533e-05, "loss": 0.8832, "step": 2944 }, { "epoch": 0.4506503442999235, "grad_norm": 2.5658246641317435, "learning_rate": 1.8065921341327203e-05, "loss": 0.8364, "step": 2945 }, { "epoch": 0.4508033664881408, "grad_norm": 2.5441684589337923, "learning_rate": 1.806445621886808e-05, "loss": 0.8535, "step": 2946 }, { "epoch": 0.45095638867635807, "grad_norm": 2.346003869170097, "learning_rate": 1.806299060113916e-05, "loss": 0.7937, "step": 2947 }, { "epoch": 0.45110941086457534, "grad_norm": 2.3686802916882264, "learning_rate": 1.8061524488230443e-05, "loss": 0.9208, "step": 2948 }, { "epoch": 0.4512624330527927, "grad_norm": 2.315318897085286, "learning_rate": 1.8060057880231966e-05, "loss": 0.7576, "step": 2949 }, { "epoch": 0.45141545524100996, "grad_norm": 2.304204698051078, "learning_rate": 1.8058590777233807e-05, "loss": 0.8123, "step": 2950 }, { "epoch": 0.45156847742922723, "grad_norm": 2.4172138592186547, "learning_rate": 1.805712317932606e-05, "loss": 0.7145, "step": 2951 }, { "epoch": 0.4517214996174445, "grad_norm": 2.1016129747785213, "learning_rate": 1.8055655086598864e-05, "loss": 0.7545, "step": 2952 }, { "epoch": 0.45187452180566184, "grad_norm": 2.49976385009864, "learning_rate": 1.8054186499142373e-05, "loss": 0.8997, "step": 2953 }, { "epoch": 0.4520275439938791, "grad_norm": 2.1015201909120953, "learning_rate": 1.8052717417046776e-05, "loss": 0.7667, "step": 2954 }, { "epoch": 0.4521805661820964, "grad_norm": 2.191104113892472, "learning_rate": 1.8051247840402305e-05, "loss": 0.7319, "step": 2955 }, { "epoch": 0.4523335883703137, "grad_norm": 2.4834077756294, "learning_rate": 1.8049777769299207e-05, "loss": 0.8395, "step": 2956 }, { "epoch": 0.452486610558531, "grad_norm": 2.6233714667161534, "learning_rate": 1.8048307203827766e-05, "loss": 0.7377, "step": 2957 }, { "epoch": 0.4526396327467483, "grad_norm": 2.3852562290363855, "learning_rate": 1.804683614407829e-05, "loss": 0.8138, "step": 2958 }, { "epoch": 0.45279265493496557, "grad_norm": 2.405802744082427, "learning_rate": 1.8045364590141132e-05, "loss": 0.7731, "step": 2959 }, { "epoch": 0.45294567712318284, "grad_norm": 2.281624831951309, "learning_rate": 1.804389254210666e-05, "loss": 0.7172, "step": 2960 }, { "epoch": 0.4530986993114002, "grad_norm": 2.5650189896049183, "learning_rate": 1.8042420000065276e-05, "loss": 0.8563, "step": 2961 }, { "epoch": 0.45325172149961745, "grad_norm": 2.186990313937297, "learning_rate": 1.8040946964107423e-05, "loss": 0.7669, "step": 2962 }, { "epoch": 0.45340474368783473, "grad_norm": 2.5993364039741147, "learning_rate": 1.8039473434323555e-05, "loss": 0.8328, "step": 2963 }, { "epoch": 0.453557765876052, "grad_norm": 2.690424314638924, "learning_rate": 1.803799941080418e-05, "loss": 0.7906, "step": 2964 }, { "epoch": 0.45371078806426934, "grad_norm": 2.3615307843715803, "learning_rate": 1.803652489363981e-05, "loss": 0.8846, "step": 2965 }, { "epoch": 0.4538638102524866, "grad_norm": 2.3028327800647923, "learning_rate": 1.8035049882921008e-05, "loss": 0.7632, "step": 2966 }, { "epoch": 0.4540168324407039, "grad_norm": 2.6992236760908104, "learning_rate": 1.803357437873836e-05, "loss": 0.8173, "step": 2967 }, { "epoch": 0.4541698546289212, "grad_norm": 2.1987188003454072, "learning_rate": 1.8032098381182483e-05, "loss": 0.8117, "step": 2968 }, { "epoch": 0.4543228768171385, "grad_norm": 2.6263119915845605, "learning_rate": 1.8030621890344023e-05, "loss": 0.8381, "step": 2969 }, { "epoch": 0.4544758990053558, "grad_norm": 2.5134499182053296, "learning_rate": 1.8029144906313653e-05, "loss": 0.8841, "step": 2970 }, { "epoch": 0.45462892119357307, "grad_norm": 2.3417192495256867, "learning_rate": 1.8027667429182087e-05, "loss": 0.765, "step": 2971 }, { "epoch": 0.45478194338179034, "grad_norm": 2.3521797349686455, "learning_rate": 1.8026189459040063e-05, "loss": 0.7582, "step": 2972 }, { "epoch": 0.4549349655700077, "grad_norm": 2.2634494087856964, "learning_rate": 1.802471099597834e-05, "loss": 0.7566, "step": 2973 }, { "epoch": 0.45508798775822495, "grad_norm": 2.757423631425634, "learning_rate": 1.8023232040087723e-05, "loss": 0.7558, "step": 2974 }, { "epoch": 0.45524100994644223, "grad_norm": 2.4974299117094536, "learning_rate": 1.802175259145904e-05, "loss": 0.904, "step": 2975 }, { "epoch": 0.4553940321346595, "grad_norm": 2.4164896648530365, "learning_rate": 1.8020272650183152e-05, "loss": 0.7928, "step": 2976 }, { "epoch": 0.45554705432287684, "grad_norm": 2.278461747453805, "learning_rate": 1.801879221635094e-05, "loss": 0.7486, "step": 2977 }, { "epoch": 0.4557000765110941, "grad_norm": 2.325475613320422, "learning_rate": 1.8017311290053335e-05, "loss": 0.7439, "step": 2978 }, { "epoch": 0.4558530986993114, "grad_norm": 2.5560265554303316, "learning_rate": 1.8015829871381272e-05, "loss": 0.8628, "step": 2979 }, { "epoch": 0.4560061208875287, "grad_norm": 2.7690977507099417, "learning_rate": 1.8014347960425742e-05, "loss": 0.8506, "step": 2980 }, { "epoch": 0.456159143075746, "grad_norm": 2.4905184954891544, "learning_rate": 1.801286555727775e-05, "loss": 0.7848, "step": 2981 }, { "epoch": 0.4563121652639633, "grad_norm": 2.7241255638703317, "learning_rate": 1.8011382662028343e-05, "loss": 0.7943, "step": 2982 }, { "epoch": 0.45646518745218057, "grad_norm": 2.3442771277370404, "learning_rate": 1.800989927476858e-05, "loss": 0.8593, "step": 2983 }, { "epoch": 0.45661820964039784, "grad_norm": 2.3831996561185234, "learning_rate": 1.800841539558957e-05, "loss": 0.7683, "step": 2984 }, { "epoch": 0.4567712318286152, "grad_norm": 2.529846692344636, "learning_rate": 1.800693102458244e-05, "loss": 0.7843, "step": 2985 }, { "epoch": 0.45692425401683245, "grad_norm": 2.5994620405027686, "learning_rate": 1.8005446161838355e-05, "loss": 0.7773, "step": 2986 }, { "epoch": 0.45707727620504973, "grad_norm": 2.1902994542558947, "learning_rate": 1.8003960807448505e-05, "loss": 0.8147, "step": 2987 }, { "epoch": 0.457230298393267, "grad_norm": 2.666362830144391, "learning_rate": 1.800247496150411e-05, "loss": 0.8922, "step": 2988 }, { "epoch": 0.45738332058148434, "grad_norm": 2.348016100092245, "learning_rate": 1.800098862409642e-05, "loss": 0.8225, "step": 2989 }, { "epoch": 0.4575363427697016, "grad_norm": 2.6178249141604364, "learning_rate": 1.7999501795316712e-05, "loss": 0.7883, "step": 2990 }, { "epoch": 0.4576893649579189, "grad_norm": 2.6567570604699315, "learning_rate": 1.7998014475256313e-05, "loss": 0.8205, "step": 2991 }, { "epoch": 0.4578423871461362, "grad_norm": 2.1047805071412014, "learning_rate": 1.7996526664006553e-05, "loss": 0.6739, "step": 2992 }, { "epoch": 0.45799540933435345, "grad_norm": 2.404156937766429, "learning_rate": 1.7995038361658813e-05, "loss": 0.687, "step": 2993 }, { "epoch": 0.4581484315225708, "grad_norm": 2.3096394834735277, "learning_rate": 1.7993549568304485e-05, "loss": 0.8982, "step": 2994 }, { "epoch": 0.45830145371078806, "grad_norm": 2.3445700625391535, "learning_rate": 1.799206028403501e-05, "loss": 0.8137, "step": 2995 }, { "epoch": 0.45845447589900534, "grad_norm": 2.370222594067083, "learning_rate": 1.7990570508941845e-05, "loss": 0.6931, "step": 2996 }, { "epoch": 0.4586074980872226, "grad_norm": 2.1936059754346706, "learning_rate": 1.7989080243116486e-05, "loss": 0.6917, "step": 2997 }, { "epoch": 0.45876052027543995, "grad_norm": 2.139823927624093, "learning_rate": 1.7987589486650457e-05, "loss": 0.7353, "step": 2998 }, { "epoch": 0.45891354246365723, "grad_norm": 2.418548770240328, "learning_rate": 1.7986098239635313e-05, "loss": 0.7797, "step": 2999 }, { "epoch": 0.4590665646518745, "grad_norm": 2.427079798960257, "learning_rate": 1.798460650216263e-05, "loss": 0.8436, "step": 3000 }, { "epoch": 0.4592195868400918, "grad_norm": 3.182514965849544, "learning_rate": 1.7983114274324026e-05, "loss": 0.9386, "step": 3001 }, { "epoch": 0.4593726090283091, "grad_norm": 2.396757540197267, "learning_rate": 1.7981621556211146e-05, "loss": 0.7159, "step": 3002 }, { "epoch": 0.4595256312165264, "grad_norm": 2.5679978233582004, "learning_rate": 1.7980128347915663e-05, "loss": 0.8365, "step": 3003 }, { "epoch": 0.4596786534047437, "grad_norm": 2.38552520369705, "learning_rate": 1.7978634649529276e-05, "loss": 0.9041, "step": 3004 }, { "epoch": 0.45983167559296095, "grad_norm": 2.2016042252587416, "learning_rate": 1.7977140461143724e-05, "loss": 0.7491, "step": 3005 }, { "epoch": 0.4599846977811783, "grad_norm": 2.2356987100497134, "learning_rate": 1.797564578285077e-05, "loss": 0.7265, "step": 3006 }, { "epoch": 0.46013771996939556, "grad_norm": 2.2342192382296, "learning_rate": 1.797415061474221e-05, "loss": 0.8228, "step": 3007 }, { "epoch": 0.46029074215761284, "grad_norm": 2.4500043399004334, "learning_rate": 1.7972654956909864e-05, "loss": 0.755, "step": 3008 }, { "epoch": 0.4604437643458301, "grad_norm": 2.0413908159279393, "learning_rate": 1.7971158809445586e-05, "loss": 0.6604, "step": 3009 }, { "epoch": 0.46059678653404745, "grad_norm": 2.4438109905320067, "learning_rate": 1.7969662172441266e-05, "loss": 0.7801, "step": 3010 }, { "epoch": 0.46074980872226473, "grad_norm": 2.5796430716040493, "learning_rate": 1.796816504598881e-05, "loss": 0.7737, "step": 3011 }, { "epoch": 0.460902830910482, "grad_norm": 2.4072337562016366, "learning_rate": 1.7966667430180168e-05, "loss": 0.7351, "step": 3012 }, { "epoch": 0.4610558530986993, "grad_norm": 2.4134621653012025, "learning_rate": 1.7965169325107316e-05, "loss": 0.679, "step": 3013 }, { "epoch": 0.4612088752869166, "grad_norm": 2.382826189730748, "learning_rate": 1.7963670730862253e-05, "loss": 0.8454, "step": 3014 }, { "epoch": 0.4613618974751339, "grad_norm": 2.461863361146385, "learning_rate": 1.796217164753702e-05, "loss": 0.8673, "step": 3015 }, { "epoch": 0.4615149196633512, "grad_norm": 2.162119030473225, "learning_rate": 1.7960672075223674e-05, "loss": 0.7094, "step": 3016 }, { "epoch": 0.46166794185156845, "grad_norm": 2.4495640775338123, "learning_rate": 1.7959172014014315e-05, "loss": 0.8153, "step": 3017 }, { "epoch": 0.4618209640397858, "grad_norm": 2.8686927876563653, "learning_rate": 1.7957671464001064e-05, "loss": 0.7448, "step": 3018 }, { "epoch": 0.46197398622800306, "grad_norm": 2.375874766610349, "learning_rate": 1.7956170425276084e-05, "loss": 0.7054, "step": 3019 }, { "epoch": 0.46212700841622034, "grad_norm": 2.2043885507178445, "learning_rate": 1.7954668897931545e-05, "loss": 0.8179, "step": 3020 }, { "epoch": 0.4622800306044376, "grad_norm": 2.2472971973036433, "learning_rate": 1.7953166882059677e-05, "loss": 0.8027, "step": 3021 }, { "epoch": 0.46243305279265495, "grad_norm": 2.1792083036915866, "learning_rate": 1.7951664377752714e-05, "loss": 0.6854, "step": 3022 }, { "epoch": 0.46258607498087223, "grad_norm": 2.4282694085901917, "learning_rate": 1.7950161385102936e-05, "loss": 0.8682, "step": 3023 }, { "epoch": 0.4627390971690895, "grad_norm": 2.5164884020527114, "learning_rate": 1.7948657904202645e-05, "loss": 0.7298, "step": 3024 }, { "epoch": 0.4628921193573068, "grad_norm": 2.2296415956211653, "learning_rate": 1.7947153935144182e-05, "loss": 0.7645, "step": 3025 }, { "epoch": 0.4630451415455241, "grad_norm": 2.4863277949035556, "learning_rate": 1.79456494780199e-05, "loss": 0.8104, "step": 3026 }, { "epoch": 0.4631981637337414, "grad_norm": 2.5801820662776995, "learning_rate": 1.79441445329222e-05, "loss": 0.8021, "step": 3027 }, { "epoch": 0.4633511859219587, "grad_norm": 2.6857176336923536, "learning_rate": 1.794263909994351e-05, "loss": 0.8011, "step": 3028 }, { "epoch": 0.46350420811017595, "grad_norm": 2.5215164941271895, "learning_rate": 1.794113317917628e-05, "loss": 0.9214, "step": 3029 }, { "epoch": 0.4636572302983933, "grad_norm": 2.3776208831265877, "learning_rate": 1.7939626770712996e-05, "loss": 0.7993, "step": 3030 }, { "epoch": 0.46381025248661056, "grad_norm": 2.3370137755310134, "learning_rate": 1.7938119874646173e-05, "loss": 0.9326, "step": 3031 }, { "epoch": 0.46396327467482784, "grad_norm": 2.2444810838602196, "learning_rate": 1.793661249106835e-05, "loss": 0.78, "step": 3032 }, { "epoch": 0.4641162968630451, "grad_norm": 2.626977361377603, "learning_rate": 1.793510462007211e-05, "loss": 0.8295, "step": 3033 }, { "epoch": 0.46426931905126245, "grad_norm": 2.279663603934519, "learning_rate": 1.793359626175005e-05, "loss": 0.7931, "step": 3034 }, { "epoch": 0.46442234123947973, "grad_norm": 2.2209566116817783, "learning_rate": 1.793208741619481e-05, "loss": 0.8991, "step": 3035 }, { "epoch": 0.464575363427697, "grad_norm": 2.3367089021530467, "learning_rate": 1.793057808349905e-05, "loss": 0.6924, "step": 3036 }, { "epoch": 0.4647283856159143, "grad_norm": 2.416662478259207, "learning_rate": 1.7929068263755465e-05, "loss": 0.9007, "step": 3037 }, { "epoch": 0.4648814078041316, "grad_norm": 2.3202236234576756, "learning_rate": 1.792755795705678e-05, "loss": 0.8085, "step": 3038 }, { "epoch": 0.4650344299923489, "grad_norm": 2.2844609011360815, "learning_rate": 1.792604716349575e-05, "loss": 0.5956, "step": 3039 }, { "epoch": 0.4651874521805662, "grad_norm": 2.1019501713247006, "learning_rate": 1.7924535883165154e-05, "loss": 0.7306, "step": 3040 }, { "epoch": 0.46534047436878345, "grad_norm": 2.371476838905052, "learning_rate": 1.7923024116157806e-05, "loss": 0.7912, "step": 3041 }, { "epoch": 0.4654934965570008, "grad_norm": 2.2453509974503847, "learning_rate": 1.7921511862566557e-05, "loss": 0.7831, "step": 3042 }, { "epoch": 0.46564651874521806, "grad_norm": 2.516526580358889, "learning_rate": 1.7919999122484273e-05, "loss": 0.8007, "step": 3043 }, { "epoch": 0.46579954093343534, "grad_norm": 2.3714993400159288, "learning_rate": 1.791848589600386e-05, "loss": 0.8676, "step": 3044 }, { "epoch": 0.4659525631216526, "grad_norm": 2.4338479274309766, "learning_rate": 1.791697218321825e-05, "loss": 0.7427, "step": 3045 }, { "epoch": 0.46610558530986995, "grad_norm": 2.377571211070433, "learning_rate": 1.7915457984220406e-05, "loss": 0.8151, "step": 3046 }, { "epoch": 0.46625860749808723, "grad_norm": 2.3621546987748907, "learning_rate": 1.7913943299103325e-05, "loss": 0.7547, "step": 3047 }, { "epoch": 0.4664116296863045, "grad_norm": 2.339112355183538, "learning_rate": 1.7912428127960025e-05, "loss": 0.83, "step": 3048 }, { "epoch": 0.4665646518745218, "grad_norm": 2.685139879857841, "learning_rate": 1.791091247088356e-05, "loss": 0.8156, "step": 3049 }, { "epoch": 0.4667176740627391, "grad_norm": 2.531331145019246, "learning_rate": 1.790939632796701e-05, "loss": 0.7796, "step": 3050 }, { "epoch": 0.4668706962509564, "grad_norm": 2.4861225552288864, "learning_rate": 1.7907879699303494e-05, "loss": 0.7717, "step": 3051 }, { "epoch": 0.4670237184391737, "grad_norm": 2.310760243025036, "learning_rate": 1.790636258498615e-05, "loss": 0.7763, "step": 3052 }, { "epoch": 0.46717674062739095, "grad_norm": 2.3903087000299084, "learning_rate": 1.7904844985108146e-05, "loss": 0.7714, "step": 3053 }, { "epoch": 0.4673297628156083, "grad_norm": 2.2206279036603482, "learning_rate": 1.790332689976269e-05, "loss": 0.7524, "step": 3054 }, { "epoch": 0.46748278500382556, "grad_norm": 2.3187476587864095, "learning_rate": 1.790180832904301e-05, "loss": 0.8059, "step": 3055 }, { "epoch": 0.46763580719204284, "grad_norm": 2.710134518322272, "learning_rate": 1.7900289273042367e-05, "loss": 0.7208, "step": 3056 }, { "epoch": 0.4677888293802601, "grad_norm": 2.35421789922996, "learning_rate": 1.7898769731854058e-05, "loss": 0.7486, "step": 3057 }, { "epoch": 0.46794185156847745, "grad_norm": 2.286655013492438, "learning_rate": 1.7897249705571397e-05, "loss": 0.7359, "step": 3058 }, { "epoch": 0.46809487375669473, "grad_norm": 2.11297135275565, "learning_rate": 1.7895729194287733e-05, "loss": 0.6781, "step": 3059 }, { "epoch": 0.468247895944912, "grad_norm": 2.406508933681986, "learning_rate": 1.789420819809646e-05, "loss": 0.7598, "step": 3060 }, { "epoch": 0.4684009181331293, "grad_norm": 2.6366510236837075, "learning_rate": 1.7892686717090972e-05, "loss": 0.9273, "step": 3061 }, { "epoch": 0.4685539403213466, "grad_norm": 2.532411121338478, "learning_rate": 1.789116475136472e-05, "loss": 0.7786, "step": 3062 }, { "epoch": 0.4687069625095639, "grad_norm": 2.594310936151825, "learning_rate": 1.788964230101117e-05, "loss": 0.8882, "step": 3063 }, { "epoch": 0.4688599846977812, "grad_norm": 2.358180777677221, "learning_rate": 1.788811936612382e-05, "loss": 0.835, "step": 3064 }, { "epoch": 0.46901300688599845, "grad_norm": 2.2454409418760903, "learning_rate": 1.7886595946796202e-05, "loss": 0.8277, "step": 3065 }, { "epoch": 0.4691660290742158, "grad_norm": 2.329341381250108, "learning_rate": 1.7885072043121874e-05, "loss": 0.8119, "step": 3066 }, { "epoch": 0.46931905126243306, "grad_norm": 1.9843107842254468, "learning_rate": 1.7883547655194426e-05, "loss": 0.7151, "step": 3067 }, { "epoch": 0.46947207345065034, "grad_norm": 2.3293478291000755, "learning_rate": 1.7882022783107475e-05, "loss": 0.7741, "step": 3068 }, { "epoch": 0.4696250956388676, "grad_norm": 2.4350391900241997, "learning_rate": 1.7880497426954673e-05, "loss": 0.8019, "step": 3069 }, { "epoch": 0.46977811782708495, "grad_norm": 2.144141917751492, "learning_rate": 1.787897158682969e-05, "loss": 0.7007, "step": 3070 }, { "epoch": 0.46993114001530223, "grad_norm": 2.174973634706149, "learning_rate": 1.787744526282625e-05, "loss": 0.792, "step": 3071 }, { "epoch": 0.4700841622035195, "grad_norm": 2.618268443070123, "learning_rate": 1.7875918455038068e-05, "loss": 0.9176, "step": 3072 }, { "epoch": 0.4702371843917368, "grad_norm": 2.128847386701305, "learning_rate": 1.7874391163558926e-05, "loss": 0.7339, "step": 3073 }, { "epoch": 0.4703902065799541, "grad_norm": 2.4522718758498314, "learning_rate": 1.787286338848262e-05, "loss": 0.8111, "step": 3074 }, { "epoch": 0.4705432287681714, "grad_norm": 2.2474549670739736, "learning_rate": 1.7871335129902974e-05, "loss": 0.8114, "step": 3075 }, { "epoch": 0.4706962509563887, "grad_norm": 2.4366628479396724, "learning_rate": 1.7869806387913845e-05, "loss": 0.8638, "step": 3076 }, { "epoch": 0.47084927314460595, "grad_norm": 2.220474175033701, "learning_rate": 1.7868277162609116e-05, "loss": 0.7199, "step": 3077 }, { "epoch": 0.4710022953328233, "grad_norm": 2.4551268926851, "learning_rate": 1.786674745408271e-05, "loss": 0.7692, "step": 3078 }, { "epoch": 0.47115531752104056, "grad_norm": 2.464234476027355, "learning_rate": 1.7865217262428564e-05, "loss": 0.8443, "step": 3079 }, { "epoch": 0.47130833970925784, "grad_norm": 2.422126071311725, "learning_rate": 1.786368658774066e-05, "loss": 0.742, "step": 3080 }, { "epoch": 0.4714613618974751, "grad_norm": 2.5067019319029566, "learning_rate": 1.7862155430112997e-05, "loss": 0.7451, "step": 3081 }, { "epoch": 0.47161438408569245, "grad_norm": 2.260090103969428, "learning_rate": 1.7860623789639614e-05, "loss": 0.715, "step": 3082 }, { "epoch": 0.47176740627390973, "grad_norm": 2.57018335192414, "learning_rate": 1.7859091666414574e-05, "loss": 0.8114, "step": 3083 }, { "epoch": 0.471920428462127, "grad_norm": 2.3585692516115517, "learning_rate": 1.7857559060531967e-05, "loss": 0.7992, "step": 3084 }, { "epoch": 0.4720734506503443, "grad_norm": 2.35928380554881, "learning_rate": 1.785602597208592e-05, "loss": 0.8346, "step": 3085 }, { "epoch": 0.4722264728385616, "grad_norm": 2.462727119352491, "learning_rate": 1.7854492401170587e-05, "loss": 0.6754, "step": 3086 }, { "epoch": 0.4723794950267789, "grad_norm": 2.196818002617913, "learning_rate": 1.7852958347880147e-05, "loss": 0.832, "step": 3087 }, { "epoch": 0.4725325172149962, "grad_norm": 2.449395292618889, "learning_rate": 1.7851423812308814e-05, "loss": 0.8474, "step": 3088 }, { "epoch": 0.47268553940321345, "grad_norm": 2.0127666982797643, "learning_rate": 1.784988879455083e-05, "loss": 0.6382, "step": 3089 }, { "epoch": 0.4728385615914308, "grad_norm": 2.339434631709559, "learning_rate": 1.7848353294700467e-05, "loss": 0.7453, "step": 3090 }, { "epoch": 0.47299158377964806, "grad_norm": 2.36812382885095, "learning_rate": 1.7846817312852025e-05, "loss": 0.7293, "step": 3091 }, { "epoch": 0.47314460596786534, "grad_norm": 2.3238439044790975, "learning_rate": 1.7845280849099835e-05, "loss": 0.8259, "step": 3092 }, { "epoch": 0.4732976281560826, "grad_norm": 2.26683061558248, "learning_rate": 1.7843743903538255e-05, "loss": 0.7869, "step": 3093 }, { "epoch": 0.47345065034429995, "grad_norm": 2.2685721302716657, "learning_rate": 1.784220647626168e-05, "loss": 0.7982, "step": 3094 }, { "epoch": 0.47360367253251723, "grad_norm": 2.1515562913610893, "learning_rate": 1.7840668567364524e-05, "loss": 0.7836, "step": 3095 }, { "epoch": 0.4737566947207345, "grad_norm": 2.2986525906713404, "learning_rate": 1.783913017694124e-05, "loss": 0.7999, "step": 3096 }, { "epoch": 0.4739097169089518, "grad_norm": 2.14804110147054, "learning_rate": 1.7837591305086304e-05, "loss": 0.8341, "step": 3097 }, { "epoch": 0.4740627390971691, "grad_norm": 2.032811223621796, "learning_rate": 1.7836051951894226e-05, "loss": 0.7648, "step": 3098 }, { "epoch": 0.4742157612853864, "grad_norm": 2.366229763538631, "learning_rate": 1.7834512117459542e-05, "loss": 0.7466, "step": 3099 }, { "epoch": 0.4743687834736037, "grad_norm": 2.4477203066920836, "learning_rate": 1.783297180187682e-05, "loss": 0.8449, "step": 3100 }, { "epoch": 0.47452180566182095, "grad_norm": 2.4758677333507015, "learning_rate": 1.7831431005240655e-05, "loss": 0.879, "step": 3101 }, { "epoch": 0.47467482785003823, "grad_norm": 2.175130815541815, "learning_rate": 1.782988972764568e-05, "loss": 0.8596, "step": 3102 }, { "epoch": 0.47482785003825556, "grad_norm": 2.6280119624613936, "learning_rate": 1.7828347969186542e-05, "loss": 0.8391, "step": 3103 }, { "epoch": 0.47498087222647284, "grad_norm": 2.3033026539480175, "learning_rate": 1.782680572995793e-05, "loss": 0.8477, "step": 3104 }, { "epoch": 0.4751338944146901, "grad_norm": 2.3324067024311006, "learning_rate": 1.7825263010054565e-05, "loss": 0.9281, "step": 3105 }, { "epoch": 0.4752869166029074, "grad_norm": 2.4416725086218873, "learning_rate": 1.7823719809571183e-05, "loss": 0.7544, "step": 3106 }, { "epoch": 0.4754399387911247, "grad_norm": 2.3461897741365663, "learning_rate": 1.782217612860256e-05, "loss": 0.7901, "step": 3107 }, { "epoch": 0.475592960979342, "grad_norm": 2.258825700505424, "learning_rate": 1.7820631967243502e-05, "loss": 0.74, "step": 3108 }, { "epoch": 0.4757459831675593, "grad_norm": 2.236350731521977, "learning_rate": 1.781908732558884e-05, "loss": 0.8043, "step": 3109 }, { "epoch": 0.47589900535577656, "grad_norm": 2.306541883073562, "learning_rate": 1.7817542203733435e-05, "loss": 0.7783, "step": 3110 }, { "epoch": 0.4760520275439939, "grad_norm": 2.132024776419034, "learning_rate": 1.781599660177218e-05, "loss": 0.7408, "step": 3111 }, { "epoch": 0.47620504973221117, "grad_norm": 2.3182129805418006, "learning_rate": 1.7814450519800003e-05, "loss": 0.8, "step": 3112 }, { "epoch": 0.47635807192042845, "grad_norm": 2.7774847658958324, "learning_rate": 1.7812903957911845e-05, "loss": 0.8339, "step": 3113 }, { "epoch": 0.4765110941086457, "grad_norm": 2.4081664390527022, "learning_rate": 1.781135691620269e-05, "loss": 0.8104, "step": 3114 }, { "epoch": 0.47666411629686306, "grad_norm": 2.2680414415315977, "learning_rate": 1.7809809394767553e-05, "loss": 0.7734, "step": 3115 }, { "epoch": 0.47681713848508034, "grad_norm": 2.5455145450874874, "learning_rate": 1.7808261393701466e-05, "loss": 0.6636, "step": 3116 }, { "epoch": 0.4769701606732976, "grad_norm": 2.402374800475588, "learning_rate": 1.78067129130995e-05, "loss": 0.8384, "step": 3117 }, { "epoch": 0.4771231828615149, "grad_norm": 2.3897889539372366, "learning_rate": 1.7805163953056755e-05, "loss": 0.8397, "step": 3118 }, { "epoch": 0.4772762050497322, "grad_norm": 2.4403644458499887, "learning_rate": 1.7803614513668357e-05, "loss": 0.8328, "step": 3119 }, { "epoch": 0.4774292272379495, "grad_norm": 2.298660833843685, "learning_rate": 1.7802064595029463e-05, "loss": 0.7801, "step": 3120 }, { "epoch": 0.4775822494261668, "grad_norm": 2.488715580053673, "learning_rate": 1.7800514197235262e-05, "loss": 0.9101, "step": 3121 }, { "epoch": 0.47773527161438406, "grad_norm": 2.377244202377784, "learning_rate": 1.7798963320380965e-05, "loss": 0.7036, "step": 3122 }, { "epoch": 0.4778882938026014, "grad_norm": 2.4289693996129165, "learning_rate": 1.7797411964561825e-05, "loss": 0.814, "step": 3123 }, { "epoch": 0.47804131599081867, "grad_norm": 2.5574585527951905, "learning_rate": 1.7795860129873113e-05, "loss": 0.8414, "step": 3124 }, { "epoch": 0.47819433817903595, "grad_norm": 2.145881998828633, "learning_rate": 1.779430781641013e-05, "loss": 0.7363, "step": 3125 }, { "epoch": 0.4783473603672532, "grad_norm": 2.2443790813070317, "learning_rate": 1.7792755024268213e-05, "loss": 0.8263, "step": 3126 }, { "epoch": 0.47850038255547056, "grad_norm": 2.4804357389915483, "learning_rate": 1.7791201753542725e-05, "loss": 0.9297, "step": 3127 }, { "epoch": 0.47865340474368784, "grad_norm": 2.409155080634445, "learning_rate": 1.778964800432906e-05, "loss": 0.8459, "step": 3128 }, { "epoch": 0.4788064269319051, "grad_norm": 2.0586342296667213, "learning_rate": 1.7788093776722634e-05, "loss": 0.7397, "step": 3129 }, { "epoch": 0.4789594491201224, "grad_norm": 2.3936689258666943, "learning_rate": 1.7786539070818903e-05, "loss": 0.8269, "step": 3130 }, { "epoch": 0.4791124713083397, "grad_norm": 2.359822665681357, "learning_rate": 1.778498388671335e-05, "loss": 0.7837, "step": 3131 }, { "epoch": 0.479265493496557, "grad_norm": 2.6016378686683455, "learning_rate": 1.778342822450148e-05, "loss": 0.8188, "step": 3132 }, { "epoch": 0.4794185156847743, "grad_norm": 2.2391289244689783, "learning_rate": 1.778187208427883e-05, "loss": 0.7139, "step": 3133 }, { "epoch": 0.47957153787299156, "grad_norm": 2.4395509973947904, "learning_rate": 1.7780315466140975e-05, "loss": 0.6954, "step": 3134 }, { "epoch": 0.4797245600612089, "grad_norm": 2.3167840415266783, "learning_rate": 1.777875837018351e-05, "loss": 0.8495, "step": 3135 }, { "epoch": 0.47987758224942617, "grad_norm": 2.207197768229709, "learning_rate": 1.7777200796502065e-05, "loss": 0.7324, "step": 3136 }, { "epoch": 0.48003060443764345, "grad_norm": 2.3388895918578405, "learning_rate": 1.7775642745192293e-05, "loss": 0.8092, "step": 3137 }, { "epoch": 0.4801836266258607, "grad_norm": 2.5991436129658134, "learning_rate": 1.777408421634988e-05, "loss": 0.7951, "step": 3138 }, { "epoch": 0.48033664881407806, "grad_norm": 2.614271482790651, "learning_rate": 1.7772525210070545e-05, "loss": 0.9615, "step": 3139 }, { "epoch": 0.48048967100229534, "grad_norm": 2.280529742595915, "learning_rate": 1.777096572645003e-05, "loss": 0.8814, "step": 3140 }, { "epoch": 0.4806426931905126, "grad_norm": 2.22455747968137, "learning_rate": 1.7769405765584112e-05, "loss": 0.7447, "step": 3141 }, { "epoch": 0.4807957153787299, "grad_norm": 1.9704088622500777, "learning_rate": 1.7767845327568585e-05, "loss": 0.6619, "step": 3142 }, { "epoch": 0.4809487375669472, "grad_norm": 2.440214460615727, "learning_rate": 1.7766284412499295e-05, "loss": 0.829, "step": 3143 }, { "epoch": 0.4811017597551645, "grad_norm": 2.474091480639532, "learning_rate": 1.7764723020472098e-05, "loss": 0.8021, "step": 3144 }, { "epoch": 0.4812547819433818, "grad_norm": 2.159308906503231, "learning_rate": 1.7763161151582878e-05, "loss": 0.7575, "step": 3145 }, { "epoch": 0.48140780413159906, "grad_norm": 2.2634318435235814, "learning_rate": 1.7761598805927564e-05, "loss": 0.7508, "step": 3146 }, { "epoch": 0.4815608263198164, "grad_norm": 2.2808387798097574, "learning_rate": 1.7760035983602107e-05, "loss": 0.8432, "step": 3147 }, { "epoch": 0.48171384850803367, "grad_norm": 2.2333734359872213, "learning_rate": 1.775847268470248e-05, "loss": 0.7977, "step": 3148 }, { "epoch": 0.48186687069625095, "grad_norm": 2.175497185361418, "learning_rate": 1.7756908909324697e-05, "loss": 0.8576, "step": 3149 }, { "epoch": 0.4820198928844682, "grad_norm": 2.141222377290015, "learning_rate": 1.7755344657564792e-05, "loss": 0.7083, "step": 3150 }, { "epoch": 0.48217291507268556, "grad_norm": 2.204712165087772, "learning_rate": 1.7753779929518834e-05, "loss": 0.7488, "step": 3151 }, { "epoch": 0.48232593726090284, "grad_norm": 2.6909546478854023, "learning_rate": 1.7752214725282912e-05, "loss": 0.8871, "step": 3152 }, { "epoch": 0.4824789594491201, "grad_norm": 2.4007662255888182, "learning_rate": 1.775064904495316e-05, "loss": 0.8097, "step": 3153 }, { "epoch": 0.4826319816373374, "grad_norm": 2.281531360791035, "learning_rate": 1.774908288862573e-05, "loss": 0.6746, "step": 3154 }, { "epoch": 0.4827850038255547, "grad_norm": 2.2461468476777973, "learning_rate": 1.7747516256396805e-05, "loss": 0.7814, "step": 3155 }, { "epoch": 0.482938026013772, "grad_norm": 2.8940583997852145, "learning_rate": 1.7745949148362598e-05, "loss": 0.9052, "step": 3156 }, { "epoch": 0.4830910482019893, "grad_norm": 2.151146909327901, "learning_rate": 1.774438156461935e-05, "loss": 0.6908, "step": 3157 }, { "epoch": 0.48324407039020656, "grad_norm": 2.215843800992492, "learning_rate": 1.7742813505263337e-05, "loss": 0.7155, "step": 3158 }, { "epoch": 0.4833970925784239, "grad_norm": 2.7562581153783814, "learning_rate": 1.7741244970390854e-05, "loss": 0.8132, "step": 3159 }, { "epoch": 0.48355011476664117, "grad_norm": 2.151781872160964, "learning_rate": 1.7739675960098233e-05, "loss": 0.6925, "step": 3160 }, { "epoch": 0.48370313695485845, "grad_norm": 2.31679038784991, "learning_rate": 1.7738106474481838e-05, "loss": 0.8587, "step": 3161 }, { "epoch": 0.4838561591430757, "grad_norm": 2.2757888463384135, "learning_rate": 1.773653651363805e-05, "loss": 0.7059, "step": 3162 }, { "epoch": 0.48400918133129306, "grad_norm": 2.6012748147156315, "learning_rate": 1.7734966077663288e-05, "loss": 0.7631, "step": 3163 }, { "epoch": 0.48416220351951034, "grad_norm": 2.27116501374633, "learning_rate": 1.7733395166654e-05, "loss": 0.7921, "step": 3164 }, { "epoch": 0.4843152257077276, "grad_norm": 2.463737754185167, "learning_rate": 1.7731823780706664e-05, "loss": 0.8805, "step": 3165 }, { "epoch": 0.4844682478959449, "grad_norm": 2.457864756479185, "learning_rate": 1.773025191991778e-05, "loss": 0.8466, "step": 3166 }, { "epoch": 0.4846212700841622, "grad_norm": 2.3691514278135695, "learning_rate": 1.7728679584383884e-05, "loss": 0.7611, "step": 3167 }, { "epoch": 0.4847742922723795, "grad_norm": 2.6690788196830746, "learning_rate": 1.7727106774201542e-05, "loss": 0.7784, "step": 3168 }, { "epoch": 0.4849273144605968, "grad_norm": 3.050441302794346, "learning_rate": 1.7725533489467345e-05, "loss": 0.9023, "step": 3169 }, { "epoch": 0.48508033664881406, "grad_norm": 2.242247145906977, "learning_rate": 1.7723959730277912e-05, "loss": 0.6486, "step": 3170 }, { "epoch": 0.4852333588370314, "grad_norm": 2.171151620961211, "learning_rate": 1.7722385496729896e-05, "loss": 0.7588, "step": 3171 }, { "epoch": 0.48538638102524867, "grad_norm": 2.1872229724795673, "learning_rate": 1.7720810788919974e-05, "loss": 0.7352, "step": 3172 }, { "epoch": 0.48553940321346595, "grad_norm": 2.2381038298991487, "learning_rate": 1.7719235606944864e-05, "loss": 0.777, "step": 3173 }, { "epoch": 0.4856924254016832, "grad_norm": 2.1952939393056026, "learning_rate": 1.7717659950901293e-05, "loss": 0.738, "step": 3174 }, { "epoch": 0.48584544758990056, "grad_norm": 2.4037553430751863, "learning_rate": 1.771608382088603e-05, "loss": 0.7518, "step": 3175 }, { "epoch": 0.48599846977811784, "grad_norm": 2.298974912728606, "learning_rate": 1.771450721699588e-05, "loss": 0.7406, "step": 3176 }, { "epoch": 0.4861514919663351, "grad_norm": 2.4675400243903214, "learning_rate": 1.7712930139327663e-05, "loss": 0.8112, "step": 3177 }, { "epoch": 0.4863045141545524, "grad_norm": 2.332643028056019, "learning_rate": 1.771135258797823e-05, "loss": 0.7814, "step": 3178 }, { "epoch": 0.4864575363427697, "grad_norm": 2.3206125208022685, "learning_rate": 1.7709774563044467e-05, "loss": 0.7689, "step": 3179 }, { "epoch": 0.486610558530987, "grad_norm": 2.2311701929496537, "learning_rate": 1.7708196064623288e-05, "loss": 0.8517, "step": 3180 }, { "epoch": 0.4867635807192043, "grad_norm": 1.9798751484367991, "learning_rate": 1.7706617092811634e-05, "loss": 0.7107, "step": 3181 }, { "epoch": 0.48691660290742156, "grad_norm": 2.665140548734543, "learning_rate": 1.7705037647706476e-05, "loss": 0.8483, "step": 3182 }, { "epoch": 0.4870696250956389, "grad_norm": 2.3060955142676525, "learning_rate": 1.7703457729404814e-05, "loss": 0.7595, "step": 3183 }, { "epoch": 0.48722264728385617, "grad_norm": 2.201706180521868, "learning_rate": 1.7701877338003677e-05, "loss": 0.6579, "step": 3184 }, { "epoch": 0.48737566947207345, "grad_norm": 2.4863780424056436, "learning_rate": 1.7700296473600122e-05, "loss": 0.8218, "step": 3185 }, { "epoch": 0.4875286916602907, "grad_norm": 2.5234475881120972, "learning_rate": 1.7698715136291238e-05, "loss": 0.9158, "step": 3186 }, { "epoch": 0.48768171384850806, "grad_norm": 2.230782070237414, "learning_rate": 1.769713332617414e-05, "loss": 0.8134, "step": 3187 }, { "epoch": 0.48783473603672534, "grad_norm": 2.555470250956034, "learning_rate": 1.7695551043345972e-05, "loss": 0.7862, "step": 3188 }, { "epoch": 0.4879877582249426, "grad_norm": 2.435982366105735, "learning_rate": 1.7693968287903906e-05, "loss": 0.8112, "step": 3189 }, { "epoch": 0.4881407804131599, "grad_norm": 2.079276970212424, "learning_rate": 1.7692385059945154e-05, "loss": 0.7436, "step": 3190 }, { "epoch": 0.4882938026013772, "grad_norm": 2.503362168438321, "learning_rate": 1.769080135956694e-05, "loss": 0.8012, "step": 3191 }, { "epoch": 0.4884468247895945, "grad_norm": 2.3646623422395905, "learning_rate": 1.768921718686653e-05, "loss": 0.8065, "step": 3192 }, { "epoch": 0.4885998469778118, "grad_norm": 2.428668920391647, "learning_rate": 1.7687632541941208e-05, "loss": 0.8703, "step": 3193 }, { "epoch": 0.48875286916602906, "grad_norm": 2.592725000921327, "learning_rate": 1.7686047424888303e-05, "loss": 0.9091, "step": 3194 }, { "epoch": 0.4889058913542464, "grad_norm": 2.3455071603658335, "learning_rate": 1.7684461835805153e-05, "loss": 0.7535, "step": 3195 }, { "epoch": 0.48905891354246367, "grad_norm": 2.7711528247671837, "learning_rate": 1.7682875774789143e-05, "loss": 0.8585, "step": 3196 }, { "epoch": 0.48921193573068095, "grad_norm": 2.5378645714466836, "learning_rate": 1.7681289241937675e-05, "loss": 0.806, "step": 3197 }, { "epoch": 0.4893649579188982, "grad_norm": 2.302542926603842, "learning_rate": 1.7679702237348185e-05, "loss": 0.775, "step": 3198 }, { "epoch": 0.48951798010711556, "grad_norm": 2.5459949407122777, "learning_rate": 1.7678114761118136e-05, "loss": 0.8727, "step": 3199 }, { "epoch": 0.48967100229533284, "grad_norm": 2.448262018505535, "learning_rate": 1.7676526813345024e-05, "loss": 0.7077, "step": 3200 }, { "epoch": 0.4898240244835501, "grad_norm": 2.622412414805754, "learning_rate": 1.7674938394126368e-05, "loss": 1.0291, "step": 3201 }, { "epoch": 0.4899770466717674, "grad_norm": 2.4738400913098495, "learning_rate": 1.7673349503559728e-05, "loss": 0.857, "step": 3202 }, { "epoch": 0.4901300688599847, "grad_norm": 2.5134079518079253, "learning_rate": 1.767176014174267e-05, "loss": 0.8002, "step": 3203 }, { "epoch": 0.490283091048202, "grad_norm": 13.524370615262251, "learning_rate": 1.767017030877281e-05, "loss": 0.8024, "step": 3204 }, { "epoch": 0.4904361132364193, "grad_norm": 2.404277059545446, "learning_rate": 1.7668580004747787e-05, "loss": 0.769, "step": 3205 }, { "epoch": 0.49058913542463656, "grad_norm": 2.2606428101347795, "learning_rate": 1.7666989229765265e-05, "loss": 0.7535, "step": 3206 }, { "epoch": 0.4907421576128539, "grad_norm": 2.64388311392485, "learning_rate": 1.7665397983922942e-05, "loss": 0.906, "step": 3207 }, { "epoch": 0.49089517980107117, "grad_norm": 2.5906710909288977, "learning_rate": 1.7663806267318538e-05, "loss": 0.7953, "step": 3208 }, { "epoch": 0.49104820198928845, "grad_norm": 2.272625903337256, "learning_rate": 1.7662214080049814e-05, "loss": 0.7149, "step": 3209 }, { "epoch": 0.4912012241775057, "grad_norm": 2.078589092936746, "learning_rate": 1.7660621422214546e-05, "loss": 0.6869, "step": 3210 }, { "epoch": 0.49135424636572306, "grad_norm": 2.1478637518748904, "learning_rate": 1.765902829391055e-05, "loss": 0.7486, "step": 3211 }, { "epoch": 0.49150726855394034, "grad_norm": 2.3289060874748637, "learning_rate": 1.7657434695235666e-05, "loss": 0.7998, "step": 3212 }, { "epoch": 0.4916602907421576, "grad_norm": 2.4882931922578484, "learning_rate": 1.7655840626287756e-05, "loss": 0.7622, "step": 3213 }, { "epoch": 0.4918133129303749, "grad_norm": 2.1951542390318712, "learning_rate": 1.765424608716473e-05, "loss": 0.7396, "step": 3214 }, { "epoch": 0.49196633511859217, "grad_norm": 2.418546776586094, "learning_rate": 1.7652651077964504e-05, "loss": 0.8735, "step": 3215 }, { "epoch": 0.4921193573068095, "grad_norm": 2.258923232410249, "learning_rate": 1.7651055598785037e-05, "loss": 0.8429, "step": 3216 }, { "epoch": 0.4922723794950268, "grad_norm": 2.212119341949836, "learning_rate": 1.7649459649724315e-05, "loss": 0.691, "step": 3217 }, { "epoch": 0.49242540168324406, "grad_norm": 2.5536534558659456, "learning_rate": 1.7647863230880354e-05, "loss": 0.7298, "step": 3218 }, { "epoch": 0.49257842387146134, "grad_norm": 2.3804911347467574, "learning_rate": 1.764626634235119e-05, "loss": 0.857, "step": 3219 }, { "epoch": 0.49273144605967867, "grad_norm": 2.1041892044354715, "learning_rate": 1.76446689842349e-05, "loss": 0.7818, "step": 3220 }, { "epoch": 0.49288446824789595, "grad_norm": 2.3149475586078623, "learning_rate": 1.764307115662958e-05, "loss": 0.8303, "step": 3221 }, { "epoch": 0.4930374904361132, "grad_norm": 2.404275202101493, "learning_rate": 1.7641472859633362e-05, "loss": 0.8074, "step": 3222 }, { "epoch": 0.4931905126243305, "grad_norm": 2.7182580165570585, "learning_rate": 1.76398740933444e-05, "loss": 0.6691, "step": 3223 }, { "epoch": 0.49334353481254783, "grad_norm": 2.2542269012648757, "learning_rate": 1.7638274857860884e-05, "loss": 0.7576, "step": 3224 }, { "epoch": 0.4934965570007651, "grad_norm": 2.6790158205282273, "learning_rate": 1.763667515328103e-05, "loss": 0.7793, "step": 3225 }, { "epoch": 0.4936495791889824, "grad_norm": 2.3313374760580494, "learning_rate": 1.7635074979703076e-05, "loss": 0.8102, "step": 3226 }, { "epoch": 0.49380260137719967, "grad_norm": 2.307545812369083, "learning_rate": 1.76334743372253e-05, "loss": 0.7408, "step": 3227 }, { "epoch": 0.493955623565417, "grad_norm": 2.6064789126655907, "learning_rate": 1.7631873225946004e-05, "loss": 0.9119, "step": 3228 }, { "epoch": 0.4941086457536343, "grad_norm": 2.3624050343096727, "learning_rate": 1.7630271645963518e-05, "loss": 0.829, "step": 3229 }, { "epoch": 0.49426166794185156, "grad_norm": 2.554313730527207, "learning_rate": 1.7628669597376198e-05, "loss": 0.9271, "step": 3230 }, { "epoch": 0.49441469013006883, "grad_norm": 2.1998616907501995, "learning_rate": 1.7627067080282432e-05, "loss": 0.8112, "step": 3231 }, { "epoch": 0.49456771231828617, "grad_norm": 2.363883278824949, "learning_rate": 1.7625464094780646e-05, "loss": 0.7588, "step": 3232 }, { "epoch": 0.49472073450650345, "grad_norm": 2.4905261389828506, "learning_rate": 1.7623860640969275e-05, "loss": 0.7716, "step": 3233 }, { "epoch": 0.4948737566947207, "grad_norm": 2.3865027559186447, "learning_rate": 1.76222567189468e-05, "loss": 0.7401, "step": 3234 }, { "epoch": 0.495026778882938, "grad_norm": 2.483958830160686, "learning_rate": 1.762065232881172e-05, "loss": 0.8404, "step": 3235 }, { "epoch": 0.49517980107115533, "grad_norm": 2.1902122015963448, "learning_rate": 1.7619047470662565e-05, "loss": 0.8056, "step": 3236 }, { "epoch": 0.4953328232593726, "grad_norm": 2.399968282559192, "learning_rate": 1.7617442144597902e-05, "loss": 0.8102, "step": 3237 }, { "epoch": 0.4954858454475899, "grad_norm": 2.1498314028722554, "learning_rate": 1.7615836350716316e-05, "loss": 0.7334, "step": 3238 }, { "epoch": 0.49563886763580717, "grad_norm": 2.6122160629141424, "learning_rate": 1.7614230089116428e-05, "loss": 0.8052, "step": 3239 }, { "epoch": 0.4957918898240245, "grad_norm": 2.293516449978853, "learning_rate": 1.7612623359896884e-05, "loss": 0.7155, "step": 3240 }, { "epoch": 0.4959449120122418, "grad_norm": 2.3151533649954965, "learning_rate": 1.7611016163156355e-05, "loss": 0.7752, "step": 3241 }, { "epoch": 0.49609793420045906, "grad_norm": 2.794173016381072, "learning_rate": 1.7609408498993553e-05, "loss": 0.9031, "step": 3242 }, { "epoch": 0.49625095638867633, "grad_norm": 2.588196116806027, "learning_rate": 1.7607800367507204e-05, "loss": 0.8531, "step": 3243 }, { "epoch": 0.49640397857689367, "grad_norm": 2.3351088057592615, "learning_rate": 1.7606191768796074e-05, "loss": 0.8684, "step": 3244 }, { "epoch": 0.49655700076511095, "grad_norm": 2.32274229856413, "learning_rate": 1.7604582702958953e-05, "loss": 0.7669, "step": 3245 }, { "epoch": 0.4967100229533282, "grad_norm": 2.477436788357222, "learning_rate": 1.7602973170094658e-05, "loss": 0.8381, "step": 3246 }, { "epoch": 0.4968630451415455, "grad_norm": 2.6129214133461343, "learning_rate": 1.7601363170302035e-05, "loss": 0.793, "step": 3247 }, { "epoch": 0.49701606732976283, "grad_norm": 2.172244742262806, "learning_rate": 1.7599752703679966e-05, "loss": 0.624, "step": 3248 }, { "epoch": 0.4971690895179801, "grad_norm": 2.298575304630846, "learning_rate": 1.7598141770327353e-05, "loss": 0.7008, "step": 3249 }, { "epoch": 0.4973221117061974, "grad_norm": 2.268141893500064, "learning_rate": 1.759653037034313e-05, "loss": 0.7647, "step": 3250 }, { "epoch": 0.49747513389441467, "grad_norm": 2.222876437768816, "learning_rate": 1.7594918503826255e-05, "loss": 0.6546, "step": 3251 }, { "epoch": 0.497628156082632, "grad_norm": 2.752810421202548, "learning_rate": 1.7593306170875727e-05, "loss": 0.9053, "step": 3252 }, { "epoch": 0.4977811782708493, "grad_norm": 2.4122049038132767, "learning_rate": 1.7591693371590563e-05, "loss": 0.8456, "step": 3253 }, { "epoch": 0.49793420045906656, "grad_norm": 2.1764013379355425, "learning_rate": 1.7590080106069807e-05, "loss": 0.8315, "step": 3254 }, { "epoch": 0.49808722264728383, "grad_norm": 2.375189125899373, "learning_rate": 1.758846637441254e-05, "loss": 0.7593, "step": 3255 }, { "epoch": 0.49824024483550117, "grad_norm": 2.3610123855811675, "learning_rate": 1.7586852176717867e-05, "loss": 0.8311, "step": 3256 }, { "epoch": 0.49839326702371844, "grad_norm": 2.6043618815139635, "learning_rate": 1.7585237513084922e-05, "loss": 0.7291, "step": 3257 }, { "epoch": 0.4985462892119357, "grad_norm": 2.3211779791516522, "learning_rate": 1.758362238361287e-05, "loss": 0.769, "step": 3258 }, { "epoch": 0.498699311400153, "grad_norm": 2.2649520601965607, "learning_rate": 1.7582006788400895e-05, "loss": 0.705, "step": 3259 }, { "epoch": 0.49885233358837033, "grad_norm": 2.439781656230531, "learning_rate": 1.7580390727548228e-05, "loss": 0.653, "step": 3260 }, { "epoch": 0.4990053557765876, "grad_norm": 2.448128881631369, "learning_rate": 1.7578774201154107e-05, "loss": 0.8099, "step": 3261 }, { "epoch": 0.4991583779648049, "grad_norm": 2.4505488863057416, "learning_rate": 1.7577157209317817e-05, "loss": 0.8893, "step": 3262 }, { "epoch": 0.49931140015302217, "grad_norm": 2.5299595454688135, "learning_rate": 1.757553975213866e-05, "loss": 0.7847, "step": 3263 }, { "epoch": 0.4994644223412395, "grad_norm": 2.7098718662487524, "learning_rate": 1.757392182971597e-05, "loss": 0.7942, "step": 3264 }, { "epoch": 0.4996174445294568, "grad_norm": 2.3535479403390354, "learning_rate": 1.757230344214911e-05, "loss": 0.8274, "step": 3265 }, { "epoch": 0.49977046671767406, "grad_norm": 2.1897548479389557, "learning_rate": 1.7570684589537473e-05, "loss": 0.8113, "step": 3266 }, { "epoch": 0.49992348890589133, "grad_norm": 2.321407918265366, "learning_rate": 1.7569065271980482e-05, "loss": 0.8089, "step": 3267 }, { "epoch": 0.5000765110941087, "grad_norm": 2.5519453674642194, "learning_rate": 1.756744548957758e-05, "loss": 0.7425, "step": 3268 }, { "epoch": 0.5002295332823259, "grad_norm": 2.425437004623082, "learning_rate": 1.756582524242825e-05, "loss": 0.7746, "step": 3269 }, { "epoch": 0.5003825554705432, "grad_norm": 2.3121885815367405, "learning_rate": 1.756420453063199e-05, "loss": 0.8158, "step": 3270 }, { "epoch": 0.5005355776587606, "grad_norm": 2.378431932054567, "learning_rate": 1.756258335428834e-05, "loss": 0.7868, "step": 3271 }, { "epoch": 0.5006885998469778, "grad_norm": 2.007280263141698, "learning_rate": 1.756096171349686e-05, "loss": 0.8533, "step": 3272 }, { "epoch": 0.5008416220351951, "grad_norm": 2.522900136714207, "learning_rate": 1.7559339608357145e-05, "loss": 0.7948, "step": 3273 }, { "epoch": 0.5009946442234124, "grad_norm": 2.2461761449648905, "learning_rate": 1.7557717038968807e-05, "loss": 0.724, "step": 3274 }, { "epoch": 0.5011476664116297, "grad_norm": 2.492589978811216, "learning_rate": 1.7556094005431507e-05, "loss": 0.8158, "step": 3275 }, { "epoch": 0.501300688599847, "grad_norm": 2.779932572876539, "learning_rate": 1.7554470507844906e-05, "loss": 0.7971, "step": 3276 }, { "epoch": 0.5014537107880642, "grad_norm": 2.726781264906075, "learning_rate": 1.7552846546308724e-05, "loss": 0.882, "step": 3277 }, { "epoch": 0.5016067329762816, "grad_norm": 2.750726908807502, "learning_rate": 1.7551222120922687e-05, "loss": 0.7143, "step": 3278 }, { "epoch": 0.5017597551644989, "grad_norm": 2.611633220188425, "learning_rate": 1.7549597231786562e-05, "loss": 0.9365, "step": 3279 }, { "epoch": 0.5019127773527161, "grad_norm": 2.193683141102787, "learning_rate": 1.7547971879000133e-05, "loss": 0.7702, "step": 3280 }, { "epoch": 0.5020657995409334, "grad_norm": 2.1799111635263384, "learning_rate": 1.7546346062663224e-05, "loss": 0.767, "step": 3281 }, { "epoch": 0.5022188217291508, "grad_norm": 2.2272737839801846, "learning_rate": 1.754471978287568e-05, "loss": 0.7561, "step": 3282 }, { "epoch": 0.502371843917368, "grad_norm": 2.3756225531817337, "learning_rate": 1.7543093039737384e-05, "loss": 0.7519, "step": 3283 }, { "epoch": 0.5025248661055853, "grad_norm": 2.6892279006528206, "learning_rate": 1.754146583334823e-05, "loss": 0.7317, "step": 3284 }, { "epoch": 0.5026778882938026, "grad_norm": 2.167783051454944, "learning_rate": 1.7539838163808164e-05, "loss": 0.6927, "step": 3285 }, { "epoch": 0.5028309104820199, "grad_norm": 2.497432083363853, "learning_rate": 1.7538210031217133e-05, "loss": 0.7813, "step": 3286 }, { "epoch": 0.5029839326702372, "grad_norm": 2.42569547096876, "learning_rate": 1.7536581435675136e-05, "loss": 0.6925, "step": 3287 }, { "epoch": 0.5031369548584544, "grad_norm": 2.2400882700793336, "learning_rate": 1.753495237728219e-05, "loss": 0.828, "step": 3288 }, { "epoch": 0.5032899770466718, "grad_norm": 2.247722482445047, "learning_rate": 1.7533322856138345e-05, "loss": 0.7002, "step": 3289 }, { "epoch": 0.5034429992348891, "grad_norm": 2.507215137207525, "learning_rate": 1.7531692872343672e-05, "loss": 0.811, "step": 3290 }, { "epoch": 0.5035960214231063, "grad_norm": 2.3787834208881655, "learning_rate": 1.753006242599827e-05, "loss": 0.8742, "step": 3291 }, { "epoch": 0.5037490436113237, "grad_norm": 2.3822113508515392, "learning_rate": 1.7528431517202283e-05, "loss": 0.8242, "step": 3292 }, { "epoch": 0.5039020657995409, "grad_norm": 2.1981554588331984, "learning_rate": 1.7526800146055866e-05, "loss": 0.932, "step": 3293 }, { "epoch": 0.5040550879877582, "grad_norm": 2.2509071078466643, "learning_rate": 1.7525168312659202e-05, "loss": 0.7999, "step": 3294 }, { "epoch": 0.5042081101759756, "grad_norm": 2.4597949432094417, "learning_rate": 1.7523536017112516e-05, "loss": 0.7423, "step": 3295 }, { "epoch": 0.5043611323641928, "grad_norm": 2.2297846203516305, "learning_rate": 1.7521903259516045e-05, "loss": 0.6966, "step": 3296 }, { "epoch": 0.5045141545524101, "grad_norm": 2.4466722284928677, "learning_rate": 1.752027003997008e-05, "loss": 0.8328, "step": 3297 }, { "epoch": 0.5046671767406274, "grad_norm": 2.240130726906534, "learning_rate": 1.7518636358574905e-05, "loss": 0.7626, "step": 3298 }, { "epoch": 0.5048201989288447, "grad_norm": 2.4645099563125554, "learning_rate": 1.7517002215430857e-05, "loss": 0.8377, "step": 3299 }, { "epoch": 0.504973221117062, "grad_norm": 2.173867927524907, "learning_rate": 1.75153676106383e-05, "loss": 0.7712, "step": 3300 }, { "epoch": 0.5051262433052792, "grad_norm": 2.538823450141907, "learning_rate": 1.7513732544297618e-05, "loss": 0.8464, "step": 3301 }, { "epoch": 0.5052792654934966, "grad_norm": 2.462672051119006, "learning_rate": 1.7512097016509224e-05, "loss": 0.8667, "step": 3302 }, { "epoch": 0.5054322876817139, "grad_norm": 2.4697382148859597, "learning_rate": 1.7510461027373565e-05, "loss": 0.726, "step": 3303 }, { "epoch": 0.5055853098699311, "grad_norm": 2.2938339035117727, "learning_rate": 1.7508824576991115e-05, "loss": 0.7386, "step": 3304 }, { "epoch": 0.5057383320581484, "grad_norm": 2.157524022621634, "learning_rate": 1.7507187665462374e-05, "loss": 0.7496, "step": 3305 }, { "epoch": 0.5058913542463658, "grad_norm": 2.1429953471957015, "learning_rate": 1.7505550292887866e-05, "loss": 0.7888, "step": 3306 }, { "epoch": 0.506044376434583, "grad_norm": 2.1704731421991394, "learning_rate": 1.7503912459368156e-05, "loss": 0.8105, "step": 3307 }, { "epoch": 0.5061973986228003, "grad_norm": 2.5892926483115772, "learning_rate": 1.7502274165003826e-05, "loss": 0.8, "step": 3308 }, { "epoch": 0.5063504208110176, "grad_norm": 2.4993920181365654, "learning_rate": 1.7500635409895488e-05, "loss": 0.8512, "step": 3309 }, { "epoch": 0.5065034429992349, "grad_norm": 2.340329315102193, "learning_rate": 1.7498996194143792e-05, "loss": 0.7955, "step": 3310 }, { "epoch": 0.5066564651874522, "grad_norm": 2.3758682343089905, "learning_rate": 1.7497356517849397e-05, "loss": 0.7777, "step": 3311 }, { "epoch": 0.5068094873756694, "grad_norm": 2.430405212589665, "learning_rate": 1.749571638111301e-05, "loss": 0.9005, "step": 3312 }, { "epoch": 0.5069625095638868, "grad_norm": 2.295985134609166, "learning_rate": 1.749407578403536e-05, "loss": 0.6991, "step": 3313 }, { "epoch": 0.5071155317521041, "grad_norm": 2.2080978558305646, "learning_rate": 1.74924347267172e-05, "loss": 0.6966, "step": 3314 }, { "epoch": 0.5072685539403213, "grad_norm": 2.3125199843341386, "learning_rate": 1.749079320925931e-05, "loss": 0.9435, "step": 3315 }, { "epoch": 0.5074215761285387, "grad_norm": 2.3758545793328394, "learning_rate": 1.7489151231762503e-05, "loss": 0.7937, "step": 3316 }, { "epoch": 0.5075745983167559, "grad_norm": 2.327374821187704, "learning_rate": 1.7487508794327627e-05, "loss": 0.6983, "step": 3317 }, { "epoch": 0.5077276205049732, "grad_norm": 2.3625341694144413, "learning_rate": 1.7485865897055537e-05, "loss": 0.797, "step": 3318 }, { "epoch": 0.5078806426931906, "grad_norm": 2.4367362368564836, "learning_rate": 1.748422254004714e-05, "loss": 0.8072, "step": 3319 }, { "epoch": 0.5080336648814078, "grad_norm": 2.3174896950785433, "learning_rate": 1.7482578723403363e-05, "loss": 0.7571, "step": 3320 }, { "epoch": 0.5081866870696251, "grad_norm": 1.8791679422020524, "learning_rate": 1.748093444722515e-05, "loss": 0.7182, "step": 3321 }, { "epoch": 0.5083397092578424, "grad_norm": 2.1616486568516096, "learning_rate": 1.7479289711613487e-05, "loss": 0.7051, "step": 3322 }, { "epoch": 0.5084927314460597, "grad_norm": 2.230631601951075, "learning_rate": 1.7477644516669387e-05, "loss": 0.7449, "step": 3323 }, { "epoch": 0.508645753634277, "grad_norm": 2.0062049354602225, "learning_rate": 1.747599886249388e-05, "loss": 0.7714, "step": 3324 }, { "epoch": 0.5087987758224942, "grad_norm": 2.5277809968626417, "learning_rate": 1.747435274918804e-05, "loss": 0.9102, "step": 3325 }, { "epoch": 0.5089517980107116, "grad_norm": 2.2964812751680723, "learning_rate": 1.7472706176852957e-05, "loss": 0.8331, "step": 3326 }, { "epoch": 0.5091048201989289, "grad_norm": 2.4469684135554983, "learning_rate": 1.7471059145589755e-05, "loss": 0.8294, "step": 3327 }, { "epoch": 0.5092578423871461, "grad_norm": 2.197412860546768, "learning_rate": 1.7469411655499583e-05, "loss": 0.8342, "step": 3328 }, { "epoch": 0.5094108645753634, "grad_norm": 2.5275013829253212, "learning_rate": 1.746776370668362e-05, "loss": 0.8247, "step": 3329 }, { "epoch": 0.5095638867635807, "grad_norm": 2.1610874400415803, "learning_rate": 1.7466115299243072e-05, "loss": 0.7758, "step": 3330 }, { "epoch": 0.509716908951798, "grad_norm": 2.2429685448279733, "learning_rate": 1.746446643327918e-05, "loss": 0.7493, "step": 3331 }, { "epoch": 0.5098699311400153, "grad_norm": 2.4143447333401435, "learning_rate": 1.7462817108893202e-05, "loss": 0.7255, "step": 3332 }, { "epoch": 0.5100229533282326, "grad_norm": 2.3857507284179795, "learning_rate": 1.746116732618643e-05, "loss": 0.8139, "step": 3333 }, { "epoch": 0.5101759755164499, "grad_norm": 2.2535908652855094, "learning_rate": 1.7459517085260186e-05, "loss": 0.7173, "step": 3334 }, { "epoch": 0.5103289977046672, "grad_norm": 2.182942551591896, "learning_rate": 1.7457866386215814e-05, "loss": 0.6888, "step": 3335 }, { "epoch": 0.5104820198928844, "grad_norm": 2.297465645975313, "learning_rate": 1.7456215229154693e-05, "loss": 0.7634, "step": 3336 }, { "epoch": 0.5106350420811018, "grad_norm": 2.3307719054135156, "learning_rate": 1.7454563614178223e-05, "loss": 0.789, "step": 3337 }, { "epoch": 0.510788064269319, "grad_norm": 2.380246890813595, "learning_rate": 1.7452911541387844e-05, "loss": 0.8229, "step": 3338 }, { "epoch": 0.5109410864575363, "grad_norm": 2.214607579600976, "learning_rate": 1.7451259010885008e-05, "loss": 0.7872, "step": 3339 }, { "epoch": 0.5110941086457537, "grad_norm": 2.5175143675456826, "learning_rate": 1.7449606022771206e-05, "loss": 0.9167, "step": 3340 }, { "epoch": 0.5112471308339709, "grad_norm": 2.2963224878966475, "learning_rate": 1.7447952577147957e-05, "loss": 0.8955, "step": 3341 }, { "epoch": 0.5114001530221882, "grad_norm": 2.6007771172167304, "learning_rate": 1.7446298674116803e-05, "loss": 0.7827, "step": 3342 }, { "epoch": 0.5115531752104056, "grad_norm": 2.619381696692319, "learning_rate": 1.7444644313779316e-05, "loss": 0.8316, "step": 3343 }, { "epoch": 0.5117061973986228, "grad_norm": 2.422295060929172, "learning_rate": 1.74429894962371e-05, "loss": 0.7311, "step": 3344 }, { "epoch": 0.5118592195868401, "grad_norm": 2.3663869854583544, "learning_rate": 1.7441334221591783e-05, "loss": 0.7173, "step": 3345 }, { "epoch": 0.5120122417750573, "grad_norm": 2.1570402298204225, "learning_rate": 1.7439678489945017e-05, "loss": 0.7325, "step": 3346 }, { "epoch": 0.5121652639632747, "grad_norm": 3.0964806046696944, "learning_rate": 1.7438022301398495e-05, "loss": 0.8387, "step": 3347 }, { "epoch": 0.512318286151492, "grad_norm": 2.431537047730991, "learning_rate": 1.743636565605392e-05, "loss": 0.7897, "step": 3348 }, { "epoch": 0.5124713083397092, "grad_norm": 2.091834176573096, "learning_rate": 1.7434708554013046e-05, "loss": 0.7902, "step": 3349 }, { "epoch": 0.5126243305279266, "grad_norm": 2.1331625981738775, "learning_rate": 1.743305099537763e-05, "loss": 0.679, "step": 3350 }, { "epoch": 0.5127773527161439, "grad_norm": 2.5746781871427182, "learning_rate": 1.7431392980249477e-05, "loss": 0.8677, "step": 3351 }, { "epoch": 0.5129303749043611, "grad_norm": 2.30631341726066, "learning_rate": 1.7429734508730404e-05, "loss": 0.7585, "step": 3352 }, { "epoch": 0.5130833970925784, "grad_norm": 2.426958964202821, "learning_rate": 1.7428075580922278e-05, "loss": 0.8328, "step": 3353 }, { "epoch": 0.5132364192807957, "grad_norm": 2.4386540509387076, "learning_rate": 1.7426416196926965e-05, "loss": 0.7015, "step": 3354 }, { "epoch": 0.513389441469013, "grad_norm": 2.3459506785389475, "learning_rate": 1.742475635684638e-05, "loss": 0.7653, "step": 3355 }, { "epoch": 0.5135424636572303, "grad_norm": 2.576562934756025, "learning_rate": 1.7423096060782466e-05, "loss": 0.8647, "step": 3356 }, { "epoch": 0.5136954858454476, "grad_norm": 2.508717422355279, "learning_rate": 1.7421435308837186e-05, "loss": 0.7108, "step": 3357 }, { "epoch": 0.5138485080336649, "grad_norm": 2.1278984177818536, "learning_rate": 1.7419774101112526e-05, "loss": 0.7988, "step": 3358 }, { "epoch": 0.5140015302218822, "grad_norm": 2.376063781505229, "learning_rate": 1.741811243771051e-05, "loss": 0.7497, "step": 3359 }, { "epoch": 0.5141545524100994, "grad_norm": 2.401044243211275, "learning_rate": 1.7416450318733194e-05, "loss": 0.8131, "step": 3360 }, { "epoch": 0.5143075745983168, "grad_norm": 2.3899515726116958, "learning_rate": 1.741478774428265e-05, "loss": 0.855, "step": 3361 }, { "epoch": 0.514460596786534, "grad_norm": 2.6320361366457314, "learning_rate": 1.741312471446098e-05, "loss": 0.7491, "step": 3362 }, { "epoch": 0.5146136189747513, "grad_norm": 2.214272223614381, "learning_rate": 1.7411461229370326e-05, "loss": 0.7406, "step": 3363 }, { "epoch": 0.5147666411629687, "grad_norm": 2.446861139903529, "learning_rate": 1.7409797289112842e-05, "loss": 0.7961, "step": 3364 }, { "epoch": 0.5149196633511859, "grad_norm": 2.6934573971392064, "learning_rate": 1.740813289379072e-05, "loss": 0.7586, "step": 3365 }, { "epoch": 0.5150726855394032, "grad_norm": 2.2123795928387264, "learning_rate": 1.7406468043506176e-05, "loss": 0.7273, "step": 3366 }, { "epoch": 0.5152257077276206, "grad_norm": 2.243404809270673, "learning_rate": 1.740480273836145e-05, "loss": 0.7537, "step": 3367 }, { "epoch": 0.5153787299158378, "grad_norm": 2.250245346866966, "learning_rate": 1.7403136978458828e-05, "loss": 0.7529, "step": 3368 }, { "epoch": 0.5155317521040551, "grad_norm": 2.3461943594132966, "learning_rate": 1.74014707639006e-05, "loss": 0.78, "step": 3369 }, { "epoch": 0.5156847742922723, "grad_norm": 2.2217893071169224, "learning_rate": 1.7399804094789096e-05, "loss": 0.7165, "step": 3370 }, { "epoch": 0.5158377964804897, "grad_norm": 2.2631879850970464, "learning_rate": 1.7398136971226677e-05, "loss": 0.7602, "step": 3371 }, { "epoch": 0.515990818668707, "grad_norm": 2.431585278848265, "learning_rate": 1.739646939331572e-05, "loss": 0.771, "step": 3372 }, { "epoch": 0.5161438408569242, "grad_norm": 2.4202710096861506, "learning_rate": 1.7394801361158648e-05, "loss": 0.7569, "step": 3373 }, { "epoch": 0.5162968630451416, "grad_norm": 2.535564085626204, "learning_rate": 1.7393132874857894e-05, "loss": 0.8373, "step": 3374 }, { "epoch": 0.5164498852333589, "grad_norm": 2.5135595738903467, "learning_rate": 1.739146393451593e-05, "loss": 0.8715, "step": 3375 }, { "epoch": 0.5166029074215761, "grad_norm": 2.095406374528688, "learning_rate": 1.7389794540235246e-05, "loss": 0.7633, "step": 3376 }, { "epoch": 0.5167559296097934, "grad_norm": 2.686993076718003, "learning_rate": 1.7388124692118373e-05, "loss": 0.7197, "step": 3377 }, { "epoch": 0.5169089517980107, "grad_norm": 2.4490117625560517, "learning_rate": 1.7386454390267857e-05, "loss": 0.8004, "step": 3378 }, { "epoch": 0.517061973986228, "grad_norm": 2.301469614522371, "learning_rate": 1.7384783634786284e-05, "loss": 0.7179, "step": 3379 }, { "epoch": 0.5172149961744453, "grad_norm": 2.432510545681375, "learning_rate": 1.738311242577626e-05, "loss": 0.8886, "step": 3380 }, { "epoch": 0.5173680183626626, "grad_norm": 2.4190454792921963, "learning_rate": 1.7381440763340416e-05, "loss": 0.867, "step": 3381 }, { "epoch": 0.5175210405508799, "grad_norm": 2.5166664142471924, "learning_rate": 1.737976864758142e-05, "loss": 0.8787, "step": 3382 }, { "epoch": 0.5176740627390972, "grad_norm": 2.4588195664926578, "learning_rate": 1.7378096078601962e-05, "loss": 0.8622, "step": 3383 }, { "epoch": 0.5178270849273144, "grad_norm": 2.365906374128987, "learning_rate": 1.7376423056504756e-05, "loss": 0.7523, "step": 3384 }, { "epoch": 0.5179801071155318, "grad_norm": 2.2284179451077035, "learning_rate": 1.7374749581392558e-05, "loss": 0.7016, "step": 3385 }, { "epoch": 0.518133129303749, "grad_norm": 2.252164274842456, "learning_rate": 1.7373075653368137e-05, "loss": 0.76, "step": 3386 }, { "epoch": 0.5182861514919663, "grad_norm": 2.3064049524967674, "learning_rate": 1.7371401272534295e-05, "loss": 0.8086, "step": 3387 }, { "epoch": 0.5184391736801837, "grad_norm": 2.3002293014541673, "learning_rate": 1.7369726438993865e-05, "loss": 0.8244, "step": 3388 }, { "epoch": 0.5185921958684009, "grad_norm": 2.495992362678814, "learning_rate": 1.7368051152849704e-05, "loss": 0.8959, "step": 3389 }, { "epoch": 0.5187452180566182, "grad_norm": 2.3947045552206445, "learning_rate": 1.73663754142047e-05, "loss": 0.8003, "step": 3390 }, { "epoch": 0.5188982402448356, "grad_norm": 2.31492330849426, "learning_rate": 1.736469922316176e-05, "loss": 0.6984, "step": 3391 }, { "epoch": 0.5190512624330528, "grad_norm": 2.237344088919252, "learning_rate": 1.736302257982383e-05, "loss": 0.8371, "step": 3392 }, { "epoch": 0.5192042846212701, "grad_norm": 2.7400758010484645, "learning_rate": 1.7361345484293882e-05, "loss": 0.9044, "step": 3393 }, { "epoch": 0.5193573068094873, "grad_norm": 2.33269965816581, "learning_rate": 1.7359667936674907e-05, "loss": 0.7688, "step": 3394 }, { "epoch": 0.5195103289977047, "grad_norm": 2.429817087511107, "learning_rate": 1.7357989937069936e-05, "loss": 0.8199, "step": 3395 }, { "epoch": 0.519663351185922, "grad_norm": 2.3330509991957933, "learning_rate": 1.735631148558202e-05, "loss": 0.7509, "step": 3396 }, { "epoch": 0.5198163733741392, "grad_norm": 2.116684148235966, "learning_rate": 1.7354632582314232e-05, "loss": 0.7309, "step": 3397 }, { "epoch": 0.5199693955623566, "grad_norm": 2.4278111061121836, "learning_rate": 1.735295322736969e-05, "loss": 0.8164, "step": 3398 }, { "epoch": 0.5201224177505739, "grad_norm": 2.2950130329897944, "learning_rate": 1.7351273420851522e-05, "loss": 0.7755, "step": 3399 }, { "epoch": 0.5202754399387911, "grad_norm": 2.3013900005039796, "learning_rate": 1.7349593162862896e-05, "loss": 0.7645, "step": 3400 }, { "epoch": 0.5204284621270084, "grad_norm": 2.131498202133757, "learning_rate": 1.7347912453507003e-05, "loss": 0.7436, "step": 3401 }, { "epoch": 0.5205814843152257, "grad_norm": 2.0513736202538104, "learning_rate": 1.734623129288706e-05, "loss": 0.7005, "step": 3402 }, { "epoch": 0.520734506503443, "grad_norm": 2.458358694468482, "learning_rate": 1.734454968110632e-05, "loss": 0.7273, "step": 3403 }, { "epoch": 0.5208875286916603, "grad_norm": 2.492268056512554, "learning_rate": 1.7342867618268043e-05, "loss": 0.7453, "step": 3404 }, { "epoch": 0.5210405508798776, "grad_norm": 2.2243137346199444, "learning_rate": 1.7341185104475546e-05, "loss": 0.7455, "step": 3405 }, { "epoch": 0.5211935730680949, "grad_norm": 2.3585772891771684, "learning_rate": 1.733950213983215e-05, "loss": 0.8491, "step": 3406 }, { "epoch": 0.5213465952563122, "grad_norm": 2.301805032044261, "learning_rate": 1.733781872444122e-05, "loss": 0.8319, "step": 3407 }, { "epoch": 0.5214996174445294, "grad_norm": 2.2982512317533246, "learning_rate": 1.7336134858406134e-05, "loss": 0.7436, "step": 3408 }, { "epoch": 0.5216526396327468, "grad_norm": 2.335280353022862, "learning_rate": 1.7334450541830305e-05, "loss": 0.7517, "step": 3409 }, { "epoch": 0.521805661820964, "grad_norm": 2.5502609428808394, "learning_rate": 1.7332765774817177e-05, "loss": 0.8186, "step": 3410 }, { "epoch": 0.5219586840091813, "grad_norm": 2.415137367526865, "learning_rate": 1.7331080557470214e-05, "loss": 0.7861, "step": 3411 }, { "epoch": 0.5221117061973987, "grad_norm": 2.720231789733179, "learning_rate": 1.732939488989292e-05, "loss": 0.8509, "step": 3412 }, { "epoch": 0.5222647283856159, "grad_norm": 2.2490141250767057, "learning_rate": 1.732770877218881e-05, "loss": 0.6168, "step": 3413 }, { "epoch": 0.5224177505738332, "grad_norm": 2.255322676967083, "learning_rate": 1.7326022204461438e-05, "loss": 0.8309, "step": 3414 }, { "epoch": 0.5225707727620506, "grad_norm": 2.403136772714949, "learning_rate": 1.7324335186814382e-05, "loss": 0.7862, "step": 3415 }, { "epoch": 0.5227237949502678, "grad_norm": 2.4871382173190963, "learning_rate": 1.7322647719351253e-05, "loss": 0.7874, "step": 3416 }, { "epoch": 0.5228768171384851, "grad_norm": 2.0456564998057143, "learning_rate": 1.732095980217568e-05, "loss": 0.7629, "step": 3417 }, { "epoch": 0.5230298393267023, "grad_norm": 2.140116669546573, "learning_rate": 1.7319271435391326e-05, "loss": 0.7619, "step": 3418 }, { "epoch": 0.5231828615149197, "grad_norm": 2.2570260777984443, "learning_rate": 1.7317582619101876e-05, "loss": 0.7535, "step": 3419 }, { "epoch": 0.523335883703137, "grad_norm": 2.3392895682248445, "learning_rate": 1.7315893353411052e-05, "loss": 0.824, "step": 3420 }, { "epoch": 0.5234889058913542, "grad_norm": 2.543168315321036, "learning_rate": 1.73142036384226e-05, "loss": 0.7372, "step": 3421 }, { "epoch": 0.5236419280795716, "grad_norm": 2.1197882622093034, "learning_rate": 1.731251347424029e-05, "loss": 0.772, "step": 3422 }, { "epoch": 0.5237949502677889, "grad_norm": 2.5217333583866535, "learning_rate": 1.731082286096792e-05, "loss": 0.7934, "step": 3423 }, { "epoch": 0.5239479724560061, "grad_norm": 2.308111842163604, "learning_rate": 1.7309131798709313e-05, "loss": 0.8, "step": 3424 }, { "epoch": 0.5241009946442234, "grad_norm": 2.1948172683467866, "learning_rate": 1.7307440287568335e-05, "loss": 0.7586, "step": 3425 }, { "epoch": 0.5242540168324407, "grad_norm": 2.349110205686944, "learning_rate": 1.730574832764886e-05, "loss": 0.8168, "step": 3426 }, { "epoch": 0.524407039020658, "grad_norm": 2.478957873778854, "learning_rate": 1.73040559190548e-05, "loss": 0.7624, "step": 3427 }, { "epoch": 0.5245600612088753, "grad_norm": 2.2363519273765573, "learning_rate": 1.730236306189009e-05, "loss": 0.7556, "step": 3428 }, { "epoch": 0.5247130833970926, "grad_norm": 2.4000273014466638, "learning_rate": 1.73006697562587e-05, "loss": 0.631, "step": 3429 }, { "epoch": 0.5248661055853099, "grad_norm": 2.296172924033883, "learning_rate": 1.729897600226462e-05, "loss": 0.6752, "step": 3430 }, { "epoch": 0.5250191277735272, "grad_norm": 2.4068767186514926, "learning_rate": 1.729728180001187e-05, "loss": 0.768, "step": 3431 }, { "epoch": 0.5251721499617444, "grad_norm": 2.0931991854527574, "learning_rate": 1.7295587149604496e-05, "loss": 0.7415, "step": 3432 }, { "epoch": 0.5253251721499618, "grad_norm": 2.2446215315731455, "learning_rate": 1.7293892051146575e-05, "loss": 0.7315, "step": 3433 }, { "epoch": 0.525478194338179, "grad_norm": 2.31206674583333, "learning_rate": 1.729219650474221e-05, "loss": 0.7596, "step": 3434 }, { "epoch": 0.5256312165263963, "grad_norm": 2.4780596800982195, "learning_rate": 1.7290500510495532e-05, "loss": 0.8166, "step": 3435 }, { "epoch": 0.5257842387146137, "grad_norm": 2.2182491100816977, "learning_rate": 1.7288804068510695e-05, "loss": 0.7766, "step": 3436 }, { "epoch": 0.5259372609028309, "grad_norm": 2.5993499183042323, "learning_rate": 1.7287107178891886e-05, "loss": 0.7479, "step": 3437 }, { "epoch": 0.5260902830910482, "grad_norm": 2.6164904758032295, "learning_rate": 1.7285409841743318e-05, "loss": 0.8316, "step": 3438 }, { "epoch": 0.5262433052792654, "grad_norm": 2.1482006073526083, "learning_rate": 1.728371205716923e-05, "loss": 0.7556, "step": 3439 }, { "epoch": 0.5263963274674828, "grad_norm": 2.2992580395879236, "learning_rate": 1.7282013825273894e-05, "loss": 0.8617, "step": 3440 }, { "epoch": 0.5265493496557001, "grad_norm": 2.760122409809691, "learning_rate": 1.7280315146161595e-05, "loss": 0.8638, "step": 3441 }, { "epoch": 0.5267023718439173, "grad_norm": 2.413735791441003, "learning_rate": 1.727861601993667e-05, "loss": 0.7804, "step": 3442 }, { "epoch": 0.5268553940321347, "grad_norm": 3.149252704804255, "learning_rate": 1.727691644670346e-05, "loss": 0.7932, "step": 3443 }, { "epoch": 0.527008416220352, "grad_norm": 2.2225478603436235, "learning_rate": 1.727521642656634e-05, "loss": 0.7971, "step": 3444 }, { "epoch": 0.5271614384085692, "grad_norm": 2.2044358309518524, "learning_rate": 1.7273515959629715e-05, "loss": 0.6773, "step": 3445 }, { "epoch": 0.5273144605967865, "grad_norm": 2.4651261717517774, "learning_rate": 1.727181504599803e-05, "loss": 0.8615, "step": 3446 }, { "epoch": 0.5274674827850038, "grad_norm": 2.5505959614561786, "learning_rate": 1.7270113685775728e-05, "loss": 0.8561, "step": 3447 }, { "epoch": 0.5276205049732211, "grad_norm": 2.2154727272181036, "learning_rate": 1.7268411879067305e-05, "loss": 0.7426, "step": 3448 }, { "epoch": 0.5277735271614384, "grad_norm": 2.226037165166472, "learning_rate": 1.7266709625977277e-05, "loss": 0.7895, "step": 3449 }, { "epoch": 0.5279265493496557, "grad_norm": 2.533881833418401, "learning_rate": 1.7265006926610183e-05, "loss": 0.7938, "step": 3450 }, { "epoch": 0.528079571537873, "grad_norm": 2.4668535803573213, "learning_rate": 1.726330378107059e-05, "loss": 0.7951, "step": 3451 }, { "epoch": 0.5282325937260903, "grad_norm": 2.213391599044407, "learning_rate": 1.72616001894631e-05, "loss": 0.6765, "step": 3452 }, { "epoch": 0.5283856159143075, "grad_norm": 2.231713283875013, "learning_rate": 1.7259896151892336e-05, "loss": 0.7737, "step": 3453 }, { "epoch": 0.5285386381025249, "grad_norm": 2.203632199838888, "learning_rate": 1.7258191668462948e-05, "loss": 0.7826, "step": 3454 }, { "epoch": 0.5286916602907421, "grad_norm": 2.2665233994578076, "learning_rate": 1.725648673927961e-05, "loss": 0.7484, "step": 3455 }, { "epoch": 0.5288446824789594, "grad_norm": 2.2420393983606353, "learning_rate": 1.725478136444704e-05, "loss": 0.8268, "step": 3456 }, { "epoch": 0.5289977046671768, "grad_norm": 2.6467064473764834, "learning_rate": 1.7253075544069964e-05, "loss": 0.8971, "step": 3457 }, { "epoch": 0.529150726855394, "grad_norm": 2.8179506973708786, "learning_rate": 1.7251369278253146e-05, "loss": 0.7285, "step": 3458 }, { "epoch": 0.5293037490436113, "grad_norm": 2.0736781024821838, "learning_rate": 1.724966256710137e-05, "loss": 0.7716, "step": 3459 }, { "epoch": 0.5294567712318287, "grad_norm": 2.3075858700032037, "learning_rate": 1.7247955410719454e-05, "loss": 0.7077, "step": 3460 }, { "epoch": 0.5296097934200459, "grad_norm": 2.325588149675756, "learning_rate": 1.7246247809212248e-05, "loss": 0.753, "step": 3461 }, { "epoch": 0.5297628156082632, "grad_norm": 2.3361579258165883, "learning_rate": 1.7244539762684607e-05, "loss": 0.8598, "step": 3462 }, { "epoch": 0.5299158377964804, "grad_norm": 2.105536512175094, "learning_rate": 1.7242831271241446e-05, "loss": 0.8154, "step": 3463 }, { "epoch": 0.5300688599846978, "grad_norm": 2.3968996850665794, "learning_rate": 1.7241122334987684e-05, "loss": 0.8799, "step": 3464 }, { "epoch": 0.5302218821729151, "grad_norm": 2.484649805300681, "learning_rate": 1.7239412954028268e-05, "loss": 0.8878, "step": 3465 }, { "epoch": 0.5303749043611323, "grad_norm": 2.41188080989505, "learning_rate": 1.723770312846818e-05, "loss": 0.8617, "step": 3466 }, { "epoch": 0.5305279265493497, "grad_norm": 2.5004806531008734, "learning_rate": 1.7235992858412432e-05, "loss": 0.8951, "step": 3467 }, { "epoch": 0.530680948737567, "grad_norm": 2.3251879051654063, "learning_rate": 1.7234282143966056e-05, "loss": 0.7946, "step": 3468 }, { "epoch": 0.5308339709257842, "grad_norm": 2.270779230163958, "learning_rate": 1.723257098523411e-05, "loss": 0.814, "step": 3469 }, { "epoch": 0.5309869931140015, "grad_norm": 2.318798194460353, "learning_rate": 1.7230859382321686e-05, "loss": 0.8017, "step": 3470 }, { "epoch": 0.5311400153022188, "grad_norm": 2.3221217836842274, "learning_rate": 1.7229147335333906e-05, "loss": 0.7764, "step": 3471 }, { "epoch": 0.5312930374904361, "grad_norm": 2.1502170801701377, "learning_rate": 1.72274348443759e-05, "loss": 0.7496, "step": 3472 }, { "epoch": 0.5314460596786534, "grad_norm": 2.2513830788602442, "learning_rate": 1.722572190955285e-05, "loss": 0.8413, "step": 3473 }, { "epoch": 0.5315990818668707, "grad_norm": 2.3165428770546392, "learning_rate": 1.722400853096995e-05, "loss": 0.7304, "step": 3474 }, { "epoch": 0.531752104055088, "grad_norm": 2.022193837277281, "learning_rate": 1.7222294708732423e-05, "loss": 0.8124, "step": 3475 }, { "epoch": 0.5319051262433053, "grad_norm": 2.240338201650682, "learning_rate": 1.7220580442945527e-05, "loss": 0.7717, "step": 3476 }, { "epoch": 0.5320581484315225, "grad_norm": 2.279369131283573, "learning_rate": 1.7218865733714538e-05, "loss": 0.8536, "step": 3477 }, { "epoch": 0.5322111706197399, "grad_norm": 2.684390602188598, "learning_rate": 1.7217150581144767e-05, "loss": 0.8194, "step": 3478 }, { "epoch": 0.5323641928079571, "grad_norm": 1.9666810860473458, "learning_rate": 1.721543498534154e-05, "loss": 0.7184, "step": 3479 }, { "epoch": 0.5325172149961744, "grad_norm": 2.1966123033514364, "learning_rate": 1.7213718946410227e-05, "loss": 0.6808, "step": 3480 }, { "epoch": 0.5326702371843918, "grad_norm": 2.839505940014808, "learning_rate": 1.721200246445621e-05, "loss": 0.7687, "step": 3481 }, { "epoch": 0.532823259372609, "grad_norm": 2.396567722242438, "learning_rate": 1.7210285539584913e-05, "loss": 0.7691, "step": 3482 }, { "epoch": 0.5329762815608263, "grad_norm": 2.4196703683338625, "learning_rate": 1.7208568171901768e-05, "loss": 0.7263, "step": 3483 }, { "epoch": 0.5331293037490437, "grad_norm": 2.3478722383711967, "learning_rate": 1.720685036151226e-05, "loss": 0.7947, "step": 3484 }, { "epoch": 0.5332823259372609, "grad_norm": 2.463647890976903, "learning_rate": 1.7205132108521868e-05, "loss": 0.776, "step": 3485 }, { "epoch": 0.5334353481254782, "grad_norm": 2.465606762967954, "learning_rate": 1.7203413413036132e-05, "loss": 0.8351, "step": 3486 }, { "epoch": 0.5335883703136954, "grad_norm": 2.4476074906053005, "learning_rate": 1.72016942751606e-05, "loss": 0.8652, "step": 3487 }, { "epoch": 0.5337413925019128, "grad_norm": 2.3371820769390217, "learning_rate": 1.7199974695000844e-05, "loss": 0.7619, "step": 3488 }, { "epoch": 0.5338944146901301, "grad_norm": 2.016240201556696, "learning_rate": 1.7198254672662482e-05, "loss": 0.7056, "step": 3489 }, { "epoch": 0.5340474368783473, "grad_norm": 2.1501317642039823, "learning_rate": 1.7196534208251138e-05, "loss": 0.7003, "step": 3490 }, { "epoch": 0.5342004590665647, "grad_norm": 2.428358228929068, "learning_rate": 1.7194813301872475e-05, "loss": 0.6957, "step": 3491 }, { "epoch": 0.534353481254782, "grad_norm": 2.3692668417947824, "learning_rate": 1.719309195363218e-05, "loss": 0.833, "step": 3492 }, { "epoch": 0.5345065034429992, "grad_norm": 2.2518561612504544, "learning_rate": 1.7191370163635968e-05, "loss": 0.8864, "step": 3493 }, { "epoch": 0.5346595256312165, "grad_norm": 2.1806620122914433, "learning_rate": 1.7189647931989584e-05, "loss": 0.7775, "step": 3494 }, { "epoch": 0.5348125478194338, "grad_norm": 2.1202465956947107, "learning_rate": 1.7187925258798795e-05, "loss": 0.6904, "step": 3495 }, { "epoch": 0.5349655700076511, "grad_norm": 2.293147683612584, "learning_rate": 1.7186202144169396e-05, "loss": 0.7109, "step": 3496 }, { "epoch": 0.5351185921958684, "grad_norm": 2.387009081930552, "learning_rate": 1.7184478588207208e-05, "loss": 0.8928, "step": 3497 }, { "epoch": 0.5352716143840857, "grad_norm": 2.036587619586427, "learning_rate": 1.7182754591018084e-05, "loss": 0.7308, "step": 3498 }, { "epoch": 0.535424636572303, "grad_norm": 2.23617394688373, "learning_rate": 1.7181030152707903e-05, "loss": 0.8534, "step": 3499 }, { "epoch": 0.5355776587605203, "grad_norm": 2.4902832725810464, "learning_rate": 1.7179305273382567e-05, "loss": 0.898, "step": 3500 }, { "epoch": 0.5357306809487375, "grad_norm": 2.2712921361457936, "learning_rate": 1.717757995314801e-05, "loss": 0.7424, "step": 3501 }, { "epoch": 0.5358837031369549, "grad_norm": 2.1869051131580823, "learning_rate": 1.717585419211019e-05, "loss": 0.8083, "step": 3502 }, { "epoch": 0.5360367253251721, "grad_norm": 2.0173761674315616, "learning_rate": 1.7174127990375092e-05, "loss": 0.7098, "step": 3503 }, { "epoch": 0.5361897475133894, "grad_norm": 2.7480203050102974, "learning_rate": 1.7172401348048726e-05, "loss": 0.8463, "step": 3504 }, { "epoch": 0.5363427697016068, "grad_norm": 2.2412224884608185, "learning_rate": 1.7170674265237136e-05, "loss": 0.7697, "step": 3505 }, { "epoch": 0.536495791889824, "grad_norm": 2.317814020727311, "learning_rate": 1.7168946742046388e-05, "loss": 0.7449, "step": 3506 }, { "epoch": 0.5366488140780413, "grad_norm": 2.60268449435134, "learning_rate": 1.7167218778582575e-05, "loss": 0.7636, "step": 3507 }, { "epoch": 0.5368018362662587, "grad_norm": 2.4034307976445, "learning_rate": 1.716549037495182e-05, "loss": 0.7584, "step": 3508 }, { "epoch": 0.5369548584544759, "grad_norm": 2.181778255040697, "learning_rate": 1.7163761531260267e-05, "loss": 0.8163, "step": 3509 }, { "epoch": 0.5371078806426932, "grad_norm": 2.144458168411783, "learning_rate": 1.7162032247614097e-05, "loss": 0.7615, "step": 3510 }, { "epoch": 0.5372609028309104, "grad_norm": 2.277975562378708, "learning_rate": 1.7160302524119506e-05, "loss": 0.7225, "step": 3511 }, { "epoch": 0.5374139250191278, "grad_norm": 2.1812185639721324, "learning_rate": 1.7158572360882723e-05, "loss": 0.7146, "step": 3512 }, { "epoch": 0.5375669472073451, "grad_norm": 2.487858008956128, "learning_rate": 1.7156841758010015e-05, "loss": 0.8443, "step": 3513 }, { "epoch": 0.5377199693955623, "grad_norm": 2.451809471203112, "learning_rate": 1.715511071560765e-05, "loss": 0.8188, "step": 3514 }, { "epoch": 0.5378729915837797, "grad_norm": 2.4906401899132127, "learning_rate": 1.7153379233781947e-05, "loss": 0.7324, "step": 3515 }, { "epoch": 0.538026013771997, "grad_norm": 2.3799176640369293, "learning_rate": 1.715164731263924e-05, "loss": 0.7876, "step": 3516 }, { "epoch": 0.5381790359602142, "grad_norm": 2.256999436428998, "learning_rate": 1.7149914952285896e-05, "loss": 0.704, "step": 3517 }, { "epoch": 0.5383320581484315, "grad_norm": 2.375566540112142, "learning_rate": 1.7148182152828302e-05, "loss": 0.7552, "step": 3518 }, { "epoch": 0.5384850803366488, "grad_norm": 2.396394674063, "learning_rate": 1.714644891437288e-05, "loss": 0.8285, "step": 3519 }, { "epoch": 0.5386381025248661, "grad_norm": 2.214449654913485, "learning_rate": 1.7144715237026073e-05, "loss": 0.8153, "step": 3520 }, { "epoch": 0.5387911247130834, "grad_norm": 2.440366421084005, "learning_rate": 1.714298112089435e-05, "loss": 0.7928, "step": 3521 }, { "epoch": 0.5389441469013007, "grad_norm": 2.038324486460608, "learning_rate": 1.7141246566084218e-05, "loss": 0.6482, "step": 3522 }, { "epoch": 0.539097169089518, "grad_norm": 2.718259058349394, "learning_rate": 1.7139511572702195e-05, "loss": 0.8081, "step": 3523 }, { "epoch": 0.5392501912777353, "grad_norm": 2.5128057012131255, "learning_rate": 1.7137776140854838e-05, "loss": 0.8716, "step": 3524 }, { "epoch": 0.5394032134659525, "grad_norm": 2.484955535252938, "learning_rate": 1.713604027064872e-05, "loss": 0.8966, "step": 3525 }, { "epoch": 0.5395562356541699, "grad_norm": 2.329587732262935, "learning_rate": 1.713430396219046e-05, "loss": 0.7159, "step": 3526 }, { "epoch": 0.5397092578423871, "grad_norm": 2.2577950281919956, "learning_rate": 1.713256721558668e-05, "loss": 0.7494, "step": 3527 }, { "epoch": 0.5398622800306044, "grad_norm": 2.2125935730221107, "learning_rate": 1.7130830030944042e-05, "loss": 0.743, "step": 3528 }, { "epoch": 0.5400153022188218, "grad_norm": 2.4576951883990286, "learning_rate": 1.712909240836924e-05, "loss": 0.7244, "step": 3529 }, { "epoch": 0.540168324407039, "grad_norm": 2.4385213443689078, "learning_rate": 1.7127354347968984e-05, "loss": 0.8586, "step": 3530 }, { "epoch": 0.5403213465952563, "grad_norm": 2.281079066866731, "learning_rate": 1.712561584985001e-05, "loss": 0.8375, "step": 3531 }, { "epoch": 0.5404743687834737, "grad_norm": 2.114376916900364, "learning_rate": 1.7123876914119097e-05, "loss": 0.7872, "step": 3532 }, { "epoch": 0.5406273909716909, "grad_norm": 2.3214355956058057, "learning_rate": 1.7122137540883034e-05, "loss": 0.8094, "step": 3533 }, { "epoch": 0.5407804131599082, "grad_norm": 2.2774113626957377, "learning_rate": 1.712039773024864e-05, "loss": 0.7912, "step": 3534 }, { "epoch": 0.5409334353481254, "grad_norm": 2.3514449429395685, "learning_rate": 1.7118657482322768e-05, "loss": 0.8295, "step": 3535 }, { "epoch": 0.5410864575363428, "grad_norm": 2.388457809727973, "learning_rate": 1.7116916797212288e-05, "loss": 0.8479, "step": 3536 }, { "epoch": 0.5412394797245601, "grad_norm": 2.4052542394521623, "learning_rate": 1.7115175675024107e-05, "loss": 0.8573, "step": 3537 }, { "epoch": 0.5413925019127773, "grad_norm": 2.281123645319344, "learning_rate": 1.7113434115865157e-05, "loss": 0.7488, "step": 3538 }, { "epoch": 0.5415455241009947, "grad_norm": 2.0479313985657197, "learning_rate": 1.7111692119842388e-05, "loss": 0.7098, "step": 3539 }, { "epoch": 0.541698546289212, "grad_norm": 2.503934028957903, "learning_rate": 1.7109949687062784e-05, "loss": 0.7766, "step": 3540 }, { "epoch": 0.5418515684774292, "grad_norm": 2.2920623120918076, "learning_rate": 1.7108206817633355e-05, "loss": 0.7058, "step": 3541 }, { "epoch": 0.5420045906656465, "grad_norm": 2.3190499618303715, "learning_rate": 1.7106463511661136e-05, "loss": 0.8249, "step": 3542 }, { "epoch": 0.5421576128538638, "grad_norm": 2.3367282852844093, "learning_rate": 1.7104719769253196e-05, "loss": 0.8424, "step": 3543 }, { "epoch": 0.5423106350420811, "grad_norm": 2.459648815663863, "learning_rate": 1.7102975590516622e-05, "loss": 0.7337, "step": 3544 }, { "epoch": 0.5424636572302984, "grad_norm": 2.1848741538774243, "learning_rate": 1.710123097555853e-05, "loss": 0.6866, "step": 3545 }, { "epoch": 0.5426166794185157, "grad_norm": 2.2894374221802813, "learning_rate": 1.709948592448606e-05, "loss": 1.0706, "step": 3546 }, { "epoch": 0.542769701606733, "grad_norm": 2.1592314348992354, "learning_rate": 1.7097740437406388e-05, "loss": 0.79, "step": 3547 }, { "epoch": 0.5429227237949502, "grad_norm": 2.4608898527446863, "learning_rate": 1.7095994514426704e-05, "loss": 0.8597, "step": 3548 }, { "epoch": 0.5430757459831675, "grad_norm": 2.3433110444464886, "learning_rate": 1.7094248155654245e-05, "loss": 0.8365, "step": 3549 }, { "epoch": 0.5432287681713849, "grad_norm": 2.15572265762958, "learning_rate": 1.709250136119625e-05, "loss": 0.7956, "step": 3550 }, { "epoch": 0.5433817903596021, "grad_norm": 2.3729171950028247, "learning_rate": 1.7090754131160002e-05, "loss": 0.816, "step": 3551 }, { "epoch": 0.5435348125478194, "grad_norm": 2.2130874362562074, "learning_rate": 1.7089006465652803e-05, "loss": 0.776, "step": 3552 }, { "epoch": 0.5436878347360368, "grad_norm": 2.0911755353057626, "learning_rate": 1.7087258364781983e-05, "loss": 0.7587, "step": 3553 }, { "epoch": 0.543840856924254, "grad_norm": 2.4086231529019995, "learning_rate": 1.7085509828654903e-05, "loss": 0.7506, "step": 3554 }, { "epoch": 0.5439938791124713, "grad_norm": 2.472931732242185, "learning_rate": 1.7083760857378944e-05, "loss": 0.8069, "step": 3555 }, { "epoch": 0.5441469013006885, "grad_norm": 2.2070865464878966, "learning_rate": 1.708201145106152e-05, "loss": 0.7785, "step": 3556 }, { "epoch": 0.5442999234889059, "grad_norm": 2.2713818289159096, "learning_rate": 1.708026160981007e-05, "loss": 0.7081, "step": 3557 }, { "epoch": 0.5444529456771232, "grad_norm": 2.092372395572879, "learning_rate": 1.7078511333732055e-05, "loss": 0.8032, "step": 3558 }, { "epoch": 0.5446059678653404, "grad_norm": 2.4299843117798483, "learning_rate": 1.7076760622934968e-05, "loss": 0.8316, "step": 3559 }, { "epoch": 0.5447589900535578, "grad_norm": 2.3366226179643514, "learning_rate": 1.7075009477526326e-05, "loss": 0.7562, "step": 3560 }, { "epoch": 0.5449120122417751, "grad_norm": 2.1968180719323662, "learning_rate": 1.7073257897613676e-05, "loss": 0.8084, "step": 3561 }, { "epoch": 0.5450650344299923, "grad_norm": 2.2329839134922658, "learning_rate": 1.7071505883304583e-05, "loss": 0.7897, "step": 3562 }, { "epoch": 0.5452180566182097, "grad_norm": 2.1279812197279533, "learning_rate": 1.706975343470665e-05, "loss": 0.7008, "step": 3563 }, { "epoch": 0.5453710788064269, "grad_norm": 2.457635225867449, "learning_rate": 1.706800055192751e-05, "loss": 0.8133, "step": 3564 }, { "epoch": 0.5455241009946442, "grad_norm": 2.5452787153874588, "learning_rate": 1.70662472350748e-05, "loss": 0.8601, "step": 3565 }, { "epoch": 0.5456771231828615, "grad_norm": 2.2487105216791896, "learning_rate": 1.7064493484256203e-05, "loss": 0.7402, "step": 3566 }, { "epoch": 0.5458301453710788, "grad_norm": 1.9842725323794457, "learning_rate": 1.7062739299579428e-05, "loss": 0.7388, "step": 3567 }, { "epoch": 0.5459831675592961, "grad_norm": 2.235134268947892, "learning_rate": 1.7060984681152198e-05, "loss": 0.7448, "step": 3568 }, { "epoch": 0.5461361897475134, "grad_norm": 2.3743457059092252, "learning_rate": 1.705922962908228e-05, "loss": 0.6938, "step": 3569 }, { "epoch": 0.5462892119357307, "grad_norm": 2.2343627337316208, "learning_rate": 1.7057474143477453e-05, "loss": 0.7406, "step": 3570 }, { "epoch": 0.546442234123948, "grad_norm": 2.0820331103238745, "learning_rate": 1.7055718224445536e-05, "loss": 0.7538, "step": 3571 }, { "epoch": 0.5465952563121652, "grad_norm": 2.45795322798732, "learning_rate": 1.705396187209435e-05, "loss": 0.8631, "step": 3572 }, { "epoch": 0.5467482785003825, "grad_norm": 2.0978156116712694, "learning_rate": 1.7052205086531777e-05, "loss": 0.7448, "step": 3573 }, { "epoch": 0.5469013006885999, "grad_norm": 2.8515911974508024, "learning_rate": 1.7050447867865703e-05, "loss": 0.7892, "step": 3574 }, { "epoch": 0.5470543228768171, "grad_norm": 2.4076632991660487, "learning_rate": 1.704869021620404e-05, "loss": 0.8385, "step": 3575 }, { "epoch": 0.5472073450650344, "grad_norm": 2.0957414053029826, "learning_rate": 1.7046932131654736e-05, "loss": 0.739, "step": 3576 }, { "epoch": 0.5473603672532518, "grad_norm": 2.5671839888269194, "learning_rate": 1.704517361432576e-05, "loss": 0.8024, "step": 3577 }, { "epoch": 0.547513389441469, "grad_norm": 2.3047558703626185, "learning_rate": 1.7043414664325117e-05, "loss": 0.7954, "step": 3578 }, { "epoch": 0.5476664116296863, "grad_norm": 2.3600811561008017, "learning_rate": 1.7041655281760824e-05, "loss": 0.7037, "step": 3579 }, { "epoch": 0.5478194338179035, "grad_norm": 2.3556484582162147, "learning_rate": 1.703989546674093e-05, "loss": 0.8624, "step": 3580 }, { "epoch": 0.5479724560061209, "grad_norm": 2.275299767646912, "learning_rate": 1.7038135219373518e-05, "loss": 0.8452, "step": 3581 }, { "epoch": 0.5481254781943382, "grad_norm": 2.4187029345883224, "learning_rate": 1.7036374539766688e-05, "loss": 0.7792, "step": 3582 }, { "epoch": 0.5482785003825554, "grad_norm": 2.206175619502996, "learning_rate": 1.703461342802857e-05, "loss": 0.8213, "step": 3583 }, { "epoch": 0.5484315225707728, "grad_norm": 2.6504692224635553, "learning_rate": 1.7032851884267323e-05, "loss": 0.8608, "step": 3584 }, { "epoch": 0.5485845447589901, "grad_norm": 4.131748832937652, "learning_rate": 1.703108990859113e-05, "loss": 0.8366, "step": 3585 }, { "epoch": 0.5487375669472073, "grad_norm": 2.2101905827124373, "learning_rate": 1.70293275011082e-05, "loss": 0.7445, "step": 3586 }, { "epoch": 0.5488905891354247, "grad_norm": 2.4726466589537517, "learning_rate": 1.7027564661926766e-05, "loss": 0.7431, "step": 3587 }, { "epoch": 0.5490436113236419, "grad_norm": 2.4049526878760585, "learning_rate": 1.7025801391155097e-05, "loss": 0.6889, "step": 3588 }, { "epoch": 0.5491966335118592, "grad_norm": 2.364932204717252, "learning_rate": 1.7024037688901477e-05, "loss": 0.7787, "step": 3589 }, { "epoch": 0.5493496557000765, "grad_norm": 2.1140087888729324, "learning_rate": 1.7022273555274228e-05, "loss": 0.7424, "step": 3590 }, { "epoch": 0.5495026778882938, "grad_norm": 2.3098634667372884, "learning_rate": 1.7020508990381685e-05, "loss": 0.7408, "step": 3591 }, { "epoch": 0.5496557000765111, "grad_norm": 2.4846746652950555, "learning_rate": 1.7018743994332222e-05, "loss": 0.9127, "step": 3592 }, { "epoch": 0.5498087222647284, "grad_norm": 2.475667889661541, "learning_rate": 1.7016978567234234e-05, "loss": 0.855, "step": 3593 }, { "epoch": 0.5499617444529457, "grad_norm": 2.391009217771575, "learning_rate": 1.7015212709196142e-05, "loss": 0.7626, "step": 3594 }, { "epoch": 0.550114766641163, "grad_norm": 2.7017637632302116, "learning_rate": 1.7013446420326393e-05, "loss": 0.7821, "step": 3595 }, { "epoch": 0.5502677888293802, "grad_norm": 2.302889942316845, "learning_rate": 1.701167970073346e-05, "loss": 0.7432, "step": 3596 }, { "epoch": 0.5504208110175975, "grad_norm": 2.0950840579929983, "learning_rate": 1.700991255052585e-05, "loss": 0.6559, "step": 3597 }, { "epoch": 0.5505738332058149, "grad_norm": 2.0143471965497652, "learning_rate": 1.7008144969812085e-05, "loss": 0.6854, "step": 3598 }, { "epoch": 0.5507268553940321, "grad_norm": 2.153960943563205, "learning_rate": 1.7006376958700724e-05, "loss": 0.722, "step": 3599 }, { "epoch": 0.5508798775822494, "grad_norm": 2.252566815376236, "learning_rate": 1.7004608517300343e-05, "loss": 0.7874, "step": 3600 }, { "epoch": 0.5510328997704668, "grad_norm": 2.306690084716881, "learning_rate": 1.700283964571955e-05, "loss": 0.6778, "step": 3601 }, { "epoch": 0.551185921958684, "grad_norm": 2.0446451450569083, "learning_rate": 1.7001070344066982e-05, "loss": 0.729, "step": 3602 }, { "epoch": 0.5513389441469013, "grad_norm": 2.249420963070831, "learning_rate": 1.6999300612451294e-05, "loss": 0.6717, "step": 3603 }, { "epoch": 0.5514919663351185, "grad_norm": 2.3610035001536853, "learning_rate": 1.6997530450981175e-05, "loss": 0.8365, "step": 3604 }, { "epoch": 0.5516449885233359, "grad_norm": 2.301984232252648, "learning_rate": 1.6995759859765332e-05, "loss": 0.828, "step": 3605 }, { "epoch": 0.5517980107115532, "grad_norm": 2.242175879504416, "learning_rate": 1.6993988838912514e-05, "loss": 0.7475, "step": 3606 }, { "epoch": 0.5519510328997704, "grad_norm": 2.3666687002372737, "learning_rate": 1.699221738853148e-05, "loss": 0.5838, "step": 3607 }, { "epoch": 0.5521040550879878, "grad_norm": 2.3339878325157417, "learning_rate": 1.6990445508731023e-05, "loss": 0.8548, "step": 3608 }, { "epoch": 0.5522570772762051, "grad_norm": 2.2915212295829672, "learning_rate": 1.6988673199619955e-05, "loss": 0.7516, "step": 3609 }, { "epoch": 0.5524100994644223, "grad_norm": 2.6222980931777418, "learning_rate": 1.698690046130713e-05, "loss": 0.7324, "step": 3610 }, { "epoch": 0.5525631216526397, "grad_norm": 2.591361000180004, "learning_rate": 1.6985127293901417e-05, "loss": 0.8047, "step": 3611 }, { "epoch": 0.5527161438408569, "grad_norm": 2.5309834401100066, "learning_rate": 1.698335369751171e-05, "loss": 0.7602, "step": 3612 }, { "epoch": 0.5528691660290742, "grad_norm": 2.332044775123289, "learning_rate": 1.6981579672246932e-05, "loss": 0.8728, "step": 3613 }, { "epoch": 0.5530221882172915, "grad_norm": 2.3418254681031527, "learning_rate": 1.6979805218216033e-05, "loss": 0.7037, "step": 3614 }, { "epoch": 0.5531752104055088, "grad_norm": 2.2946913600869783, "learning_rate": 1.697803033552799e-05, "loss": 0.6968, "step": 3615 }, { "epoch": 0.5533282325937261, "grad_norm": 2.345597481651466, "learning_rate": 1.697625502429181e-05, "loss": 0.8231, "step": 3616 }, { "epoch": 0.5534812547819434, "grad_norm": 2.158189225934225, "learning_rate": 1.6974479284616512e-05, "loss": 0.6546, "step": 3617 }, { "epoch": 0.5536342769701607, "grad_norm": 2.1114929224062187, "learning_rate": 1.697270311661116e-05, "loss": 0.7127, "step": 3618 }, { "epoch": 0.553787299158378, "grad_norm": 2.1596841291083453, "learning_rate": 1.6970926520384833e-05, "loss": 0.7419, "step": 3619 }, { "epoch": 0.5539403213465952, "grad_norm": 2.400828622459582, "learning_rate": 1.696914949604664e-05, "loss": 0.7161, "step": 3620 }, { "epoch": 0.5540933435348125, "grad_norm": 2.323163384739257, "learning_rate": 1.6967372043705707e-05, "loss": 0.8356, "step": 3621 }, { "epoch": 0.5542463657230299, "grad_norm": 2.4295012870436152, "learning_rate": 1.6965594163471202e-05, "loss": 0.8251, "step": 3622 }, { "epoch": 0.5543993879112471, "grad_norm": 2.2234601452756864, "learning_rate": 1.6963815855452312e-05, "loss": 0.6826, "step": 3623 }, { "epoch": 0.5545524100994644, "grad_norm": 1.9956514045453755, "learning_rate": 1.6962037119758247e-05, "loss": 0.7141, "step": 3624 }, { "epoch": 0.5547054322876818, "grad_norm": 2.3909418043326545, "learning_rate": 1.696025795649825e-05, "loss": 0.8332, "step": 3625 }, { "epoch": 0.554858454475899, "grad_norm": 2.453103266939776, "learning_rate": 1.6958478365781576e-05, "loss": 0.8726, "step": 3626 }, { "epoch": 0.5550114766641163, "grad_norm": 2.2746636360271855, "learning_rate": 1.695669834771753e-05, "loss": 0.8042, "step": 3627 }, { "epoch": 0.5551644988523335, "grad_norm": 2.2324321290351796, "learning_rate": 1.6954917902415423e-05, "loss": 0.7195, "step": 3628 }, { "epoch": 0.5553175210405509, "grad_norm": 2.2940189638814728, "learning_rate": 1.6953137029984597e-05, "loss": 0.7609, "step": 3629 }, { "epoch": 0.5554705432287682, "grad_norm": 2.3775879354384677, "learning_rate": 1.6951355730534426e-05, "loss": 0.6981, "step": 3630 }, { "epoch": 0.5556235654169854, "grad_norm": 2.201686255606368, "learning_rate": 1.6949574004174304e-05, "loss": 0.6593, "step": 3631 }, { "epoch": 0.5557765876052028, "grad_norm": 2.1482634948848554, "learning_rate": 1.694779185101366e-05, "loss": 0.7824, "step": 3632 }, { "epoch": 0.5559296097934201, "grad_norm": 2.173950406005488, "learning_rate": 1.6946009271161936e-05, "loss": 0.5722, "step": 3633 }, { "epoch": 0.5560826319816373, "grad_norm": 2.1168563259507946, "learning_rate": 1.6944226264728612e-05, "loss": 0.703, "step": 3634 }, { "epoch": 0.5562356541698547, "grad_norm": 2.267913969094195, "learning_rate": 1.6942442831823184e-05, "loss": 0.8432, "step": 3635 }, { "epoch": 0.5563886763580719, "grad_norm": 2.6212417618753983, "learning_rate": 1.6940658972555184e-05, "loss": 0.8025, "step": 3636 }, { "epoch": 0.5565416985462892, "grad_norm": 2.5846678141954467, "learning_rate": 1.6938874687034163e-05, "loss": 0.8314, "step": 3637 }, { "epoch": 0.5566947207345065, "grad_norm": 2.334021760479528, "learning_rate": 1.6937089975369705e-05, "loss": 0.6797, "step": 3638 }, { "epoch": 0.5568477429227238, "grad_norm": 2.107816213370517, "learning_rate": 1.693530483767141e-05, "loss": 0.6848, "step": 3639 }, { "epoch": 0.5570007651109411, "grad_norm": 2.356072011721238, "learning_rate": 1.693351927404892e-05, "loss": 0.7297, "step": 3640 }, { "epoch": 0.5571537872991584, "grad_norm": 2.169257056229893, "learning_rate": 1.6931733284611882e-05, "loss": 0.8036, "step": 3641 }, { "epoch": 0.5573068094873757, "grad_norm": 2.2003860483288302, "learning_rate": 1.6929946869469987e-05, "loss": 0.6347, "step": 3642 }, { "epoch": 0.557459831675593, "grad_norm": 2.170921678736145, "learning_rate": 1.6928160028732942e-05, "loss": 0.6982, "step": 3643 }, { "epoch": 0.5576128538638102, "grad_norm": 2.29010650862317, "learning_rate": 1.6926372762510492e-05, "loss": 0.8291, "step": 3644 }, { "epoch": 0.5577658760520275, "grad_norm": 2.4638778340052565, "learning_rate": 1.692458507091239e-05, "loss": 0.7613, "step": 3645 }, { "epoch": 0.5579188982402449, "grad_norm": 2.054167992139388, "learning_rate": 1.6922796954048434e-05, "loss": 0.8133, "step": 3646 }, { "epoch": 0.5580719204284621, "grad_norm": 2.7024998915374643, "learning_rate": 1.6921008412028435e-05, "loss": 0.8554, "step": 3647 }, { "epoch": 0.5582249426166794, "grad_norm": 2.797028845762761, "learning_rate": 1.691921944496223e-05, "loss": 0.8489, "step": 3648 }, { "epoch": 0.5583779648048968, "grad_norm": 2.1668006488701166, "learning_rate": 1.6917430052959692e-05, "loss": 0.7914, "step": 3649 }, { "epoch": 0.558530986993114, "grad_norm": 2.17202935119517, "learning_rate": 1.6915640236130716e-05, "loss": 0.6803, "step": 3650 }, { "epoch": 0.5586840091813313, "grad_norm": 2.2835949452400257, "learning_rate": 1.6913849994585217e-05, "loss": 0.7817, "step": 3651 }, { "epoch": 0.5588370313695485, "grad_norm": 2.3830929379834656, "learning_rate": 1.6912059328433144e-05, "loss": 0.7796, "step": 3652 }, { "epoch": 0.5589900535577659, "grad_norm": 2.3925932080902808, "learning_rate": 1.6910268237784465e-05, "loss": 0.8567, "step": 3653 }, { "epoch": 0.5591430757459832, "grad_norm": 2.0860786207614894, "learning_rate": 1.6908476722749182e-05, "loss": 0.7333, "step": 3654 }, { "epoch": 0.5592960979342004, "grad_norm": 2.307073599086852, "learning_rate": 1.6906684783437315e-05, "loss": 0.8835, "step": 3655 }, { "epoch": 0.5594491201224178, "grad_norm": 2.552393727596661, "learning_rate": 1.6904892419958918e-05, "loss": 0.832, "step": 3656 }, { "epoch": 0.559602142310635, "grad_norm": 2.1716406728706112, "learning_rate": 1.6903099632424067e-05, "loss": 0.655, "step": 3657 }, { "epoch": 0.5597551644988523, "grad_norm": 2.431452976880131, "learning_rate": 1.6901306420942862e-05, "loss": 0.812, "step": 3658 }, { "epoch": 0.5599081866870697, "grad_norm": 2.2599738041832333, "learning_rate": 1.689951278562543e-05, "loss": 0.7497, "step": 3659 }, { "epoch": 0.5600612088752869, "grad_norm": 2.3432851680364095, "learning_rate": 1.6897718726581926e-05, "loss": 0.7247, "step": 3660 }, { "epoch": 0.5602142310635042, "grad_norm": 2.385858830011283, "learning_rate": 1.6895924243922535e-05, "loss": 0.8256, "step": 3661 }, { "epoch": 0.5603672532517215, "grad_norm": 2.2732882617133487, "learning_rate": 1.6894129337757458e-05, "loss": 0.8056, "step": 3662 }, { "epoch": 0.5605202754399388, "grad_norm": 2.3518883584912156, "learning_rate": 1.6892334008196925e-05, "loss": 0.7344, "step": 3663 }, { "epoch": 0.5606732976281561, "grad_norm": 2.3340522250983273, "learning_rate": 1.68905382553512e-05, "loss": 0.8086, "step": 3664 }, { "epoch": 0.5608263198163733, "grad_norm": 2.256805069227207, "learning_rate": 1.6888742079330566e-05, "loss": 0.7789, "step": 3665 }, { "epoch": 0.5609793420045907, "grad_norm": 2.3041015030287415, "learning_rate": 1.688694548024533e-05, "loss": 0.749, "step": 3666 }, { "epoch": 0.561132364192808, "grad_norm": 2.601621539475915, "learning_rate": 1.688514845820583e-05, "loss": 0.8205, "step": 3667 }, { "epoch": 0.5612853863810252, "grad_norm": 2.4104490877496945, "learning_rate": 1.6883351013322427e-05, "loss": 0.9014, "step": 3668 }, { "epoch": 0.5614384085692425, "grad_norm": 2.1468517678220076, "learning_rate": 1.6881553145705512e-05, "loss": 0.7408, "step": 3669 }, { "epoch": 0.5615914307574599, "grad_norm": 2.0830615950634783, "learning_rate": 1.68797548554655e-05, "loss": 0.6995, "step": 3670 }, { "epoch": 0.5617444529456771, "grad_norm": 2.6874796790277204, "learning_rate": 1.6877956142712827e-05, "loss": 0.7714, "step": 3671 }, { "epoch": 0.5618974751338944, "grad_norm": 2.4850035657522955, "learning_rate": 1.6876157007557958e-05, "loss": 0.9281, "step": 3672 }, { "epoch": 0.5620504973221117, "grad_norm": 2.273469641065428, "learning_rate": 1.6874357450111392e-05, "loss": 0.7668, "step": 3673 }, { "epoch": 0.562203519510329, "grad_norm": 2.2483399413604674, "learning_rate": 1.6872557470483638e-05, "loss": 0.689, "step": 3674 }, { "epoch": 0.5623565416985463, "grad_norm": 2.1512441247663845, "learning_rate": 1.687075706878525e-05, "loss": 0.7961, "step": 3675 }, { "epoch": 0.5625095638867635, "grad_norm": 2.348647850963959, "learning_rate": 1.6868956245126785e-05, "loss": 0.8583, "step": 3676 }, { "epoch": 0.5626625860749809, "grad_norm": 2.4585945763130486, "learning_rate": 1.686715499961885e-05, "loss": 0.7551, "step": 3677 }, { "epoch": 0.5628156082631982, "grad_norm": 2.227822096524331, "learning_rate": 1.686535333237206e-05, "loss": 0.7732, "step": 3678 }, { "epoch": 0.5629686304514154, "grad_norm": 2.241153131196082, "learning_rate": 1.6863551243497064e-05, "loss": 0.7789, "step": 3679 }, { "epoch": 0.5631216526396328, "grad_norm": 2.493229819494121, "learning_rate": 1.686174873310454e-05, "loss": 0.7737, "step": 3680 }, { "epoch": 0.56327467482785, "grad_norm": 2.204665508561256, "learning_rate": 1.6859945801305176e-05, "loss": 0.7376, "step": 3681 }, { "epoch": 0.5634276970160673, "grad_norm": 2.46220972958079, "learning_rate": 1.685814244820971e-05, "loss": 0.7946, "step": 3682 }, { "epoch": 0.5635807192042847, "grad_norm": 2.145178057791112, "learning_rate": 1.6856338673928883e-05, "loss": 0.686, "step": 3683 }, { "epoch": 0.5637337413925019, "grad_norm": 2.4499506242368474, "learning_rate": 1.6854534478573475e-05, "loss": 0.842, "step": 3684 }, { "epoch": 0.5638867635807192, "grad_norm": 2.056117094582968, "learning_rate": 1.6852729862254297e-05, "loss": 0.7311, "step": 3685 }, { "epoch": 0.5640397857689365, "grad_norm": 2.6838442750214786, "learning_rate": 1.6850924825082164e-05, "loss": 0.9012, "step": 3686 }, { "epoch": 0.5641928079571538, "grad_norm": 2.3160834319934858, "learning_rate": 1.6849119367167935e-05, "loss": 0.758, "step": 3687 }, { "epoch": 0.5643458301453711, "grad_norm": 2.3640101619510423, "learning_rate": 1.6847313488622495e-05, "loss": 0.7606, "step": 3688 }, { "epoch": 0.5644988523335883, "grad_norm": 2.121683684858718, "learning_rate": 1.684550718955675e-05, "loss": 0.6647, "step": 3689 }, { "epoch": 0.5646518745218057, "grad_norm": 2.2062012781293823, "learning_rate": 1.6843700470081625e-05, "loss": 0.8113, "step": 3690 }, { "epoch": 0.564804896710023, "grad_norm": 2.3360486325838505, "learning_rate": 1.684189333030808e-05, "loss": 0.6079, "step": 3691 }, { "epoch": 0.5649579188982402, "grad_norm": 2.197760459122702, "learning_rate": 1.68400857703471e-05, "loss": 0.6825, "step": 3692 }, { "epoch": 0.5651109410864575, "grad_norm": 2.1093971294742326, "learning_rate": 1.6838277790309697e-05, "loss": 0.7355, "step": 3693 }, { "epoch": 0.5652639632746749, "grad_norm": 2.3687841166774524, "learning_rate": 1.6836469390306903e-05, "loss": 0.7743, "step": 3694 }, { "epoch": 0.5654169854628921, "grad_norm": 3.161887730066366, "learning_rate": 1.683466057044978e-05, "loss": 0.827, "step": 3695 }, { "epoch": 0.5655700076511094, "grad_norm": 2.216397568711454, "learning_rate": 1.683285133084941e-05, "loss": 0.8549, "step": 3696 }, { "epoch": 0.5657230298393267, "grad_norm": 1.9890486097667865, "learning_rate": 1.6831041671616914e-05, "loss": 0.7672, "step": 3697 }, { "epoch": 0.565876052027544, "grad_norm": 2.493183723503674, "learning_rate": 1.6829231592863425e-05, "loss": 0.8644, "step": 3698 }, { "epoch": 0.5660290742157613, "grad_norm": 2.2253445860436685, "learning_rate": 1.6827421094700108e-05, "loss": 0.7744, "step": 3699 }, { "epoch": 0.5661820964039785, "grad_norm": 2.5572294147324848, "learning_rate": 1.682561017723815e-05, "loss": 0.7337, "step": 3700 }, { "epoch": 0.5663351185921959, "grad_norm": 2.3673349447586554, "learning_rate": 1.682379884058877e-05, "loss": 0.7592, "step": 3701 }, { "epoch": 0.5664881407804132, "grad_norm": 2.231170609581366, "learning_rate": 1.6821987084863208e-05, "loss": 0.7259, "step": 3702 }, { "epoch": 0.5666411629686304, "grad_norm": 2.312622201395522, "learning_rate": 1.6820174910172732e-05, "loss": 0.7672, "step": 3703 }, { "epoch": 0.5667941851568478, "grad_norm": 2.5794283462785215, "learning_rate": 1.6818362316628635e-05, "loss": 0.7342, "step": 3704 }, { "epoch": 0.566947207345065, "grad_norm": 2.2887391825650525, "learning_rate": 1.6816549304342233e-05, "loss": 0.7277, "step": 3705 }, { "epoch": 0.5671002295332823, "grad_norm": 2.3369358739869197, "learning_rate": 1.6814735873424874e-05, "loss": 0.7064, "step": 3706 }, { "epoch": 0.5672532517214997, "grad_norm": 2.1653393567600103, "learning_rate": 1.6812922023987922e-05, "loss": 0.837, "step": 3707 }, { "epoch": 0.5674062739097169, "grad_norm": 2.3845278093061384, "learning_rate": 1.6811107756142776e-05, "loss": 0.8588, "step": 3708 }, { "epoch": 0.5675592960979342, "grad_norm": 2.495972598753815, "learning_rate": 1.6809293070000862e-05, "loss": 0.7833, "step": 3709 }, { "epoch": 0.5677123182861515, "grad_norm": 2.247267803617609, "learning_rate": 1.680747796567362e-05, "loss": 0.9386, "step": 3710 }, { "epoch": 0.5678653404743688, "grad_norm": 2.5184558160832107, "learning_rate": 1.6805662443272525e-05, "loss": 0.8083, "step": 3711 }, { "epoch": 0.5680183626625861, "grad_norm": 2.165389680357376, "learning_rate": 1.6803846502909074e-05, "loss": 0.7876, "step": 3712 }, { "epoch": 0.5681713848508033, "grad_norm": 2.370201233814579, "learning_rate": 1.6802030144694798e-05, "loss": 0.7525, "step": 3713 }, { "epoch": 0.5683244070390207, "grad_norm": 2.4514170401052175, "learning_rate": 1.6800213368741236e-05, "loss": 0.816, "step": 3714 }, { "epoch": 0.568477429227238, "grad_norm": 2.2577834479368963, "learning_rate": 1.679839617515997e-05, "loss": 0.7959, "step": 3715 }, { "epoch": 0.5686304514154552, "grad_norm": 2.5236928956082605, "learning_rate": 1.67965785640626e-05, "loss": 0.7833, "step": 3716 }, { "epoch": 0.5687834736036725, "grad_norm": 2.188326961484387, "learning_rate": 1.679476053556075e-05, "loss": 0.7064, "step": 3717 }, { "epoch": 0.5689364957918899, "grad_norm": 2.3822901414350306, "learning_rate": 1.6792942089766076e-05, "loss": 0.8851, "step": 3718 }, { "epoch": 0.5690895179801071, "grad_norm": 2.1457955667343134, "learning_rate": 1.6791123226790255e-05, "loss": 0.7607, "step": 3719 }, { "epoch": 0.5692425401683244, "grad_norm": 2.323996375369015, "learning_rate": 1.6789303946744985e-05, "loss": 0.7703, "step": 3720 }, { "epoch": 0.5693955623565417, "grad_norm": 2.3656552591801523, "learning_rate": 1.6787484249742004e-05, "loss": 0.768, "step": 3721 }, { "epoch": 0.569548584544759, "grad_norm": 2.3223483884094143, "learning_rate": 1.678566413589306e-05, "loss": 0.7593, "step": 3722 }, { "epoch": 0.5697016067329763, "grad_norm": 2.2639870672318296, "learning_rate": 1.678384360530994e-05, "loss": 0.733, "step": 3723 }, { "epoch": 0.5698546289211935, "grad_norm": 2.3355673048906254, "learning_rate": 1.6782022658104444e-05, "loss": 0.7424, "step": 3724 }, { "epoch": 0.5700076511094109, "grad_norm": 2.218856200972419, "learning_rate": 1.67802012943884e-05, "loss": 0.769, "step": 3725 }, { "epoch": 0.5701606732976282, "grad_norm": 2.6124220750851506, "learning_rate": 1.6778379514273677e-05, "loss": 0.8804, "step": 3726 }, { "epoch": 0.5703136954858454, "grad_norm": 2.3903567269634087, "learning_rate": 1.6776557317872146e-05, "loss": 0.7813, "step": 3727 }, { "epoch": 0.5704667176740628, "grad_norm": 2.5179833679939247, "learning_rate": 1.6774734705295723e-05, "loss": 0.7096, "step": 3728 }, { "epoch": 0.57061973986228, "grad_norm": 2.1922598523160928, "learning_rate": 1.677291167665634e-05, "loss": 0.7195, "step": 3729 }, { "epoch": 0.5707727620504973, "grad_norm": 2.4942133453405333, "learning_rate": 1.677108823206595e-05, "loss": 0.7428, "step": 3730 }, { "epoch": 0.5709257842387147, "grad_norm": 2.2384937500275104, "learning_rate": 1.6769264371636546e-05, "loss": 0.8616, "step": 3731 }, { "epoch": 0.5710788064269319, "grad_norm": 2.2191384349159002, "learning_rate": 1.6767440095480136e-05, "loss": 0.7403, "step": 3732 }, { "epoch": 0.5712318286151492, "grad_norm": 2.380505958844727, "learning_rate": 1.6765615403708756e-05, "loss": 0.748, "step": 3733 }, { "epoch": 0.5713848508033665, "grad_norm": 2.2650779038130793, "learning_rate": 1.6763790296434463e-05, "loss": 0.7306, "step": 3734 }, { "epoch": 0.5715378729915838, "grad_norm": 2.6913536745347866, "learning_rate": 1.6761964773769352e-05, "loss": 0.8774, "step": 3735 }, { "epoch": 0.5716908951798011, "grad_norm": 2.079473361927895, "learning_rate": 1.676013883582553e-05, "loss": 0.6435, "step": 3736 }, { "epoch": 0.5718439173680183, "grad_norm": 2.236787333970397, "learning_rate": 1.6758312482715137e-05, "loss": 0.8045, "step": 3737 }, { "epoch": 0.5719969395562357, "grad_norm": 2.1951274280494206, "learning_rate": 1.6756485714550333e-05, "loss": 0.7215, "step": 3738 }, { "epoch": 0.572149961744453, "grad_norm": 2.2622052474326733, "learning_rate": 1.6754658531443314e-05, "loss": 0.703, "step": 3739 }, { "epoch": 0.5723029839326702, "grad_norm": 2.3735375982121707, "learning_rate": 1.6752830933506285e-05, "loss": 0.7512, "step": 3740 }, { "epoch": 0.5724560061208875, "grad_norm": 2.0811855879934225, "learning_rate": 1.6751002920851494e-05, "loss": 0.6298, "step": 3741 }, { "epoch": 0.5726090283091049, "grad_norm": 2.5859503146517446, "learning_rate": 1.6749174493591205e-05, "loss": 0.7602, "step": 3742 }, { "epoch": 0.5727620504973221, "grad_norm": 2.1294854580052487, "learning_rate": 1.6747345651837706e-05, "loss": 0.799, "step": 3743 }, { "epoch": 0.5729150726855394, "grad_norm": 2.2886074445311517, "learning_rate": 1.6745516395703315e-05, "loss": 0.8139, "step": 3744 }, { "epoch": 0.5730680948737567, "grad_norm": 2.6893542581228806, "learning_rate": 1.6743686725300373e-05, "loss": 0.7434, "step": 3745 }, { "epoch": 0.573221117061974, "grad_norm": 2.3216860488738424, "learning_rate": 1.6741856640741247e-05, "loss": 0.6948, "step": 3746 }, { "epoch": 0.5733741392501913, "grad_norm": 2.2001897317732615, "learning_rate": 1.6740026142138332e-05, "loss": 0.6766, "step": 3747 }, { "epoch": 0.5735271614384085, "grad_norm": 2.2336227878098693, "learning_rate": 1.6738195229604043e-05, "loss": 0.8209, "step": 3748 }, { "epoch": 0.5736801836266259, "grad_norm": 2.240603429890706, "learning_rate": 1.673636390325083e-05, "loss": 0.7174, "step": 3749 }, { "epoch": 0.5738332058148432, "grad_norm": 2.3651344356855035, "learning_rate": 1.673453216319115e-05, "loss": 0.791, "step": 3750 }, { "epoch": 0.5739862280030604, "grad_norm": 2.557825145409085, "learning_rate": 1.6732700009537506e-05, "loss": 0.8281, "step": 3751 }, { "epoch": 0.5741392501912778, "grad_norm": 2.2939591266531587, "learning_rate": 1.6730867442402417e-05, "loss": 0.7533, "step": 3752 }, { "epoch": 0.574292272379495, "grad_norm": 2.4369272889263467, "learning_rate": 1.6729034461898428e-05, "loss": 0.7564, "step": 3753 }, { "epoch": 0.5744452945677123, "grad_norm": 2.383742765723668, "learning_rate": 1.6727201068138106e-05, "loss": 0.8455, "step": 3754 }, { "epoch": 0.5745983167559297, "grad_norm": 2.1235204798335325, "learning_rate": 1.6725367261234052e-05, "loss": 0.7219, "step": 3755 }, { "epoch": 0.5747513389441469, "grad_norm": 2.0759737406177035, "learning_rate": 1.6723533041298884e-05, "loss": 0.7086, "step": 3756 }, { "epoch": 0.5749043611323642, "grad_norm": 2.1343424495745587, "learning_rate": 1.6721698408445248e-05, "loss": 0.8257, "step": 3757 }, { "epoch": 0.5750573833205815, "grad_norm": 2.0830999607795135, "learning_rate": 1.671986336278582e-05, "loss": 0.7539, "step": 3758 }, { "epoch": 0.5752104055087988, "grad_norm": 2.3324609080288066, "learning_rate": 1.6718027904433292e-05, "loss": 0.8393, "step": 3759 }, { "epoch": 0.5753634276970161, "grad_norm": 2.9857624065747403, "learning_rate": 1.671619203350039e-05, "loss": 0.8469, "step": 3760 }, { "epoch": 0.5755164498852333, "grad_norm": 2.3340876823270857, "learning_rate": 1.6714355750099863e-05, "loss": 0.7146, "step": 3761 }, { "epoch": 0.5756694720734506, "grad_norm": 2.296826476694342, "learning_rate": 1.6712519054344477e-05, "loss": 0.8085, "step": 3762 }, { "epoch": 0.575822494261668, "grad_norm": 2.398681606627602, "learning_rate": 1.6710681946347046e-05, "loss": 0.8752, "step": 3763 }, { "epoch": 0.5759755164498852, "grad_norm": 2.1659581982539042, "learning_rate": 1.6708844426220378e-05, "loss": 0.7433, "step": 3764 }, { "epoch": 0.5761285386381025, "grad_norm": 2.2458337659004317, "learning_rate": 1.6707006494077328e-05, "loss": 0.7098, "step": 3765 }, { "epoch": 0.5762815608263198, "grad_norm": 2.2737668614456337, "learning_rate": 1.670516815003077e-05, "loss": 0.7792, "step": 3766 }, { "epoch": 0.5764345830145371, "grad_norm": 2.305720566758425, "learning_rate": 1.6703329394193612e-05, "loss": 0.6744, "step": 3767 }, { "epoch": 0.5765876052027544, "grad_norm": 2.557917734239461, "learning_rate": 1.6701490226678768e-05, "loss": 0.6602, "step": 3768 }, { "epoch": 0.5767406273909716, "grad_norm": 2.22313464861298, "learning_rate": 1.6699650647599194e-05, "loss": 0.7318, "step": 3769 }, { "epoch": 0.576893649579189, "grad_norm": 2.3570935722006143, "learning_rate": 1.669781065706786e-05, "loss": 0.6865, "step": 3770 }, { "epoch": 0.5770466717674063, "grad_norm": 2.328764118860341, "learning_rate": 1.6695970255197775e-05, "loss": 0.8174, "step": 3771 }, { "epoch": 0.5771996939556235, "grad_norm": 2.206086498235264, "learning_rate": 1.669412944210196e-05, "loss": 0.7195, "step": 3772 }, { "epoch": 0.5773527161438409, "grad_norm": 2.4574715151597206, "learning_rate": 1.6692288217893473e-05, "loss": 0.6601, "step": 3773 }, { "epoch": 0.5775057383320581, "grad_norm": 2.5554333693809337, "learning_rate": 1.6690446582685384e-05, "loss": 0.9636, "step": 3774 }, { "epoch": 0.5776587605202754, "grad_norm": 2.0830553360852653, "learning_rate": 1.6688604536590797e-05, "loss": 0.688, "step": 3775 }, { "epoch": 0.5778117827084928, "grad_norm": 2.1131326036234825, "learning_rate": 1.668676207972284e-05, "loss": 0.7221, "step": 3776 }, { "epoch": 0.57796480489671, "grad_norm": 2.055677955743591, "learning_rate": 1.6684919212194664e-05, "loss": 0.6651, "step": 3777 }, { "epoch": 0.5781178270849273, "grad_norm": 2.2701708775009135, "learning_rate": 1.6683075934119448e-05, "loss": 0.7376, "step": 3778 }, { "epoch": 0.5782708492731446, "grad_norm": 2.0336407250984827, "learning_rate": 1.6681232245610393e-05, "loss": 0.76, "step": 3779 }, { "epoch": 0.5784238714613619, "grad_norm": 2.1902709950009633, "learning_rate": 1.6679388146780732e-05, "loss": 0.7113, "step": 3780 }, { "epoch": 0.5785768936495792, "grad_norm": 2.283177953498535, "learning_rate": 1.667754363774371e-05, "loss": 0.8212, "step": 3781 }, { "epoch": 0.5787299158377964, "grad_norm": 2.5358371176198005, "learning_rate": 1.6675698718612613e-05, "loss": 0.7745, "step": 3782 }, { "epoch": 0.5788829380260138, "grad_norm": 2.4035073241578404, "learning_rate": 1.6673853389500746e-05, "loss": 0.7967, "step": 3783 }, { "epoch": 0.5790359602142311, "grad_norm": 1.9008232222332522, "learning_rate": 1.667200765052143e-05, "loss": 0.5186, "step": 3784 }, { "epoch": 0.5791889824024483, "grad_norm": 2.660572345490886, "learning_rate": 1.6670161501788025e-05, "loss": 0.7472, "step": 3785 }, { "epoch": 0.5793420045906656, "grad_norm": 2.3119643292610608, "learning_rate": 1.666831494341391e-05, "loss": 0.7326, "step": 3786 }, { "epoch": 0.579495026778883, "grad_norm": 2.2646005923282346, "learning_rate": 1.666646797551248e-05, "loss": 0.7679, "step": 3787 }, { "epoch": 0.5796480489671002, "grad_norm": 2.158895112775971, "learning_rate": 1.666462059819718e-05, "loss": 0.7638, "step": 3788 }, { "epoch": 0.5798010711553175, "grad_norm": 2.155066404690859, "learning_rate": 1.6662772811581453e-05, "loss": 0.6579, "step": 3789 }, { "epoch": 0.5799540933435348, "grad_norm": 2.524080120158428, "learning_rate": 1.6660924615778784e-05, "loss": 0.7391, "step": 3790 }, { "epoch": 0.5801071155317521, "grad_norm": 2.3536130897830856, "learning_rate": 1.6659076010902678e-05, "loss": 0.7922, "step": 3791 }, { "epoch": 0.5802601377199694, "grad_norm": 2.139282136745696, "learning_rate": 1.665722699706666e-05, "loss": 0.7086, "step": 3792 }, { "epoch": 0.5804131599081866, "grad_norm": 2.0993206873081407, "learning_rate": 1.6655377574384294e-05, "loss": 0.6831, "step": 3793 }, { "epoch": 0.580566182096404, "grad_norm": 2.0100115117297492, "learning_rate": 1.6653527742969152e-05, "loss": 0.7001, "step": 3794 }, { "epoch": 0.5807192042846213, "grad_norm": 2.3782783324435783, "learning_rate": 1.665167750293484e-05, "loss": 0.7298, "step": 3795 }, { "epoch": 0.5808722264728385, "grad_norm": 2.040780849933868, "learning_rate": 1.6649826854394997e-05, "loss": 0.7327, "step": 3796 }, { "epoch": 0.5810252486610559, "grad_norm": 2.5334375804404625, "learning_rate": 1.664797579746327e-05, "loss": 0.6432, "step": 3797 }, { "epoch": 0.5811782708492731, "grad_norm": 2.2545821441478933, "learning_rate": 1.664612433225334e-05, "loss": 0.7446, "step": 3798 }, { "epoch": 0.5813312930374904, "grad_norm": 2.8266389872037845, "learning_rate": 1.664427245887892e-05, "loss": 0.8615, "step": 3799 }, { "epoch": 0.5814843152257078, "grad_norm": 2.357933150397747, "learning_rate": 1.6642420177453728e-05, "loss": 0.7502, "step": 3800 }, { "epoch": 0.581637337413925, "grad_norm": 2.4860458336444364, "learning_rate": 1.6640567488091536e-05, "loss": 0.7914, "step": 3801 }, { "epoch": 0.5817903596021423, "grad_norm": 2.5167304694736052, "learning_rate": 1.663871439090611e-05, "loss": 0.7699, "step": 3802 }, { "epoch": 0.5819433817903596, "grad_norm": 2.336138556851833, "learning_rate": 1.6636860886011266e-05, "loss": 0.7217, "step": 3803 }, { "epoch": 0.5820964039785769, "grad_norm": 2.3654993443453485, "learning_rate": 1.663500697352083e-05, "loss": 0.738, "step": 3804 }, { "epoch": 0.5822494261667942, "grad_norm": 2.4112699878549546, "learning_rate": 1.663315265354866e-05, "loss": 0.7535, "step": 3805 }, { "epoch": 0.5824024483550114, "grad_norm": 2.381433431863148, "learning_rate": 1.6631297926208637e-05, "loss": 0.8315, "step": 3806 }, { "epoch": 0.5825554705432288, "grad_norm": 2.540843627525302, "learning_rate": 1.6629442791614667e-05, "loss": 0.8621, "step": 3807 }, { "epoch": 0.5827084927314461, "grad_norm": 2.301553310120931, "learning_rate": 1.6627587249880677e-05, "loss": 0.721, "step": 3808 }, { "epoch": 0.5828615149196633, "grad_norm": 2.3190169251043353, "learning_rate": 1.662573130112063e-05, "loss": 0.6618, "step": 3809 }, { "epoch": 0.5830145371078806, "grad_norm": 2.3010873402470033, "learning_rate": 1.6623874945448503e-05, "loss": 0.8682, "step": 3810 }, { "epoch": 0.583167559296098, "grad_norm": 2.1375246030186035, "learning_rate": 1.6622018182978304e-05, "loss": 0.7486, "step": 3811 }, { "epoch": 0.5833205814843152, "grad_norm": 2.293568821408462, "learning_rate": 1.662016101382406e-05, "loss": 0.7397, "step": 3812 }, { "epoch": 0.5834736036725325, "grad_norm": 2.417223850861851, "learning_rate": 1.6618303438099834e-05, "loss": 0.7969, "step": 3813 }, { "epoch": 0.5836266258607498, "grad_norm": 2.419752913361654, "learning_rate": 1.6616445455919704e-05, "loss": 0.7874, "step": 3814 }, { "epoch": 0.5837796480489671, "grad_norm": 2.297683203684462, "learning_rate": 1.6614587067397767e-05, "loss": 0.7571, "step": 3815 }, { "epoch": 0.5839326702371844, "grad_norm": 2.1676060389805043, "learning_rate": 1.661272827264817e-05, "loss": 0.6983, "step": 3816 }, { "epoch": 0.5840856924254016, "grad_norm": 2.2039655542368712, "learning_rate": 1.6610869071785055e-05, "loss": 0.7334, "step": 3817 }, { "epoch": 0.584238714613619, "grad_norm": 2.0194472601786146, "learning_rate": 1.6609009464922613e-05, "loss": 0.6393, "step": 3818 }, { "epoch": 0.5843917368018363, "grad_norm": 2.515878768807154, "learning_rate": 1.660714945217504e-05, "loss": 0.7962, "step": 3819 }, { "epoch": 0.5845447589900535, "grad_norm": 2.5345715323872806, "learning_rate": 1.6605289033656575e-05, "loss": 0.7854, "step": 3820 }, { "epoch": 0.5846977811782709, "grad_norm": 2.2418644854452334, "learning_rate": 1.660342820948147e-05, "loss": 0.8298, "step": 3821 }, { "epoch": 0.5848508033664881, "grad_norm": 2.4365303706928603, "learning_rate": 1.6601566979764007e-05, "loss": 0.7993, "step": 3822 }, { "epoch": 0.5850038255547054, "grad_norm": 2.4005844284928415, "learning_rate": 1.659970534461849e-05, "loss": 0.7509, "step": 3823 }, { "epoch": 0.5851568477429228, "grad_norm": 2.406705848873317, "learning_rate": 1.6597843304159248e-05, "loss": 0.7272, "step": 3824 }, { "epoch": 0.58530986993114, "grad_norm": 2.3041140030698615, "learning_rate": 1.659598085850064e-05, "loss": 0.7891, "step": 3825 }, { "epoch": 0.5854628921193573, "grad_norm": 2.379151533908481, "learning_rate": 1.659411800775704e-05, "loss": 0.7599, "step": 3826 }, { "epoch": 0.5856159143075746, "grad_norm": 2.287716081596106, "learning_rate": 1.659225475204286e-05, "loss": 0.8061, "step": 3827 }, { "epoch": 0.5857689364957919, "grad_norm": 2.2414433200668227, "learning_rate": 1.6590391091472526e-05, "loss": 0.7691, "step": 3828 }, { "epoch": 0.5859219586840092, "grad_norm": 2.5086014363960554, "learning_rate": 1.6588527026160493e-05, "loss": 0.8479, "step": 3829 }, { "epoch": 0.5860749808722264, "grad_norm": 2.419448089243427, "learning_rate": 1.658666255622124e-05, "loss": 0.8236, "step": 3830 }, { "epoch": 0.5862280030604438, "grad_norm": 2.3163306355498157, "learning_rate": 1.6584797681769273e-05, "loss": 0.8364, "step": 3831 }, { "epoch": 0.5863810252486611, "grad_norm": 2.1990484963992585, "learning_rate": 1.6582932402919124e-05, "loss": 0.7264, "step": 3832 }, { "epoch": 0.5865340474368783, "grad_norm": 2.257065590223458, "learning_rate": 1.658106671978534e-05, "loss": 0.7227, "step": 3833 }, { "epoch": 0.5866870696250956, "grad_norm": 2.3568110415125805, "learning_rate": 1.6579200632482502e-05, "loss": 0.8643, "step": 3834 }, { "epoch": 0.586840091813313, "grad_norm": 2.2997320714448537, "learning_rate": 1.6577334141125217e-05, "loss": 0.804, "step": 3835 }, { "epoch": 0.5869931140015302, "grad_norm": 2.4492067663950596, "learning_rate": 1.657546724582811e-05, "loss": 0.7471, "step": 3836 }, { "epoch": 0.5871461361897475, "grad_norm": 2.2367454813232346, "learning_rate": 1.657359994670584e-05, "loss": 0.7728, "step": 3837 }, { "epoch": 0.5872991583779648, "grad_norm": 2.387170074669952, "learning_rate": 1.657173224387308e-05, "loss": 0.7294, "step": 3838 }, { "epoch": 0.5874521805661821, "grad_norm": 2.6611783518744367, "learning_rate": 1.6569864137444533e-05, "loss": 0.8531, "step": 3839 }, { "epoch": 0.5876052027543994, "grad_norm": 2.5795969282876507, "learning_rate": 1.6567995627534927e-05, "loss": 0.7192, "step": 3840 }, { "epoch": 0.5877582249426166, "grad_norm": 2.228751239656882, "learning_rate": 1.6566126714259017e-05, "loss": 0.7878, "step": 3841 }, { "epoch": 0.587911247130834, "grad_norm": 2.418610511457032, "learning_rate": 1.6564257397731577e-05, "loss": 0.8162, "step": 3842 }, { "epoch": 0.5880642693190513, "grad_norm": 2.1098189523920357, "learning_rate": 1.656238767806741e-05, "loss": 0.7597, "step": 3843 }, { "epoch": 0.5882172915072685, "grad_norm": 2.4049855489791803, "learning_rate": 1.6560517555381348e-05, "loss": 0.8416, "step": 3844 }, { "epoch": 0.5883703136954859, "grad_norm": 2.1864495709233913, "learning_rate": 1.6558647029788234e-05, "loss": 0.6129, "step": 3845 }, { "epoch": 0.5885233358837031, "grad_norm": 2.571107206508435, "learning_rate": 1.655677610140295e-05, "loss": 0.8715, "step": 3846 }, { "epoch": 0.5886763580719204, "grad_norm": 2.2997919979778016, "learning_rate": 1.6554904770340393e-05, "loss": 0.8151, "step": 3847 }, { "epoch": 0.5888293802601378, "grad_norm": 2.2281125845467495, "learning_rate": 1.6553033036715493e-05, "loss": 0.8202, "step": 3848 }, { "epoch": 0.588982402448355, "grad_norm": 2.1321131423457773, "learning_rate": 1.6551160900643203e-05, "loss": 0.6831, "step": 3849 }, { "epoch": 0.5891354246365723, "grad_norm": 2.527648900196719, "learning_rate": 1.654928836223849e-05, "loss": 0.7904, "step": 3850 }, { "epoch": 0.5892884468247896, "grad_norm": 2.28923012533992, "learning_rate": 1.6547415421616353e-05, "loss": 0.7622, "step": 3851 }, { "epoch": 0.5894414690130069, "grad_norm": 2.7923038554466846, "learning_rate": 1.6545542078891826e-05, "loss": 0.8117, "step": 3852 }, { "epoch": 0.5895944912012242, "grad_norm": 2.4396804219014423, "learning_rate": 1.654366833417995e-05, "loss": 0.8149, "step": 3853 }, { "epoch": 0.5897475133894414, "grad_norm": 2.2650099970410795, "learning_rate": 1.654179418759581e-05, "loss": 0.7232, "step": 3854 }, { "epoch": 0.5899005355776588, "grad_norm": 2.2479396320602865, "learning_rate": 1.6539919639254494e-05, "loss": 0.7672, "step": 3855 }, { "epoch": 0.5900535577658761, "grad_norm": 2.345314819643059, "learning_rate": 1.6538044689271126e-05, "loss": 0.8204, "step": 3856 }, { "epoch": 0.5902065799540933, "grad_norm": 2.0268666678732443, "learning_rate": 1.653616933776086e-05, "loss": 0.7722, "step": 3857 }, { "epoch": 0.5903596021423106, "grad_norm": 2.3791503614863108, "learning_rate": 1.653429358483886e-05, "loss": 0.8463, "step": 3858 }, { "epoch": 0.590512624330528, "grad_norm": 2.342414633790742, "learning_rate": 1.6532417430620337e-05, "loss": 0.77, "step": 3859 }, { "epoch": 0.5906656465187452, "grad_norm": 2.327695041795651, "learning_rate": 1.65305408752205e-05, "loss": 0.8732, "step": 3860 }, { "epoch": 0.5908186687069625, "grad_norm": 2.437108756237025, "learning_rate": 1.6528663918754597e-05, "loss": 0.8092, "step": 3861 }, { "epoch": 0.5909716908951798, "grad_norm": 2.2774300160190046, "learning_rate": 1.652678656133791e-05, "loss": 0.7207, "step": 3862 }, { "epoch": 0.5911247130833971, "grad_norm": 2.4901626265698886, "learning_rate": 1.652490880308572e-05, "loss": 0.8836, "step": 3863 }, { "epoch": 0.5912777352716144, "grad_norm": 2.49244263349783, "learning_rate": 1.6523030644113357e-05, "loss": 0.7631, "step": 3864 }, { "epoch": 0.5914307574598316, "grad_norm": 2.166963403745061, "learning_rate": 1.6521152084536164e-05, "loss": 0.7394, "step": 3865 }, { "epoch": 0.591583779648049, "grad_norm": 2.3942979926831196, "learning_rate": 1.6519273124469512e-05, "loss": 0.8902, "step": 3866 }, { "epoch": 0.5917368018362663, "grad_norm": 2.1880020195653764, "learning_rate": 1.6517393764028793e-05, "loss": 0.7427, "step": 3867 }, { "epoch": 0.5918898240244835, "grad_norm": 2.106975274430694, "learning_rate": 1.651551400332943e-05, "loss": 0.7053, "step": 3868 }, { "epoch": 0.5920428462127009, "grad_norm": 2.377254747191573, "learning_rate": 1.6513633842486858e-05, "loss": 0.8433, "step": 3869 }, { "epoch": 0.5921958684009181, "grad_norm": 2.1159960070343753, "learning_rate": 1.6511753281616552e-05, "loss": 0.6735, "step": 3870 }, { "epoch": 0.5923488905891354, "grad_norm": 1.9438413653670783, "learning_rate": 1.6509872320834003e-05, "loss": 0.6788, "step": 3871 }, { "epoch": 0.5925019127773528, "grad_norm": 2.0955062116382943, "learning_rate": 1.6507990960254728e-05, "loss": 0.7541, "step": 3872 }, { "epoch": 0.59265493496557, "grad_norm": 2.3820312322134187, "learning_rate": 1.650610919999427e-05, "loss": 0.8307, "step": 3873 }, { "epoch": 0.5928079571537873, "grad_norm": 2.2354983100993446, "learning_rate": 1.6504227040168194e-05, "loss": 0.772, "step": 3874 }, { "epoch": 0.5929609793420046, "grad_norm": 2.2484229287109625, "learning_rate": 1.650234448089209e-05, "loss": 0.8463, "step": 3875 }, { "epoch": 0.5931140015302219, "grad_norm": 2.1798032783600783, "learning_rate": 1.6500461522281575e-05, "loss": 0.6458, "step": 3876 }, { "epoch": 0.5932670237184392, "grad_norm": 2.2753556732841402, "learning_rate": 1.6498578164452285e-05, "loss": 0.7703, "step": 3877 }, { "epoch": 0.5934200459066564, "grad_norm": 2.4188609922833737, "learning_rate": 1.6496694407519888e-05, "loss": 0.8333, "step": 3878 }, { "epoch": 0.5935730680948738, "grad_norm": 2.3421062788752307, "learning_rate": 1.6494810251600075e-05, "loss": 0.7059, "step": 3879 }, { "epoch": 0.5937260902830911, "grad_norm": 2.1227249655673717, "learning_rate": 1.6492925696808555e-05, "loss": 0.8143, "step": 3880 }, { "epoch": 0.5938791124713083, "grad_norm": 2.308489799932691, "learning_rate": 1.6491040743261065e-05, "loss": 0.8233, "step": 3881 }, { "epoch": 0.5940321346595256, "grad_norm": 2.4998079895811687, "learning_rate": 1.6489155391073375e-05, "loss": 0.7824, "step": 3882 }, { "epoch": 0.5941851568477429, "grad_norm": 2.135136759805042, "learning_rate": 1.6487269640361264e-05, "loss": 0.789, "step": 3883 }, { "epoch": 0.5943381790359602, "grad_norm": 2.0920550383410474, "learning_rate": 1.6485383491240546e-05, "loss": 0.743, "step": 3884 }, { "epoch": 0.5944912012241775, "grad_norm": 2.355458998837855, "learning_rate": 1.6483496943827056e-05, "loss": 0.728, "step": 3885 }, { "epoch": 0.5946442234123948, "grad_norm": 2.1107113168909004, "learning_rate": 1.6481609998236656e-05, "loss": 0.6521, "step": 3886 }, { "epoch": 0.5947972456006121, "grad_norm": 2.116378932065588, "learning_rate": 1.647972265458523e-05, "loss": 0.765, "step": 3887 }, { "epoch": 0.5949502677888294, "grad_norm": 2.195681235916977, "learning_rate": 1.647783491298869e-05, "loss": 0.7999, "step": 3888 }, { "epoch": 0.5951032899770466, "grad_norm": 2.65848496667216, "learning_rate": 1.647594677356296e-05, "loss": 0.7752, "step": 3889 }, { "epoch": 0.595256312165264, "grad_norm": 2.3804867928581412, "learning_rate": 1.647405823642401e-05, "loss": 0.7683, "step": 3890 }, { "epoch": 0.5954093343534812, "grad_norm": 2.2256140777105893, "learning_rate": 1.6472169301687816e-05, "loss": 0.6946, "step": 3891 }, { "epoch": 0.5955623565416985, "grad_norm": 2.259429617574836, "learning_rate": 1.6470279969470384e-05, "loss": 0.8007, "step": 3892 }, { "epoch": 0.5957153787299159, "grad_norm": 2.4244045113345667, "learning_rate": 1.646839023988775e-05, "loss": 0.8071, "step": 3893 }, { "epoch": 0.5958684009181331, "grad_norm": 2.21180016617028, "learning_rate": 1.6466500113055963e-05, "loss": 0.6792, "step": 3894 }, { "epoch": 0.5960214231063504, "grad_norm": 2.1941067525426727, "learning_rate": 1.646460958909111e-05, "loss": 0.8207, "step": 3895 }, { "epoch": 0.5961744452945678, "grad_norm": 2.1232749753013667, "learning_rate": 1.646271866810929e-05, "loss": 0.7, "step": 3896 }, { "epoch": 0.596327467482785, "grad_norm": 2.190229920133476, "learning_rate": 1.6460827350226637e-05, "loss": 0.6335, "step": 3897 }, { "epoch": 0.5964804896710023, "grad_norm": 2.2639688761323944, "learning_rate": 1.64589356355593e-05, "loss": 0.7466, "step": 3898 }, { "epoch": 0.5966335118592195, "grad_norm": 2.460362228450079, "learning_rate": 1.645704352422346e-05, "loss": 0.8105, "step": 3899 }, { "epoch": 0.5967865340474369, "grad_norm": 2.2461116435022377, "learning_rate": 1.6455151016335312e-05, "loss": 0.697, "step": 3900 }, { "epoch": 0.5969395562356542, "grad_norm": 2.3594462673289947, "learning_rate": 1.6453258112011094e-05, "loss": 0.7736, "step": 3901 }, { "epoch": 0.5970925784238714, "grad_norm": 2.106693165284362, "learning_rate": 1.6451364811367044e-05, "loss": 0.5927, "step": 3902 }, { "epoch": 0.5972456006120888, "grad_norm": 2.247234785821779, "learning_rate": 1.6449471114519447e-05, "loss": 0.7286, "step": 3903 }, { "epoch": 0.5973986228003061, "grad_norm": 2.0774276621075454, "learning_rate": 1.6447577021584597e-05, "loss": 0.6991, "step": 3904 }, { "epoch": 0.5975516449885233, "grad_norm": 2.430421209143389, "learning_rate": 1.644568253267882e-05, "loss": 0.7457, "step": 3905 }, { "epoch": 0.5977046671767406, "grad_norm": 2.330147887370278, "learning_rate": 1.6443787647918464e-05, "loss": 0.7581, "step": 3906 }, { "epoch": 0.5978576893649579, "grad_norm": 2.0030331146685665, "learning_rate": 1.6441892367419895e-05, "loss": 0.5928, "step": 3907 }, { "epoch": 0.5980107115531752, "grad_norm": 2.3916619016698166, "learning_rate": 1.643999669129952e-05, "loss": 0.781, "step": 3908 }, { "epoch": 0.5981637337413925, "grad_norm": 2.399757751453464, "learning_rate": 1.6438100619673757e-05, "loss": 0.7236, "step": 3909 }, { "epoch": 0.5983167559296098, "grad_norm": 2.5738835936498603, "learning_rate": 1.6436204152659042e-05, "loss": 0.7948, "step": 3910 }, { "epoch": 0.5984697781178271, "grad_norm": 2.3044877765883913, "learning_rate": 1.6434307290371855e-05, "loss": 0.6954, "step": 3911 }, { "epoch": 0.5986228003060444, "grad_norm": 2.2008661487646046, "learning_rate": 1.6432410032928686e-05, "loss": 0.7159, "step": 3912 }, { "epoch": 0.5987758224942616, "grad_norm": 2.3895382666991876, "learning_rate": 1.6430512380446052e-05, "loss": 0.8146, "step": 3913 }, { "epoch": 0.598928844682479, "grad_norm": 2.065309607267929, "learning_rate": 1.64286143330405e-05, "loss": 0.8357, "step": 3914 }, { "epoch": 0.5990818668706962, "grad_norm": 2.3359874366462914, "learning_rate": 1.642671589082859e-05, "loss": 0.7237, "step": 3915 }, { "epoch": 0.5992348890589135, "grad_norm": 2.4962956403966934, "learning_rate": 1.6424817053926917e-05, "loss": 0.8363, "step": 3916 }, { "epoch": 0.5993879112471309, "grad_norm": 2.468183246991395, "learning_rate": 1.6422917822452093e-05, "loss": 0.8228, "step": 3917 }, { "epoch": 0.5995409334353481, "grad_norm": 2.5397453332829016, "learning_rate": 1.6421018196520758e-05, "loss": 0.9307, "step": 3918 }, { "epoch": 0.5996939556235654, "grad_norm": 2.225012213725225, "learning_rate": 1.641911817624958e-05, "loss": 0.76, "step": 3919 }, { "epoch": 0.5998469778117828, "grad_norm": 2.3966394488811016, "learning_rate": 1.641721776175524e-05, "loss": 0.7146, "step": 3920 }, { "epoch": 0.6, "grad_norm": 2.303920410231337, "learning_rate": 1.6415316953154455e-05, "loss": 0.8073, "step": 3921 }, { "epoch": 0.6001530221882173, "grad_norm": 2.211302259446293, "learning_rate": 1.6413415750563957e-05, "loss": 0.7277, "step": 3922 }, { "epoch": 0.6003060443764345, "grad_norm": 2.2345826594193166, "learning_rate": 1.6411514154100513e-05, "loss": 0.6917, "step": 3923 }, { "epoch": 0.6004590665646519, "grad_norm": 2.201839243236848, "learning_rate": 1.6409612163880898e-05, "loss": 0.704, "step": 3924 }, { "epoch": 0.6006120887528692, "grad_norm": 2.2928406532733194, "learning_rate": 1.6407709780021925e-05, "loss": 0.754, "step": 3925 }, { "epoch": 0.6007651109410864, "grad_norm": 2.3920891472308736, "learning_rate": 1.640580700264043e-05, "loss": 0.8704, "step": 3926 }, { "epoch": 0.6009181331293038, "grad_norm": 2.1292889428240294, "learning_rate": 1.6403903831853265e-05, "loss": 0.7045, "step": 3927 }, { "epoch": 0.6010711553175211, "grad_norm": 2.379304273256747, "learning_rate": 1.640200026777732e-05, "loss": 0.8551, "step": 3928 }, { "epoch": 0.6012241775057383, "grad_norm": 2.282693014805505, "learning_rate": 1.6400096310529487e-05, "loss": 0.7696, "step": 3929 }, { "epoch": 0.6013771996939556, "grad_norm": 2.206776247521118, "learning_rate": 1.63981919602267e-05, "loss": 0.6743, "step": 3930 }, { "epoch": 0.6015302218821729, "grad_norm": 2.608385419308765, "learning_rate": 1.639628721698592e-05, "loss": 0.8967, "step": 3931 }, { "epoch": 0.6016832440703902, "grad_norm": 2.455187279779116, "learning_rate": 1.639438208092412e-05, "loss": 0.7461, "step": 3932 }, { "epoch": 0.6018362662586075, "grad_norm": 2.2605743335848274, "learning_rate": 1.63924765521583e-05, "loss": 0.7199, "step": 3933 }, { "epoch": 0.6019892884468248, "grad_norm": 2.445481051817927, "learning_rate": 1.6390570630805487e-05, "loss": 0.767, "step": 3934 }, { "epoch": 0.6021423106350421, "grad_norm": 2.583444641012235, "learning_rate": 1.638866431698273e-05, "loss": 0.8196, "step": 3935 }, { "epoch": 0.6022953328232594, "grad_norm": 2.5086801152333646, "learning_rate": 1.6386757610807106e-05, "loss": 0.816, "step": 3936 }, { "epoch": 0.6024483550114766, "grad_norm": 2.3705704487061348, "learning_rate": 1.6384850512395715e-05, "loss": 0.7676, "step": 3937 }, { "epoch": 0.602601377199694, "grad_norm": 2.1626859005056236, "learning_rate": 1.638294302186567e-05, "loss": 0.7177, "step": 3938 }, { "epoch": 0.6027543993879112, "grad_norm": 2.46703763158888, "learning_rate": 1.6381035139334128e-05, "loss": 0.7297, "step": 3939 }, { "epoch": 0.6029074215761285, "grad_norm": 2.5716263838061666, "learning_rate": 1.6379126864918256e-05, "loss": 0.9171, "step": 3940 }, { "epoch": 0.6030604437643459, "grad_norm": 2.193123379792748, "learning_rate": 1.6377218198735246e-05, "loss": 0.7157, "step": 3941 }, { "epoch": 0.6032134659525631, "grad_norm": 2.3421215315975807, "learning_rate": 1.637530914090232e-05, "loss": 0.748, "step": 3942 }, { "epoch": 0.6033664881407804, "grad_norm": 2.20680704151605, "learning_rate": 1.637339969153672e-05, "loss": 0.7783, "step": 3943 }, { "epoch": 0.6035195103289978, "grad_norm": 2.5368724067005823, "learning_rate": 1.6371489850755712e-05, "loss": 0.7137, "step": 3944 }, { "epoch": 0.603672532517215, "grad_norm": 2.347964186524102, "learning_rate": 1.6369579618676584e-05, "loss": 0.7715, "step": 3945 }, { "epoch": 0.6038255547054323, "grad_norm": 2.4869195064086487, "learning_rate": 1.636766899541666e-05, "loss": 0.8303, "step": 3946 }, { "epoch": 0.6039785768936495, "grad_norm": 2.2848097041733326, "learning_rate": 1.6365757981093266e-05, "loss": 0.8186, "step": 3947 }, { "epoch": 0.6041315990818669, "grad_norm": 2.4440133764573035, "learning_rate": 1.6363846575823772e-05, "loss": 0.8781, "step": 3948 }, { "epoch": 0.6042846212700842, "grad_norm": 2.527286580207838, "learning_rate": 1.6361934779725564e-05, "loss": 0.7026, "step": 3949 }, { "epoch": 0.6044376434583014, "grad_norm": 2.6768839444382713, "learning_rate": 1.6360022592916056e-05, "loss": 0.7902, "step": 3950 }, { "epoch": 0.6045906656465188, "grad_norm": 2.563244839367599, "learning_rate": 1.635811001551268e-05, "loss": 0.7985, "step": 3951 }, { "epoch": 0.6047436878347361, "grad_norm": 2.435160345656961, "learning_rate": 1.6356197047632894e-05, "loss": 0.8541, "step": 3952 }, { "epoch": 0.6048967100229533, "grad_norm": 2.660440652538607, "learning_rate": 1.635428368939418e-05, "loss": 0.7915, "step": 3953 }, { "epoch": 0.6050497322111706, "grad_norm": 2.302355575360249, "learning_rate": 1.635236994091405e-05, "loss": 0.7916, "step": 3954 }, { "epoch": 0.6052027543993879, "grad_norm": 2.5678687348149127, "learning_rate": 1.6350455802310027e-05, "loss": 0.8768, "step": 3955 }, { "epoch": 0.6053557765876052, "grad_norm": 2.4133307260880015, "learning_rate": 1.6348541273699672e-05, "loss": 0.8734, "step": 3956 }, { "epoch": 0.6055087987758225, "grad_norm": 2.1442604005186716, "learning_rate": 1.6346626355200564e-05, "loss": 0.8057, "step": 3957 }, { "epoch": 0.6056618209640398, "grad_norm": 2.2777122530462344, "learning_rate": 1.63447110469303e-05, "loss": 0.6944, "step": 3958 }, { "epoch": 0.6058148431522571, "grad_norm": 2.2702458426898118, "learning_rate": 1.6342795349006514e-05, "loss": 0.7787, "step": 3959 }, { "epoch": 0.6059678653404744, "grad_norm": 2.4138860942835665, "learning_rate": 1.6340879261546848e-05, "loss": 0.7319, "step": 3960 }, { "epoch": 0.6061208875286916, "grad_norm": 2.406986355064948, "learning_rate": 1.6338962784668984e-05, "loss": 0.8305, "step": 3961 }, { "epoch": 0.606273909716909, "grad_norm": 2.1320662468950053, "learning_rate": 1.633704591849061e-05, "loss": 0.7107, "step": 3962 }, { "epoch": 0.6064269319051262, "grad_norm": 2.170118161412104, "learning_rate": 1.6335128663129466e-05, "loss": 0.6988, "step": 3963 }, { "epoch": 0.6065799540933435, "grad_norm": 2.288231162241274, "learning_rate": 1.6333211018703282e-05, "loss": 0.7682, "step": 3964 }, { "epoch": 0.6067329762815609, "grad_norm": 2.0736456947567863, "learning_rate": 1.6331292985329835e-05, "loss": 0.7372, "step": 3965 }, { "epoch": 0.6068859984697781, "grad_norm": 2.107363700837136, "learning_rate": 1.632937456312692e-05, "loss": 0.7282, "step": 3966 }, { "epoch": 0.6070390206579954, "grad_norm": 2.0881937408877524, "learning_rate": 1.6327455752212348e-05, "loss": 0.7685, "step": 3967 }, { "epoch": 0.6071920428462128, "grad_norm": 2.1645877937641425, "learning_rate": 1.6325536552703963e-05, "loss": 0.834, "step": 3968 }, { "epoch": 0.60734506503443, "grad_norm": 2.377608992372084, "learning_rate": 1.6323616964719642e-05, "loss": 0.7594, "step": 3969 }, { "epoch": 0.6074980872226473, "grad_norm": 2.028436733602878, "learning_rate": 1.632169698837726e-05, "loss": 0.7597, "step": 3970 }, { "epoch": 0.6076511094108645, "grad_norm": 2.113181958173014, "learning_rate": 1.631977662379473e-05, "loss": 0.8032, "step": 3971 }, { "epoch": 0.6078041315990819, "grad_norm": 2.5583012059184402, "learning_rate": 1.6317855871090003e-05, "loss": 0.7125, "step": 3972 }, { "epoch": 0.6079571537872992, "grad_norm": 2.3758675047618008, "learning_rate": 1.6315934730381027e-05, "loss": 0.81, "step": 3973 }, { "epoch": 0.6081101759755164, "grad_norm": 2.2335382453442225, "learning_rate": 1.631401320178579e-05, "loss": 0.7042, "step": 3974 }, { "epoch": 0.6082631981637338, "grad_norm": 2.388399327130747, "learning_rate": 1.6312091285422305e-05, "loss": 0.8333, "step": 3975 }, { "epoch": 0.6084162203519511, "grad_norm": 2.545928594962447, "learning_rate": 1.63101689814086e-05, "loss": 0.8372, "step": 3976 }, { "epoch": 0.6085692425401683, "grad_norm": 2.0449415964423037, "learning_rate": 1.630824628986273e-05, "loss": 0.7397, "step": 3977 }, { "epoch": 0.6087222647283856, "grad_norm": 2.567039032923848, "learning_rate": 1.6306323210902784e-05, "loss": 0.7466, "step": 3978 }, { "epoch": 0.6088752869166029, "grad_norm": 2.4160435851987905, "learning_rate": 1.6304399744646854e-05, "loss": 0.7566, "step": 3979 }, { "epoch": 0.6090283091048202, "grad_norm": 2.536239714103674, "learning_rate": 1.630247589121307e-05, "loss": 0.9314, "step": 3980 }, { "epoch": 0.6091813312930375, "grad_norm": 2.278829958141022, "learning_rate": 1.630055165071959e-05, "loss": 0.7122, "step": 3981 }, { "epoch": 0.6093343534812548, "grad_norm": 2.387173473026114, "learning_rate": 1.6298627023284584e-05, "loss": 0.7642, "step": 3982 }, { "epoch": 0.6094873756694721, "grad_norm": 2.3274832439959856, "learning_rate": 1.6296702009026256e-05, "loss": 0.7805, "step": 3983 }, { "epoch": 0.6096403978576894, "grad_norm": 2.413713126301562, "learning_rate": 1.6294776608062818e-05, "loss": 0.8906, "step": 3984 }, { "epoch": 0.6097934200459066, "grad_norm": 2.2657003983898134, "learning_rate": 1.629285082051253e-05, "loss": 0.759, "step": 3985 }, { "epoch": 0.609946442234124, "grad_norm": 2.059377996694824, "learning_rate": 1.6290924646493654e-05, "loss": 0.6946, "step": 3986 }, { "epoch": 0.6100994644223412, "grad_norm": 2.240469903234145, "learning_rate": 1.6288998086124478e-05, "loss": 0.7731, "step": 3987 }, { "epoch": 0.6102524866105585, "grad_norm": 2.1045040115900506, "learning_rate": 1.6287071139523334e-05, "loss": 0.7427, "step": 3988 }, { "epoch": 0.6104055087987759, "grad_norm": 2.2817203881792567, "learning_rate": 1.6285143806808554e-05, "loss": 0.7773, "step": 3989 }, { "epoch": 0.6105585309869931, "grad_norm": 2.445802561201833, "learning_rate": 1.628321608809851e-05, "loss": 0.838, "step": 3990 }, { "epoch": 0.6107115531752104, "grad_norm": 2.5828642223393214, "learning_rate": 1.628128798351158e-05, "loss": 0.6483, "step": 3991 }, { "epoch": 0.6108645753634276, "grad_norm": 2.4672233905559477, "learning_rate": 1.6279359493166183e-05, "loss": 0.8611, "step": 3992 }, { "epoch": 0.611017597551645, "grad_norm": 2.219671457282571, "learning_rate": 1.6277430617180755e-05, "loss": 0.7172, "step": 3993 }, { "epoch": 0.6111706197398623, "grad_norm": 2.4789990648031153, "learning_rate": 1.6275501355673756e-05, "loss": 0.8063, "step": 3994 }, { "epoch": 0.6113236419280795, "grad_norm": 2.3551450398894467, "learning_rate": 1.6273571708763665e-05, "loss": 0.7361, "step": 3995 }, { "epoch": 0.6114766641162969, "grad_norm": 2.529516672420973, "learning_rate": 1.6271641676569e-05, "loss": 0.8252, "step": 3996 }, { "epoch": 0.6116296863045142, "grad_norm": 2.1194774939225143, "learning_rate": 1.626971125920828e-05, "loss": 0.7994, "step": 3997 }, { "epoch": 0.6117827084927314, "grad_norm": 2.130797951705835, "learning_rate": 1.6267780456800066e-05, "loss": 0.7897, "step": 3998 }, { "epoch": 0.6119357306809488, "grad_norm": 2.3216160730004636, "learning_rate": 1.6265849269462936e-05, "loss": 0.8488, "step": 3999 }, { "epoch": 0.612088752869166, "grad_norm": 2.7626139147850224, "learning_rate": 1.6263917697315488e-05, "loss": 0.7936, "step": 4000 }, { "epoch": 0.6122417750573833, "grad_norm": 2.082553450963893, "learning_rate": 1.6261985740476348e-05, "loss": 0.7721, "step": 4001 }, { "epoch": 0.6123947972456006, "grad_norm": 2.3936912056745965, "learning_rate": 1.626005339906417e-05, "loss": 0.8584, "step": 4002 }, { "epoch": 0.6125478194338179, "grad_norm": 2.2087315724400036, "learning_rate": 1.6258120673197623e-05, "loss": 0.7804, "step": 4003 }, { "epoch": 0.6127008416220352, "grad_norm": 2.354583524321165, "learning_rate": 1.6256187562995403e-05, "loss": 0.7383, "step": 4004 }, { "epoch": 0.6128538638102525, "grad_norm": 2.098041581723552, "learning_rate": 1.6254254068576227e-05, "loss": 0.7599, "step": 4005 }, { "epoch": 0.6130068859984698, "grad_norm": 2.0327806604154754, "learning_rate": 1.6252320190058847e-05, "loss": 0.7705, "step": 4006 }, { "epoch": 0.6131599081866871, "grad_norm": 2.496718004055835, "learning_rate": 1.625038592756202e-05, "loss": 0.7543, "step": 4007 }, { "epoch": 0.6133129303749043, "grad_norm": 2.4936165790493554, "learning_rate": 1.6248451281204545e-05, "loss": 0.8754, "step": 4008 }, { "epoch": 0.6134659525631216, "grad_norm": 2.2205096614417075, "learning_rate": 1.624651625110523e-05, "loss": 0.7938, "step": 4009 }, { "epoch": 0.613618974751339, "grad_norm": 2.1301811356198046, "learning_rate": 1.624458083738292e-05, "loss": 0.7472, "step": 4010 }, { "epoch": 0.6137719969395562, "grad_norm": 2.3619766841954326, "learning_rate": 1.624264504015647e-05, "loss": 0.7185, "step": 4011 }, { "epoch": 0.6139250191277735, "grad_norm": 2.6369274563223524, "learning_rate": 1.6240708859544766e-05, "loss": 0.8762, "step": 4012 }, { "epoch": 0.6140780413159909, "grad_norm": 2.3391139454523877, "learning_rate": 1.6238772295666718e-05, "loss": 0.8134, "step": 4013 }, { "epoch": 0.6142310635042081, "grad_norm": 2.472559033054485, "learning_rate": 1.6236835348641254e-05, "loss": 0.8442, "step": 4014 }, { "epoch": 0.6143840856924254, "grad_norm": 2.30552269826598, "learning_rate": 1.6234898018587336e-05, "loss": 0.7564, "step": 4015 }, { "epoch": 0.6145371078806426, "grad_norm": 2.2725183178366644, "learning_rate": 1.623296030562394e-05, "loss": 0.8004, "step": 4016 }, { "epoch": 0.61469013006886, "grad_norm": 2.1530496291189736, "learning_rate": 1.6231022209870063e-05, "loss": 0.7653, "step": 4017 }, { "epoch": 0.6148431522570773, "grad_norm": 2.1620242897683797, "learning_rate": 1.6229083731444745e-05, "loss": 0.7129, "step": 4018 }, { "epoch": 0.6149961744452945, "grad_norm": 2.25085442826096, "learning_rate": 1.622714487046702e-05, "loss": 0.7554, "step": 4019 }, { "epoch": 0.6151491966335119, "grad_norm": 2.169103357595413, "learning_rate": 1.622520562705597e-05, "loss": 0.7607, "step": 4020 }, { "epoch": 0.6153022188217292, "grad_norm": 2.3292061531576955, "learning_rate": 1.622326600133069e-05, "loss": 0.8104, "step": 4021 }, { "epoch": 0.6154552410099464, "grad_norm": 2.170753862893607, "learning_rate": 1.6221325993410295e-05, "loss": 0.745, "step": 4022 }, { "epoch": 0.6156082631981638, "grad_norm": 2.4632365927830207, "learning_rate": 1.6219385603413937e-05, "loss": 0.68, "step": 4023 }, { "epoch": 0.615761285386381, "grad_norm": 2.085184298715942, "learning_rate": 1.6217444831460777e-05, "loss": 0.7114, "step": 4024 }, { "epoch": 0.6159143075745983, "grad_norm": 2.7017932076046374, "learning_rate": 1.621550367767001e-05, "loss": 0.8658, "step": 4025 }, { "epoch": 0.6160673297628156, "grad_norm": 2.66819564271173, "learning_rate": 1.6213562142160842e-05, "loss": 0.8669, "step": 4026 }, { "epoch": 0.6162203519510329, "grad_norm": 2.137016223484698, "learning_rate": 1.621162022505252e-05, "loss": 0.7017, "step": 4027 }, { "epoch": 0.6163733741392502, "grad_norm": 2.3297061769686174, "learning_rate": 1.6209677926464297e-05, "loss": 0.7102, "step": 4028 }, { "epoch": 0.6165263963274675, "grad_norm": 2.2418631040587287, "learning_rate": 1.620773524651546e-05, "loss": 0.8347, "step": 4029 }, { "epoch": 0.6166794185156848, "grad_norm": 2.2617571464792423, "learning_rate": 1.6205792185325318e-05, "loss": 0.8687, "step": 4030 }, { "epoch": 0.6168324407039021, "grad_norm": 2.5724992577181705, "learning_rate": 1.6203848743013202e-05, "loss": 0.724, "step": 4031 }, { "epoch": 0.6169854628921193, "grad_norm": 2.227267112590208, "learning_rate": 1.6201904919698463e-05, "loss": 0.6963, "step": 4032 }, { "epoch": 0.6171384850803366, "grad_norm": 2.172006678534527, "learning_rate": 1.6199960715500482e-05, "loss": 0.7051, "step": 4033 }, { "epoch": 0.617291507268554, "grad_norm": 2.1373746947180012, "learning_rate": 1.619801613053866e-05, "loss": 0.7327, "step": 4034 }, { "epoch": 0.6174445294567712, "grad_norm": 2.11476592544764, "learning_rate": 1.6196071164932415e-05, "loss": 0.7032, "step": 4035 }, { "epoch": 0.6175975516449885, "grad_norm": 2.300625622423079, "learning_rate": 1.6194125818801207e-05, "loss": 0.7691, "step": 4036 }, { "epoch": 0.6177505738332059, "grad_norm": 2.428594165574761, "learning_rate": 1.61921800922645e-05, "loss": 0.723, "step": 4037 }, { "epoch": 0.6179035960214231, "grad_norm": 2.169473308137043, "learning_rate": 1.6190233985441786e-05, "loss": 0.8408, "step": 4038 }, { "epoch": 0.6180566182096404, "grad_norm": 2.5360559820737376, "learning_rate": 1.618828749845259e-05, "loss": 0.8811, "step": 4039 }, { "epoch": 0.6182096403978576, "grad_norm": 2.39178836667541, "learning_rate": 1.6186340631416452e-05, "loss": 0.788, "step": 4040 }, { "epoch": 0.618362662586075, "grad_norm": 2.3918450316007376, "learning_rate": 1.6184393384452928e-05, "loss": 0.7039, "step": 4041 }, { "epoch": 0.6185156847742923, "grad_norm": 2.338128319850209, "learning_rate": 1.6182445757681616e-05, "loss": 0.8499, "step": 4042 }, { "epoch": 0.6186687069625095, "grad_norm": 2.3173269279379136, "learning_rate": 1.6180497751222127e-05, "loss": 0.7303, "step": 4043 }, { "epoch": 0.6188217291507269, "grad_norm": 2.438141149323398, "learning_rate": 1.6178549365194086e-05, "loss": 0.8503, "step": 4044 }, { "epoch": 0.6189747513389442, "grad_norm": 2.2321932250097354, "learning_rate": 1.6176600599717165e-05, "loss": 0.7228, "step": 4045 }, { "epoch": 0.6191277735271614, "grad_norm": 2.3226574409768825, "learning_rate": 1.6174651454911034e-05, "loss": 0.7527, "step": 4046 }, { "epoch": 0.6192807957153788, "grad_norm": 2.0741031426015915, "learning_rate": 1.6172701930895404e-05, "loss": 0.7485, "step": 4047 }, { "epoch": 0.619433817903596, "grad_norm": 2.4837597123114494, "learning_rate": 1.6170752027790002e-05, "loss": 0.7971, "step": 4048 }, { "epoch": 0.6195868400918133, "grad_norm": 2.349665609451401, "learning_rate": 1.6168801745714576e-05, "loss": 0.7738, "step": 4049 }, { "epoch": 0.6197398622800306, "grad_norm": 2.3536660431739045, "learning_rate": 1.6166851084788898e-05, "loss": 0.7474, "step": 4050 }, { "epoch": 0.6198928844682479, "grad_norm": 2.516752609598269, "learning_rate": 1.6164900045132777e-05, "loss": 0.7539, "step": 4051 }, { "epoch": 0.6200459066564652, "grad_norm": 2.5523705841883824, "learning_rate": 1.6162948626866023e-05, "loss": 0.8509, "step": 4052 }, { "epoch": 0.6201989288446825, "grad_norm": 2.2026496418341495, "learning_rate": 1.6160996830108486e-05, "loss": 0.8403, "step": 4053 }, { "epoch": 0.6203519510328998, "grad_norm": 2.017536909380819, "learning_rate": 1.615904465498003e-05, "loss": 0.6502, "step": 4054 }, { "epoch": 0.6205049732211171, "grad_norm": 2.174952113668595, "learning_rate": 1.6157092101600548e-05, "loss": 0.7717, "step": 4055 }, { "epoch": 0.6206579954093343, "grad_norm": 1.8763628970458017, "learning_rate": 1.6155139170089956e-05, "loss": 0.757, "step": 4056 }, { "epoch": 0.6208110175975516, "grad_norm": 2.322317658519997, "learning_rate": 1.6153185860568187e-05, "loss": 0.7672, "step": 4057 }, { "epoch": 0.620964039785769, "grad_norm": 2.3623718327673764, "learning_rate": 1.61512321731552e-05, "loss": 0.7172, "step": 4058 }, { "epoch": 0.6211170619739862, "grad_norm": 2.1276117730331805, "learning_rate": 1.6149278107970983e-05, "loss": 0.7645, "step": 4059 }, { "epoch": 0.6212700841622035, "grad_norm": 2.564749256711884, "learning_rate": 1.6147323665135542e-05, "loss": 0.7936, "step": 4060 }, { "epoch": 0.6214231063504209, "grad_norm": 2.413120918684202, "learning_rate": 1.6145368844768908e-05, "loss": 0.6705, "step": 4061 }, { "epoch": 0.6215761285386381, "grad_norm": 2.445889193129111, "learning_rate": 1.6143413646991132e-05, "loss": 0.8038, "step": 4062 }, { "epoch": 0.6217291507268554, "grad_norm": 2.117825343139216, "learning_rate": 1.6141458071922285e-05, "loss": 0.7411, "step": 4063 }, { "epoch": 0.6218821729150726, "grad_norm": 2.344357755958016, "learning_rate": 1.6139502119682478e-05, "loss": 0.855, "step": 4064 }, { "epoch": 0.62203519510329, "grad_norm": 2.275018128282366, "learning_rate": 1.613754579039183e-05, "loss": 0.7594, "step": 4065 }, { "epoch": 0.6221882172915073, "grad_norm": 2.123207758811535, "learning_rate": 1.613558908417048e-05, "loss": 0.831, "step": 4066 }, { "epoch": 0.6223412394797245, "grad_norm": 2.318206412634962, "learning_rate": 1.6133632001138604e-05, "loss": 0.7474, "step": 4067 }, { "epoch": 0.6224942616679419, "grad_norm": 2.3643768786552535, "learning_rate": 1.613167454141639e-05, "loss": 0.728, "step": 4068 }, { "epoch": 0.6226472838561592, "grad_norm": 2.1021830426618084, "learning_rate": 1.6129716705124055e-05, "loss": 0.7078, "step": 4069 }, { "epoch": 0.6228003060443764, "grad_norm": 2.1384768150213858, "learning_rate": 1.612775849238184e-05, "loss": 0.7496, "step": 4070 }, { "epoch": 0.6229533282325938, "grad_norm": 2.0607954889554434, "learning_rate": 1.612579990331e-05, "loss": 0.6618, "step": 4071 }, { "epoch": 0.623106350420811, "grad_norm": 2.370009379278904, "learning_rate": 1.612384093802883e-05, "loss": 0.8368, "step": 4072 }, { "epoch": 0.6232593726090283, "grad_norm": 2.0723916821807427, "learning_rate": 1.612188159665863e-05, "loss": 0.6796, "step": 4073 }, { "epoch": 0.6234123947972456, "grad_norm": 2.7584617297969425, "learning_rate": 1.6119921879319728e-05, "loss": 0.8882, "step": 4074 }, { "epoch": 0.6235654169854629, "grad_norm": 2.436001429058616, "learning_rate": 1.6117961786132485e-05, "loss": 0.7565, "step": 4075 }, { "epoch": 0.6237184391736802, "grad_norm": 2.4908275513686626, "learning_rate": 1.6116001317217277e-05, "loss": 0.8403, "step": 4076 }, { "epoch": 0.6238714613618975, "grad_norm": 2.1720916230556515, "learning_rate": 1.6114040472694496e-05, "loss": 0.7549, "step": 4077 }, { "epoch": 0.6240244835501148, "grad_norm": 2.1638352336847517, "learning_rate": 1.611207925268458e-05, "loss": 0.6232, "step": 4078 }, { "epoch": 0.6241775057383321, "grad_norm": 2.254748294563211, "learning_rate": 1.611011765730796e-05, "loss": 0.8494, "step": 4079 }, { "epoch": 0.6243305279265493, "grad_norm": 2.051169583162102, "learning_rate": 1.610815568668511e-05, "loss": 0.7465, "step": 4080 }, { "epoch": 0.6244835501147666, "grad_norm": 2.480860331299139, "learning_rate": 1.610619334093653e-05, "loss": 0.9505, "step": 4081 }, { "epoch": 0.624636572302984, "grad_norm": 2.581081394347132, "learning_rate": 1.6104230620182724e-05, "loss": 0.8689, "step": 4082 }, { "epoch": 0.6247895944912012, "grad_norm": 2.4647455923728416, "learning_rate": 1.610226752454424e-05, "loss": 0.7986, "step": 4083 }, { "epoch": 0.6249426166794185, "grad_norm": 2.3084776587149376, "learning_rate": 1.6100304054141634e-05, "loss": 0.8543, "step": 4084 }, { "epoch": 0.6250956388676359, "grad_norm": 2.278351087731257, "learning_rate": 1.609834020909549e-05, "loss": 0.8122, "step": 4085 }, { "epoch": 0.6252486610558531, "grad_norm": 2.276115015814235, "learning_rate": 1.609637598952642e-05, "loss": 0.647, "step": 4086 }, { "epoch": 0.6254016832440704, "grad_norm": 2.4313627499110235, "learning_rate": 1.609441139555505e-05, "loss": 0.7891, "step": 4087 }, { "epoch": 0.6255547054322876, "grad_norm": 2.4151414352242297, "learning_rate": 1.6092446427302028e-05, "loss": 0.7811, "step": 4088 }, { "epoch": 0.625707727620505, "grad_norm": 2.1618931472619822, "learning_rate": 1.609048108488805e-05, "loss": 0.8202, "step": 4089 }, { "epoch": 0.6258607498087223, "grad_norm": 2.2892118398805485, "learning_rate": 1.6088515368433794e-05, "loss": 0.7341, "step": 4090 }, { "epoch": 0.6260137719969395, "grad_norm": 2.319986784666118, "learning_rate": 1.6086549278059988e-05, "loss": 0.7378, "step": 4091 }, { "epoch": 0.6261667941851569, "grad_norm": 2.388332513852767, "learning_rate": 1.6084582813887385e-05, "loss": 0.8177, "step": 4092 }, { "epoch": 0.6263198163733742, "grad_norm": 2.4953449296501113, "learning_rate": 1.608261597603675e-05, "loss": 0.7417, "step": 4093 }, { "epoch": 0.6264728385615914, "grad_norm": 2.2534640175752516, "learning_rate": 1.608064876462887e-05, "loss": 0.6792, "step": 4094 }, { "epoch": 0.6266258607498087, "grad_norm": 2.1813003582366566, "learning_rate": 1.6078681179784563e-05, "loss": 0.7858, "step": 4095 }, { "epoch": 0.626778882938026, "grad_norm": 2.265021488746793, "learning_rate": 1.6076713221624663e-05, "loss": 0.7861, "step": 4096 }, { "epoch": 0.6269319051262433, "grad_norm": 2.394134574972118, "learning_rate": 1.6074744890270033e-05, "loss": 0.7879, "step": 4097 }, { "epoch": 0.6270849273144606, "grad_norm": 2.324105771909182, "learning_rate": 1.6072776185841553e-05, "loss": 0.8359, "step": 4098 }, { "epoch": 0.6272379495026779, "grad_norm": 2.238004787754105, "learning_rate": 1.6070807108460134e-05, "loss": 0.7473, "step": 4099 }, { "epoch": 0.6273909716908952, "grad_norm": 2.4442647854498007, "learning_rate": 1.60688376582467e-05, "loss": 0.7507, "step": 4100 }, { "epoch": 0.6275439938791124, "grad_norm": 2.2586968036292965, "learning_rate": 1.6066867835322205e-05, "loss": 0.7854, "step": 4101 }, { "epoch": 0.6276970160673297, "grad_norm": 2.1196358650293647, "learning_rate": 1.606489763980762e-05, "loss": 0.6373, "step": 4102 }, { "epoch": 0.6278500382555471, "grad_norm": 2.211548528141079, "learning_rate": 1.6062927071823944e-05, "loss": 0.6638, "step": 4103 }, { "epoch": 0.6280030604437643, "grad_norm": 2.4010811990195124, "learning_rate": 1.6060956131492203e-05, "loss": 0.7585, "step": 4104 }, { "epoch": 0.6281560826319816, "grad_norm": 2.5356718330834416, "learning_rate": 1.605898481893343e-05, "loss": 0.8063, "step": 4105 }, { "epoch": 0.628309104820199, "grad_norm": 2.205202390214095, "learning_rate": 1.6057013134268703e-05, "loss": 0.7701, "step": 4106 }, { "epoch": 0.6284621270084162, "grad_norm": 2.2819514096743228, "learning_rate": 1.6055041077619094e-05, "loss": 0.7002, "step": 4107 }, { "epoch": 0.6286151491966335, "grad_norm": 2.3022349264292745, "learning_rate": 1.6053068649105735e-05, "loss": 0.9141, "step": 4108 }, { "epoch": 0.6287681713848507, "grad_norm": 2.277035553809249, "learning_rate": 1.6051095848849747e-05, "loss": 0.8087, "step": 4109 }, { "epoch": 0.6289211935730681, "grad_norm": 2.4443791729171997, "learning_rate": 1.604912267697229e-05, "loss": 0.7876, "step": 4110 }, { "epoch": 0.6290742157612854, "grad_norm": 2.1725975257265784, "learning_rate": 1.6047149133594546e-05, "loss": 0.7552, "step": 4111 }, { "epoch": 0.6292272379495026, "grad_norm": 2.328562803605211, "learning_rate": 1.6045175218837716e-05, "loss": 0.7316, "step": 4112 }, { "epoch": 0.62938026013772, "grad_norm": 2.247396465495871, "learning_rate": 1.604320093282303e-05, "loss": 0.6445, "step": 4113 }, { "epoch": 0.6295332823259373, "grad_norm": 2.173139920348392, "learning_rate": 1.6041226275671727e-05, "loss": 0.7427, "step": 4114 }, { "epoch": 0.6296863045141545, "grad_norm": 2.099285404139486, "learning_rate": 1.603925124750509e-05, "loss": 0.7111, "step": 4115 }, { "epoch": 0.6298393267023719, "grad_norm": 1.9917731708573834, "learning_rate": 1.603727584844441e-05, "loss": 0.6858, "step": 4116 }, { "epoch": 0.6299923488905891, "grad_norm": 2.215400393635466, "learning_rate": 1.6035300078611e-05, "loss": 0.7705, "step": 4117 }, { "epoch": 0.6301453710788064, "grad_norm": 2.0245212493288447, "learning_rate": 1.6033323938126198e-05, "loss": 0.6323, "step": 4118 }, { "epoch": 0.6302983932670237, "grad_norm": 2.3197733532871885, "learning_rate": 1.603134742711138e-05, "loss": 0.7391, "step": 4119 }, { "epoch": 0.630451415455241, "grad_norm": 2.4936860390189213, "learning_rate": 1.6029370545687912e-05, "loss": 0.8139, "step": 4120 }, { "epoch": 0.6306044376434583, "grad_norm": 2.181252151265491, "learning_rate": 1.6027393293977217e-05, "loss": 0.7221, "step": 4121 }, { "epoch": 0.6307574598316756, "grad_norm": 2.1530316724351755, "learning_rate": 1.602541567210072e-05, "loss": 0.7455, "step": 4122 }, { "epoch": 0.6309104820198929, "grad_norm": 2.215998435857359, "learning_rate": 1.6023437680179875e-05, "loss": 0.7335, "step": 4123 }, { "epoch": 0.6310635042081102, "grad_norm": 2.4792874976399717, "learning_rate": 1.6021459318336154e-05, "loss": 0.7476, "step": 4124 }, { "epoch": 0.6312165263963274, "grad_norm": 2.026954969477471, "learning_rate": 1.6019480586691062e-05, "loss": 0.5765, "step": 4125 }, { "epoch": 0.6313695485845447, "grad_norm": 2.595157669941229, "learning_rate": 1.6017501485366125e-05, "loss": 0.8033, "step": 4126 }, { "epoch": 0.6315225707727621, "grad_norm": 2.3899715610542094, "learning_rate": 1.6015522014482877e-05, "loss": 0.7496, "step": 4127 }, { "epoch": 0.6316755929609793, "grad_norm": 2.234234590699503, "learning_rate": 1.601354217416289e-05, "loss": 0.7119, "step": 4128 }, { "epoch": 0.6318286151491966, "grad_norm": 2.359124165929824, "learning_rate": 1.6011561964527748e-05, "loss": 0.8503, "step": 4129 }, { "epoch": 0.631981637337414, "grad_norm": 2.3720387035217527, "learning_rate": 1.6009581385699076e-05, "loss": 0.7733, "step": 4130 }, { "epoch": 0.6321346595256312, "grad_norm": 2.134605187970971, "learning_rate": 1.6007600437798495e-05, "loss": 0.7318, "step": 4131 }, { "epoch": 0.6322876817138485, "grad_norm": 2.129685324766079, "learning_rate": 1.6005619120947672e-05, "loss": 0.7498, "step": 4132 }, { "epoch": 0.6324407039020657, "grad_norm": 2.3785700225702224, "learning_rate": 1.6003637435268285e-05, "loss": 0.7669, "step": 4133 }, { "epoch": 0.6325937260902831, "grad_norm": 2.2794316845639355, "learning_rate": 1.6001655380882036e-05, "loss": 0.8118, "step": 4134 }, { "epoch": 0.6327467482785004, "grad_norm": 2.7039235062628566, "learning_rate": 1.599967295791065e-05, "loss": 0.8262, "step": 4135 }, { "epoch": 0.6328997704667176, "grad_norm": 2.3953376457651268, "learning_rate": 1.599769016647588e-05, "loss": 0.6636, "step": 4136 }, { "epoch": 0.633052792654935, "grad_norm": 2.4461597091487954, "learning_rate": 1.599570700669949e-05, "loss": 0.8773, "step": 4137 }, { "epoch": 0.6332058148431523, "grad_norm": 2.2968863879054484, "learning_rate": 1.599372347870328e-05, "loss": 0.7605, "step": 4138 }, { "epoch": 0.6333588370313695, "grad_norm": 2.319391750065703, "learning_rate": 1.5991739582609066e-05, "loss": 0.6621, "step": 4139 }, { "epoch": 0.6335118592195869, "grad_norm": 2.0426803522722903, "learning_rate": 1.598975531853868e-05, "loss": 0.7313, "step": 4140 }, { "epoch": 0.6336648814078041, "grad_norm": 2.2009394418994286, "learning_rate": 1.598777068661399e-05, "loss": 0.8457, "step": 4141 }, { "epoch": 0.6338179035960214, "grad_norm": 2.265228617457404, "learning_rate": 1.5985785686956877e-05, "loss": 0.7932, "step": 4142 }, { "epoch": 0.6339709257842387, "grad_norm": 2.2768626036052595, "learning_rate": 1.598380031968925e-05, "loss": 0.6946, "step": 4143 }, { "epoch": 0.634123947972456, "grad_norm": 2.2367807099521615, "learning_rate": 1.5981814584933037e-05, "loss": 0.7776, "step": 4144 }, { "epoch": 0.6342769701606733, "grad_norm": 2.174040440407306, "learning_rate": 1.597982848281019e-05, "loss": 0.6848, "step": 4145 }, { "epoch": 0.6344299923488906, "grad_norm": 2.2403010646934223, "learning_rate": 1.597784201344268e-05, "loss": 0.7351, "step": 4146 }, { "epoch": 0.6345830145371079, "grad_norm": 2.313387309452502, "learning_rate": 1.5975855176952505e-05, "loss": 0.8039, "step": 4147 }, { "epoch": 0.6347360367253252, "grad_norm": 2.2112838415716225, "learning_rate": 1.5973867973461687e-05, "loss": 0.7345, "step": 4148 }, { "epoch": 0.6348890589135424, "grad_norm": 2.1155329059767105, "learning_rate": 1.5971880403092267e-05, "loss": 0.6863, "step": 4149 }, { "epoch": 0.6350420811017597, "grad_norm": 2.3988266632172373, "learning_rate": 1.5969892465966315e-05, "loss": 0.7015, "step": 4150 }, { "epoch": 0.6351951032899771, "grad_norm": 2.5735735190206968, "learning_rate": 1.5967904162205906e-05, "loss": 0.7143, "step": 4151 }, { "epoch": 0.6353481254781943, "grad_norm": 2.276882522860152, "learning_rate": 1.5965915491933154e-05, "loss": 0.7921, "step": 4152 }, { "epoch": 0.6355011476664116, "grad_norm": 2.3309317873483577, "learning_rate": 1.5963926455270196e-05, "loss": 0.7485, "step": 4153 }, { "epoch": 0.635654169854629, "grad_norm": 2.1790982554114797, "learning_rate": 1.5961937052339184e-05, "loss": 0.5373, "step": 4154 }, { "epoch": 0.6358071920428462, "grad_norm": 2.666278766782337, "learning_rate": 1.5959947283262296e-05, "loss": 0.7133, "step": 4155 }, { "epoch": 0.6359602142310635, "grad_norm": 2.2551622266417404, "learning_rate": 1.5957957148161727e-05, "loss": 0.7513, "step": 4156 }, { "epoch": 0.6361132364192807, "grad_norm": 2.6383466391094563, "learning_rate": 1.5955966647159706e-05, "loss": 0.8115, "step": 4157 }, { "epoch": 0.6362662586074981, "grad_norm": 2.1419449882341124, "learning_rate": 1.5953975780378466e-05, "loss": 0.8361, "step": 4158 }, { "epoch": 0.6364192807957154, "grad_norm": 2.574103577501881, "learning_rate": 1.5951984547940286e-05, "loss": 0.8792, "step": 4159 }, { "epoch": 0.6365723029839326, "grad_norm": 2.1063506557932348, "learning_rate": 1.594999294996745e-05, "loss": 0.7475, "step": 4160 }, { "epoch": 0.63672532517215, "grad_norm": 2.321127303574338, "learning_rate": 1.594800098658227e-05, "loss": 0.7782, "step": 4161 }, { "epoch": 0.6368783473603673, "grad_norm": 2.1374323400738633, "learning_rate": 1.594600865790708e-05, "loss": 0.6834, "step": 4162 }, { "epoch": 0.6370313695485845, "grad_norm": 2.260939834196185, "learning_rate": 1.594401596406424e-05, "loss": 0.7039, "step": 4163 }, { "epoch": 0.6371843917368019, "grad_norm": 2.0757294361561622, "learning_rate": 1.5942022905176126e-05, "loss": 0.6579, "step": 4164 }, { "epoch": 0.6373374139250191, "grad_norm": 2.437640815822106, "learning_rate": 1.5940029481365137e-05, "loss": 0.7543, "step": 4165 }, { "epoch": 0.6374904361132364, "grad_norm": 2.1674678167376484, "learning_rate": 1.59380356927537e-05, "loss": 0.6833, "step": 4166 }, { "epoch": 0.6376434583014537, "grad_norm": 2.3065774043485363, "learning_rate": 1.5936041539464266e-05, "loss": 0.8161, "step": 4167 }, { "epoch": 0.637796480489671, "grad_norm": 2.3020071837379663, "learning_rate": 1.5934047021619295e-05, "loss": 0.7168, "step": 4168 }, { "epoch": 0.6379495026778883, "grad_norm": 2.379831436116885, "learning_rate": 1.5932052139341285e-05, "loss": 0.8322, "step": 4169 }, { "epoch": 0.6381025248661056, "grad_norm": 2.215752580256027, "learning_rate": 1.5930056892752745e-05, "loss": 0.789, "step": 4170 }, { "epoch": 0.6382555470543229, "grad_norm": 2.0602789500986307, "learning_rate": 1.5928061281976215e-05, "loss": 0.7685, "step": 4171 }, { "epoch": 0.6384085692425402, "grad_norm": 2.249026928393553, "learning_rate": 1.592606530713425e-05, "loss": 0.6997, "step": 4172 }, { "epoch": 0.6385615914307574, "grad_norm": 2.2625016939238534, "learning_rate": 1.592406896834943e-05, "loss": 0.8841, "step": 4173 }, { "epoch": 0.6387146136189747, "grad_norm": 2.7844803793506427, "learning_rate": 1.592207226574436e-05, "loss": 0.8634, "step": 4174 }, { "epoch": 0.6388676358071921, "grad_norm": 2.2100873097986526, "learning_rate": 1.5920075199441665e-05, "loss": 0.7085, "step": 4175 }, { "epoch": 0.6390206579954093, "grad_norm": 2.339271594267563, "learning_rate": 1.591807776956399e-05, "loss": 0.6688, "step": 4176 }, { "epoch": 0.6391736801836266, "grad_norm": 2.1828890019441634, "learning_rate": 1.591607997623401e-05, "loss": 0.7627, "step": 4177 }, { "epoch": 0.639326702371844, "grad_norm": 2.4192884203876304, "learning_rate": 1.5914081819574415e-05, "loss": 0.8232, "step": 4178 }, { "epoch": 0.6394797245600612, "grad_norm": 2.235385282434219, "learning_rate": 1.5912083299707924e-05, "loss": 0.6711, "step": 4179 }, { "epoch": 0.6396327467482785, "grad_norm": 2.4216983966310823, "learning_rate": 1.5910084416757264e-05, "loss": 0.8284, "step": 4180 }, { "epoch": 0.6397857689364957, "grad_norm": 2.326093648518836, "learning_rate": 1.59080851708452e-05, "loss": 0.8516, "step": 4181 }, { "epoch": 0.6399387911247131, "grad_norm": 2.4974683641820157, "learning_rate": 1.5906085562094512e-05, "loss": 0.7125, "step": 4182 }, { "epoch": 0.6400918133129304, "grad_norm": 2.2514434183700467, "learning_rate": 1.590408559062801e-05, "loss": 0.7538, "step": 4183 }, { "epoch": 0.6402448355011476, "grad_norm": 2.2360998818579705, "learning_rate": 1.5902085256568513e-05, "loss": 0.7095, "step": 4184 }, { "epoch": 0.640397857689365, "grad_norm": 2.4038858226459743, "learning_rate": 1.5900084560038866e-05, "loss": 0.8299, "step": 4185 }, { "epoch": 0.6405508798775823, "grad_norm": 2.1972831229466845, "learning_rate": 1.589808350116195e-05, "loss": 0.7232, "step": 4186 }, { "epoch": 0.6407039020657995, "grad_norm": 2.482921431242963, "learning_rate": 1.5896082080060652e-05, "loss": 0.7147, "step": 4187 }, { "epoch": 0.6408569242540169, "grad_norm": 2.2355154693159904, "learning_rate": 1.5894080296857888e-05, "loss": 0.7198, "step": 4188 }, { "epoch": 0.6410099464422341, "grad_norm": 2.2241296107205524, "learning_rate": 1.5892078151676594e-05, "loss": 0.7909, "step": 4189 }, { "epoch": 0.6411629686304514, "grad_norm": 2.605633584782082, "learning_rate": 1.589007564463973e-05, "loss": 0.8084, "step": 4190 }, { "epoch": 0.6413159908186687, "grad_norm": 2.454508772411002, "learning_rate": 1.588807277587028e-05, "loss": 0.8879, "step": 4191 }, { "epoch": 0.641469013006886, "grad_norm": 2.4298870166317212, "learning_rate": 1.5886069545491246e-05, "loss": 0.6854, "step": 4192 }, { "epoch": 0.6416220351951033, "grad_norm": 2.4906106059246733, "learning_rate": 1.5884065953625657e-05, "loss": 0.8755, "step": 4193 }, { "epoch": 0.6417750573833206, "grad_norm": 2.1516796449874516, "learning_rate": 1.5882062000396558e-05, "loss": 0.7066, "step": 4194 }, { "epoch": 0.6419280795715379, "grad_norm": 2.493491767448356, "learning_rate": 1.588005768592702e-05, "loss": 0.7756, "step": 4195 }, { "epoch": 0.6420811017597552, "grad_norm": 2.6459928697516086, "learning_rate": 1.587805301034014e-05, "loss": 0.8202, "step": 4196 }, { "epoch": 0.6422341239479724, "grad_norm": 2.4018200071295177, "learning_rate": 1.587604797375902e-05, "loss": 0.6751, "step": 4197 }, { "epoch": 0.6423871461361897, "grad_norm": 2.367904569625447, "learning_rate": 1.5874042576306817e-05, "loss": 0.7709, "step": 4198 }, { "epoch": 0.6425401683244071, "grad_norm": 2.3896747175232327, "learning_rate": 1.5872036818106672e-05, "loss": 0.8643, "step": 4199 }, { "epoch": 0.6426931905126243, "grad_norm": 2.7365450842405306, "learning_rate": 1.5870030699281783e-05, "loss": 0.709, "step": 4200 }, { "epoch": 0.6428462127008416, "grad_norm": 1.9490071277493157, "learning_rate": 1.5868024219955337e-05, "loss": 0.61, "step": 4201 }, { "epoch": 0.642999234889059, "grad_norm": 2.2493824782464, "learning_rate": 1.5866017380250573e-05, "loss": 0.6998, "step": 4202 }, { "epoch": 0.6431522570772762, "grad_norm": 2.078835252832914, "learning_rate": 1.5864010180290732e-05, "loss": 0.6524, "step": 4203 }, { "epoch": 0.6433052792654935, "grad_norm": 2.295853031190946, "learning_rate": 1.5862002620199086e-05, "loss": 0.7786, "step": 4204 }, { "epoch": 0.6434583014537107, "grad_norm": 2.5791704393175507, "learning_rate": 1.5859994700098925e-05, "loss": 0.7799, "step": 4205 }, { "epoch": 0.6436113236419281, "grad_norm": 2.288695724711449, "learning_rate": 1.5857986420113568e-05, "loss": 0.7069, "step": 4206 }, { "epoch": 0.6437643458301454, "grad_norm": 2.0640414343256586, "learning_rate": 1.5855977780366347e-05, "loss": 0.6614, "step": 4207 }, { "epoch": 0.6439173680183626, "grad_norm": 2.687001513306309, "learning_rate": 1.5853968780980624e-05, "loss": 0.6861, "step": 4208 }, { "epoch": 0.64407039020658, "grad_norm": 2.3769771682509444, "learning_rate": 1.585195942207977e-05, "loss": 0.7767, "step": 4209 }, { "epoch": 0.6442234123947972, "grad_norm": 2.5169919724774026, "learning_rate": 1.58499497037872e-05, "loss": 0.7743, "step": 4210 }, { "epoch": 0.6443764345830145, "grad_norm": 2.3993993027725664, "learning_rate": 1.5847939626226336e-05, "loss": 0.7773, "step": 4211 }, { "epoch": 0.6445294567712319, "grad_norm": 2.37377422024334, "learning_rate": 1.5845929189520623e-05, "loss": 0.8059, "step": 4212 }, { "epoch": 0.6446824789594491, "grad_norm": 2.4955691858759743, "learning_rate": 1.5843918393793523e-05, "loss": 0.6907, "step": 4213 }, { "epoch": 0.6448355011476664, "grad_norm": 2.4733738421996296, "learning_rate": 1.5841907239168535e-05, "loss": 0.909, "step": 4214 }, { "epoch": 0.6449885233358837, "grad_norm": 2.1763820948064976, "learning_rate": 1.5839895725769173e-05, "loss": 0.7843, "step": 4215 }, { "epoch": 0.645141545524101, "grad_norm": 2.1398197268300114, "learning_rate": 1.5837883853718964e-05, "loss": 0.6951, "step": 4216 }, { "epoch": 0.6452945677123183, "grad_norm": 2.5145027467966528, "learning_rate": 1.583587162314147e-05, "loss": 0.8311, "step": 4217 }, { "epoch": 0.6454475899005355, "grad_norm": 2.4465579575147656, "learning_rate": 1.5833859034160275e-05, "loss": 0.6852, "step": 4218 }, { "epoch": 0.6456006120887529, "grad_norm": 2.3378074319427453, "learning_rate": 1.583184608689897e-05, "loss": 0.7895, "step": 4219 }, { "epoch": 0.6457536342769702, "grad_norm": 1.9247135984095447, "learning_rate": 1.5829832781481186e-05, "loss": 0.6891, "step": 4220 }, { "epoch": 0.6459066564651874, "grad_norm": 2.2282892978052735, "learning_rate": 1.582781911803056e-05, "loss": 0.736, "step": 4221 }, { "epoch": 0.6460596786534047, "grad_norm": 2.285669724890519, "learning_rate": 1.5825805096670766e-05, "loss": 0.7469, "step": 4222 }, { "epoch": 0.6462127008416221, "grad_norm": 2.581987099916605, "learning_rate": 1.5823790717525487e-05, "loss": 0.8131, "step": 4223 }, { "epoch": 0.6463657230298393, "grad_norm": 2.00862726818773, "learning_rate": 1.5821775980718443e-05, "loss": 0.7074, "step": 4224 }, { "epoch": 0.6465187452180566, "grad_norm": 2.3151539318728314, "learning_rate": 1.581976088637336e-05, "loss": 0.711, "step": 4225 }, { "epoch": 0.6466717674062739, "grad_norm": 2.159998848069135, "learning_rate": 1.581774543461399e-05, "loss": 0.7569, "step": 4226 }, { "epoch": 0.6468247895944912, "grad_norm": 2.2315436312344916, "learning_rate": 1.5815729625564116e-05, "loss": 0.6982, "step": 4227 }, { "epoch": 0.6469778117827085, "grad_norm": 2.3296082075550983, "learning_rate": 1.5813713459347532e-05, "loss": 0.6977, "step": 4228 }, { "epoch": 0.6471308339709257, "grad_norm": 2.3650900215329758, "learning_rate": 1.5811696936088066e-05, "loss": 0.7675, "step": 4229 }, { "epoch": 0.6472838561591431, "grad_norm": 2.0510993157963515, "learning_rate": 1.5809680055909552e-05, "loss": 0.5907, "step": 4230 }, { "epoch": 0.6474368783473604, "grad_norm": 2.392826682725871, "learning_rate": 1.580766281893586e-05, "loss": 0.756, "step": 4231 }, { "epoch": 0.6475899005355776, "grad_norm": 2.159486959255893, "learning_rate": 1.5805645225290872e-05, "loss": 0.7136, "step": 4232 }, { "epoch": 0.647742922723795, "grad_norm": 2.13632556895698, "learning_rate": 1.58036272750985e-05, "loss": 0.7156, "step": 4233 }, { "epoch": 0.6478959449120122, "grad_norm": 2.2967568833902283, "learning_rate": 1.5801608968482676e-05, "loss": 0.8513, "step": 4234 }, { "epoch": 0.6480489671002295, "grad_norm": 2.1817154806929016, "learning_rate": 1.5799590305567345e-05, "loss": 0.7536, "step": 4235 }, { "epoch": 0.6482019892884469, "grad_norm": 2.0809234386044073, "learning_rate": 1.5797571286476484e-05, "loss": 0.7957, "step": 4236 }, { "epoch": 0.6483550114766641, "grad_norm": 2.6517476323715115, "learning_rate": 1.5795551911334096e-05, "loss": 0.7822, "step": 4237 }, { "epoch": 0.6485080336648814, "grad_norm": 2.350343075077592, "learning_rate": 1.579353218026419e-05, "loss": 0.7427, "step": 4238 }, { "epoch": 0.6486610558530987, "grad_norm": 2.1243287340078383, "learning_rate": 1.579151209339081e-05, "loss": 0.8415, "step": 4239 }, { "epoch": 0.648814078041316, "grad_norm": 1.989695986982748, "learning_rate": 1.5789491650838013e-05, "loss": 0.733, "step": 4240 }, { "epoch": 0.6489671002295333, "grad_norm": 2.215200133687346, "learning_rate": 1.5787470852729886e-05, "loss": 0.7286, "step": 4241 }, { "epoch": 0.6491201224177505, "grad_norm": 2.002861466308425, "learning_rate": 1.5785449699190533e-05, "loss": 0.708, "step": 4242 }, { "epoch": 0.6492731446059679, "grad_norm": 2.198265404670774, "learning_rate": 1.578342819034408e-05, "loss": 0.6877, "step": 4243 }, { "epoch": 0.6494261667941852, "grad_norm": 2.2444980427910064, "learning_rate": 1.578140632631468e-05, "loss": 0.7127, "step": 4244 }, { "epoch": 0.6495791889824024, "grad_norm": 2.3743717173706855, "learning_rate": 1.57793841072265e-05, "loss": 0.7277, "step": 4245 }, { "epoch": 0.6497322111706197, "grad_norm": 2.094820997637052, "learning_rate": 1.5777361533203733e-05, "loss": 0.6453, "step": 4246 }, { "epoch": 0.6498852333588371, "grad_norm": 2.359769995436231, "learning_rate": 1.577533860437059e-05, "loss": 0.7156, "step": 4247 }, { "epoch": 0.6500382555470543, "grad_norm": 2.4827444410396127, "learning_rate": 1.577331532085131e-05, "loss": 0.8702, "step": 4248 }, { "epoch": 0.6501912777352716, "grad_norm": 2.392016169668508, "learning_rate": 1.5771291682770156e-05, "loss": 0.8331, "step": 4249 }, { "epoch": 0.6503442999234889, "grad_norm": 2.3940592611494895, "learning_rate": 1.57692676902514e-05, "loss": 0.7551, "step": 4250 }, { "epoch": 0.6504973221117062, "grad_norm": 2.3839301346084025, "learning_rate": 1.5767243343419342e-05, "loss": 0.8463, "step": 4251 }, { "epoch": 0.6506503442999235, "grad_norm": 2.4202585155038787, "learning_rate": 1.5765218642398314e-05, "loss": 0.7652, "step": 4252 }, { "epoch": 0.6508033664881407, "grad_norm": 2.1053361807836937, "learning_rate": 1.5763193587312655e-05, "loss": 0.6554, "step": 4253 }, { "epoch": 0.6509563886763581, "grad_norm": 1.8735355733426593, "learning_rate": 1.5761168178286727e-05, "loss": 0.691, "step": 4254 }, { "epoch": 0.6511094108645754, "grad_norm": 2.224692423547609, "learning_rate": 1.5759142415444925e-05, "loss": 0.696, "step": 4255 }, { "epoch": 0.6512624330527926, "grad_norm": 2.0753222236814683, "learning_rate": 1.575711629891166e-05, "loss": 0.7598, "step": 4256 }, { "epoch": 0.65141545524101, "grad_norm": 2.2697210222802306, "learning_rate": 1.5755089828811362e-05, "loss": 0.6358, "step": 4257 }, { "epoch": 0.6515684774292272, "grad_norm": 2.3751008200480275, "learning_rate": 1.5753063005268483e-05, "loss": 0.7119, "step": 4258 }, { "epoch": 0.6517214996174445, "grad_norm": 2.124262512388635, "learning_rate": 1.5751035828407494e-05, "loss": 0.6876, "step": 4259 }, { "epoch": 0.6518745218056619, "grad_norm": 2.2054070929703276, "learning_rate": 1.57490082983529e-05, "loss": 0.7775, "step": 4260 }, { "epoch": 0.6520275439938791, "grad_norm": 2.077240839261722, "learning_rate": 1.5746980415229217e-05, "loss": 0.6499, "step": 4261 }, { "epoch": 0.6521805661820964, "grad_norm": 2.2208562054207053, "learning_rate": 1.5744952179160985e-05, "loss": 0.8069, "step": 4262 }, { "epoch": 0.6523335883703137, "grad_norm": 2.135417668181956, "learning_rate": 1.5742923590272762e-05, "loss": 0.7635, "step": 4263 }, { "epoch": 0.652486610558531, "grad_norm": 2.2710496430797926, "learning_rate": 1.5740894648689138e-05, "loss": 0.8612, "step": 4264 }, { "epoch": 0.6526396327467483, "grad_norm": 2.097980111255635, "learning_rate": 1.5738865354534714e-05, "loss": 0.7313, "step": 4265 }, { "epoch": 0.6527926549349655, "grad_norm": 2.3366510066682835, "learning_rate": 1.573683570793412e-05, "loss": 0.7146, "step": 4266 }, { "epoch": 0.6529456771231829, "grad_norm": 2.162653896948852, "learning_rate": 1.5734805709012e-05, "loss": 0.7029, "step": 4267 }, { "epoch": 0.6530986993114002, "grad_norm": 2.3656466257792323, "learning_rate": 1.5732775357893024e-05, "loss": 0.8957, "step": 4268 }, { "epoch": 0.6532517214996174, "grad_norm": 2.098111046017186, "learning_rate": 1.573074465470189e-05, "loss": 0.6822, "step": 4269 }, { "epoch": 0.6534047436878347, "grad_norm": 2.4117741397672408, "learning_rate": 1.5728713599563306e-05, "loss": 0.7866, "step": 4270 }, { "epoch": 0.6535577658760521, "grad_norm": 2.174338870646714, "learning_rate": 1.5726682192602015e-05, "loss": 0.8173, "step": 4271 }, { "epoch": 0.6537107880642693, "grad_norm": 2.186513283043854, "learning_rate": 1.572465043394276e-05, "loss": 0.776, "step": 4272 }, { "epoch": 0.6538638102524866, "grad_norm": 2.4290604461156273, "learning_rate": 1.572261832371033e-05, "loss": 0.8385, "step": 4273 }, { "epoch": 0.6540168324407039, "grad_norm": 2.204819777026553, "learning_rate": 1.5720585862029522e-05, "loss": 0.7703, "step": 4274 }, { "epoch": 0.6541698546289212, "grad_norm": 2.3510224710248004, "learning_rate": 1.571855304902516e-05, "loss": 0.7174, "step": 4275 }, { "epoch": 0.6543228768171385, "grad_norm": 2.5013178502584084, "learning_rate": 1.571651988482208e-05, "loss": 0.7605, "step": 4276 }, { "epoch": 0.6544758990053557, "grad_norm": 2.4612060334374517, "learning_rate": 1.5714486369545152e-05, "loss": 0.7707, "step": 4277 }, { "epoch": 0.6546289211935731, "grad_norm": 2.2725366255063655, "learning_rate": 1.571245250331926e-05, "loss": 0.8654, "step": 4278 }, { "epoch": 0.6547819433817904, "grad_norm": 2.216534816275824, "learning_rate": 1.5710418286269313e-05, "loss": 0.68, "step": 4279 }, { "epoch": 0.6549349655700076, "grad_norm": 2.2998627589268352, "learning_rate": 1.570838371852024e-05, "loss": 0.7354, "step": 4280 }, { "epoch": 0.655087987758225, "grad_norm": 2.559430498766767, "learning_rate": 1.5706348800196993e-05, "loss": 0.7744, "step": 4281 }, { "epoch": 0.6552410099464422, "grad_norm": 1.9862496757382806, "learning_rate": 1.570431353142454e-05, "loss": 0.674, "step": 4282 }, { "epoch": 0.6553940321346595, "grad_norm": 2.315587355397246, "learning_rate": 1.570227791232788e-05, "loss": 0.7833, "step": 4283 }, { "epoch": 0.6555470543228769, "grad_norm": 2.175044828127138, "learning_rate": 1.570024194303202e-05, "loss": 0.6864, "step": 4284 }, { "epoch": 0.6557000765110941, "grad_norm": 2.037261450774724, "learning_rate": 1.5698205623662013e-05, "loss": 0.619, "step": 4285 }, { "epoch": 0.6558530986993114, "grad_norm": 2.1956095246913505, "learning_rate": 1.56961689543429e-05, "loss": 0.6983, "step": 4286 }, { "epoch": 0.6560061208875287, "grad_norm": 2.5219192285962375, "learning_rate": 1.569413193519977e-05, "loss": 0.7786, "step": 4287 }, { "epoch": 0.656159143075746, "grad_norm": 1.9680199150456783, "learning_rate": 1.5692094566357723e-05, "loss": 0.6138, "step": 4288 }, { "epoch": 0.6563121652639633, "grad_norm": 2.791679205377205, "learning_rate": 1.569005684794188e-05, "loss": 0.7804, "step": 4289 }, { "epoch": 0.6564651874521805, "grad_norm": 2.441683768664178, "learning_rate": 1.5688018780077387e-05, "loss": 0.7648, "step": 4290 }, { "epoch": 0.6566182096403979, "grad_norm": 2.3593952030509393, "learning_rate": 1.5685980362889407e-05, "loss": 0.7983, "step": 4291 }, { "epoch": 0.6567712318286152, "grad_norm": 2.44463161656858, "learning_rate": 1.5683941596503132e-05, "loss": 0.8427, "step": 4292 }, { "epoch": 0.6569242540168324, "grad_norm": 2.477271920848612, "learning_rate": 1.568190248104377e-05, "loss": 0.709, "step": 4293 }, { "epoch": 0.6570772762050497, "grad_norm": 2.0726834727509247, "learning_rate": 1.567986301663654e-05, "loss": 0.7644, "step": 4294 }, { "epoch": 0.6572302983932671, "grad_norm": 2.4549904996769523, "learning_rate": 1.567782320340671e-05, "loss": 0.6868, "step": 4295 }, { "epoch": 0.6573833205814843, "grad_norm": 2.133291763535228, "learning_rate": 1.5675783041479542e-05, "loss": 0.6548, "step": 4296 }, { "epoch": 0.6575363427697016, "grad_norm": 2.69944862833919, "learning_rate": 1.5673742530980337e-05, "loss": 0.7521, "step": 4297 }, { "epoch": 0.6576893649579189, "grad_norm": 2.318466203294317, "learning_rate": 1.5671701672034406e-05, "loss": 0.6864, "step": 4298 }, { "epoch": 0.6578423871461362, "grad_norm": 2.4267845713840233, "learning_rate": 1.5669660464767087e-05, "loss": 0.8534, "step": 4299 }, { "epoch": 0.6579954093343535, "grad_norm": 2.382318083873871, "learning_rate": 1.5667618909303738e-05, "loss": 0.7832, "step": 4300 }, { "epoch": 0.6581484315225707, "grad_norm": 2.2591848691994003, "learning_rate": 1.566557700576974e-05, "loss": 0.7867, "step": 4301 }, { "epoch": 0.6583014537107881, "grad_norm": 2.1813851643328905, "learning_rate": 1.5663534754290496e-05, "loss": 0.8173, "step": 4302 }, { "epoch": 0.6584544758990054, "grad_norm": 2.3371976725354635, "learning_rate": 1.5661492154991424e-05, "loss": 0.6822, "step": 4303 }, { "epoch": 0.6586074980872226, "grad_norm": 2.4281104499333086, "learning_rate": 1.5659449207997975e-05, "loss": 0.7563, "step": 4304 }, { "epoch": 0.65876052027544, "grad_norm": 2.332443416486321, "learning_rate": 1.5657405913435608e-05, "loss": 0.7429, "step": 4305 }, { "epoch": 0.6589135424636572, "grad_norm": 2.1794383230938497, "learning_rate": 1.565536227142981e-05, "loss": 0.7894, "step": 4306 }, { "epoch": 0.6590665646518745, "grad_norm": 2.2246346257704994, "learning_rate": 1.5653318282106096e-05, "loss": 0.7179, "step": 4307 }, { "epoch": 0.6592195868400919, "grad_norm": 2.278677670858125, "learning_rate": 1.5651273945589984e-05, "loss": 0.8088, "step": 4308 }, { "epoch": 0.6593726090283091, "grad_norm": 2.3163101708985665, "learning_rate": 1.5649229262007035e-05, "loss": 0.7675, "step": 4309 }, { "epoch": 0.6595256312165264, "grad_norm": 2.1382456702220924, "learning_rate": 1.5647184231482816e-05, "loss": 0.6105, "step": 4310 }, { "epoch": 0.6596786534047437, "grad_norm": 2.6339501865403645, "learning_rate": 1.5645138854142926e-05, "loss": 0.7263, "step": 4311 }, { "epoch": 0.659831675592961, "grad_norm": 2.622754234753976, "learning_rate": 1.564309313011297e-05, "loss": 0.8526, "step": 4312 }, { "epoch": 0.6599846977811783, "grad_norm": 2.3998143541848145, "learning_rate": 1.564104705951859e-05, "loss": 0.8304, "step": 4313 }, { "epoch": 0.6601377199693955, "grad_norm": 2.197439826841551, "learning_rate": 1.563900064248544e-05, "loss": 0.7395, "step": 4314 }, { "epoch": 0.6602907421576129, "grad_norm": 2.3044007948624072, "learning_rate": 1.5636953879139204e-05, "loss": 0.7928, "step": 4315 }, { "epoch": 0.6604437643458302, "grad_norm": 2.2479485395298666, "learning_rate": 1.563490676960558e-05, "loss": 0.791, "step": 4316 }, { "epoch": 0.6605967865340474, "grad_norm": 2.3513886944279325, "learning_rate": 1.563285931401028e-05, "loss": 0.7341, "step": 4317 }, { "epoch": 0.6607498087222647, "grad_norm": 2.177828319997214, "learning_rate": 1.563081151247906e-05, "loss": 0.7671, "step": 4318 }, { "epoch": 0.6609028309104821, "grad_norm": 2.192816682090379, "learning_rate": 1.562876336513768e-05, "loss": 0.83, "step": 4319 }, { "epoch": 0.6610558530986993, "grad_norm": 2.0953415645877778, "learning_rate": 1.5626714872111915e-05, "loss": 0.6911, "step": 4320 }, { "epoch": 0.6612088752869166, "grad_norm": 2.2103399446325276, "learning_rate": 1.5624666033527577e-05, "loss": 0.7532, "step": 4321 }, { "epoch": 0.6613618974751339, "grad_norm": 2.4237262824412125, "learning_rate": 1.5622616849510497e-05, "loss": 0.7964, "step": 4322 }, { "epoch": 0.6615149196633512, "grad_norm": 2.100080910063574, "learning_rate": 1.5620567320186522e-05, "loss": 0.5986, "step": 4323 }, { "epoch": 0.6616679418515685, "grad_norm": 2.2211004196585367, "learning_rate": 1.561851744568152e-05, "loss": 0.7725, "step": 4324 }, { "epoch": 0.6618209640397857, "grad_norm": 2.2274632487007358, "learning_rate": 1.561646722612138e-05, "loss": 0.7935, "step": 4325 }, { "epoch": 0.6619739862280031, "grad_norm": 2.0708948012053563, "learning_rate": 1.5614416661632015e-05, "loss": 0.6594, "step": 4326 }, { "epoch": 0.6621270084162203, "grad_norm": 2.2197858350008843, "learning_rate": 1.5612365752339364e-05, "loss": 0.766, "step": 4327 }, { "epoch": 0.6622800306044376, "grad_norm": 2.220248640125984, "learning_rate": 1.5610314498369372e-05, "loss": 0.7767, "step": 4328 }, { "epoch": 0.662433052792655, "grad_norm": 2.380043383610386, "learning_rate": 1.560826289984802e-05, "loss": 0.7342, "step": 4329 }, { "epoch": 0.6625860749808722, "grad_norm": 2.6236063675363046, "learning_rate": 1.5606210956901303e-05, "loss": 0.7274, "step": 4330 }, { "epoch": 0.6627390971690895, "grad_norm": 2.148371880434111, "learning_rate": 1.560415866965524e-05, "loss": 0.6713, "step": 4331 }, { "epoch": 0.6628921193573069, "grad_norm": 2.0025618604021607, "learning_rate": 1.5602106038235874e-05, "loss": 0.6365, "step": 4332 }, { "epoch": 0.6630451415455241, "grad_norm": 2.21401912073108, "learning_rate": 1.5600053062769253e-05, "loss": 0.6911, "step": 4333 }, { "epoch": 0.6631981637337414, "grad_norm": 2.228557608606604, "learning_rate": 1.559799974338147e-05, "loss": 0.6889, "step": 4334 }, { "epoch": 0.6633511859219586, "grad_norm": 2.438672814007364, "learning_rate": 1.5595946080198624e-05, "loss": 0.7678, "step": 4335 }, { "epoch": 0.663504208110176, "grad_norm": 2.2428273489360913, "learning_rate": 1.5593892073346837e-05, "loss": 0.7243, "step": 4336 }, { "epoch": 0.6636572302983933, "grad_norm": 2.4099325849538364, "learning_rate": 1.5591837722952253e-05, "loss": 0.8096, "step": 4337 }, { "epoch": 0.6638102524866105, "grad_norm": 2.19582365303818, "learning_rate": 1.5589783029141038e-05, "loss": 0.7445, "step": 4338 }, { "epoch": 0.6639632746748279, "grad_norm": 2.2901855214253173, "learning_rate": 1.558772799203938e-05, "loss": 0.7788, "step": 4339 }, { "epoch": 0.6641162968630452, "grad_norm": 2.093622043015938, "learning_rate": 1.558567261177349e-05, "loss": 0.8354, "step": 4340 }, { "epoch": 0.6642693190512624, "grad_norm": 2.233333883954918, "learning_rate": 1.5583616888469594e-05, "loss": 0.8035, "step": 4341 }, { "epoch": 0.6644223412394797, "grad_norm": 2.027649735934898, "learning_rate": 1.5581560822253938e-05, "loss": 0.6271, "step": 4342 }, { "epoch": 0.664575363427697, "grad_norm": 2.3410794486831534, "learning_rate": 1.55795044132528e-05, "loss": 0.6748, "step": 4343 }, { "epoch": 0.6647283856159143, "grad_norm": 2.034670891220707, "learning_rate": 1.5577447661592467e-05, "loss": 0.6798, "step": 4344 }, { "epoch": 0.6648814078041316, "grad_norm": 2.267729163658624, "learning_rate": 1.5575390567399253e-05, "loss": 0.7299, "step": 4345 }, { "epoch": 0.6650344299923489, "grad_norm": 2.3485035033426493, "learning_rate": 1.5573333130799494e-05, "loss": 0.7796, "step": 4346 }, { "epoch": 0.6651874521805662, "grad_norm": 2.3608093796256657, "learning_rate": 1.5571275351919543e-05, "loss": 1.0151, "step": 4347 }, { "epoch": 0.6653404743687835, "grad_norm": 2.258946043310113, "learning_rate": 1.556921723088578e-05, "loss": 0.668, "step": 4348 }, { "epoch": 0.6654934965570007, "grad_norm": 2.4385006256619626, "learning_rate": 1.5567158767824603e-05, "loss": 0.7649, "step": 4349 }, { "epoch": 0.6656465187452181, "grad_norm": 2.187731470321015, "learning_rate": 1.556509996286242e-05, "loss": 0.74, "step": 4350 }, { "epoch": 0.6657995409334353, "grad_norm": 2.136213298646861, "learning_rate": 1.5563040816125683e-05, "loss": 0.756, "step": 4351 }, { "epoch": 0.6659525631216526, "grad_norm": 2.464238595477538, "learning_rate": 1.5560981327740846e-05, "loss": 0.7432, "step": 4352 }, { "epoch": 0.66610558530987, "grad_norm": 2.138263083825212, "learning_rate": 1.5558921497834387e-05, "loss": 0.7623, "step": 4353 }, { "epoch": 0.6662586074980872, "grad_norm": 2.2159031648485157, "learning_rate": 1.5556861326532817e-05, "loss": 0.6481, "step": 4354 }, { "epoch": 0.6664116296863045, "grad_norm": 2.3079101636175405, "learning_rate": 1.5554800813962652e-05, "loss": 0.7337, "step": 4355 }, { "epoch": 0.6665646518745219, "grad_norm": 2.3557552648531095, "learning_rate": 1.555273996025044e-05, "loss": 0.7614, "step": 4356 }, { "epoch": 0.6667176740627391, "grad_norm": 2.3975045855476234, "learning_rate": 1.5550678765522743e-05, "loss": 0.8563, "step": 4357 }, { "epoch": 0.6668706962509564, "grad_norm": 2.3698126148175263, "learning_rate": 1.554861722990615e-05, "loss": 0.6551, "step": 4358 }, { "epoch": 0.6670237184391736, "grad_norm": 1.9109474500910666, "learning_rate": 1.5546555353527268e-05, "loss": 0.5765, "step": 4359 }, { "epoch": 0.667176740627391, "grad_norm": 2.347918132400794, "learning_rate": 1.554449313651272e-05, "loss": 0.8157, "step": 4360 }, { "epoch": 0.6673297628156083, "grad_norm": 2.1667966924767517, "learning_rate": 1.554243057898916e-05, "loss": 0.7399, "step": 4361 }, { "epoch": 0.6674827850038255, "grad_norm": 2.086540955838038, "learning_rate": 1.5540367681083256e-05, "loss": 0.6853, "step": 4362 }, { "epoch": 0.6676358071920429, "grad_norm": 2.132291609887321, "learning_rate": 1.5538304442921694e-05, "loss": 0.7374, "step": 4363 }, { "epoch": 0.6677888293802602, "grad_norm": 2.363328018543426, "learning_rate": 1.5536240864631197e-05, "loss": 0.824, "step": 4364 }, { "epoch": 0.6679418515684774, "grad_norm": 2.3075046294927977, "learning_rate": 1.553417694633849e-05, "loss": 0.7507, "step": 4365 }, { "epoch": 0.6680948737566947, "grad_norm": 2.5578916953080086, "learning_rate": 1.553211268817032e-05, "loss": 0.9281, "step": 4366 }, { "epoch": 0.668247895944912, "grad_norm": 2.0753350483746233, "learning_rate": 1.5530048090253475e-05, "loss": 0.7547, "step": 4367 }, { "epoch": 0.6684009181331293, "grad_norm": 2.069627851881168, "learning_rate": 1.552798315271474e-05, "loss": 0.7056, "step": 4368 }, { "epoch": 0.6685539403213466, "grad_norm": 2.3010025507959573, "learning_rate": 1.5525917875680936e-05, "loss": 0.6966, "step": 4369 }, { "epoch": 0.6687069625095639, "grad_norm": 2.5111567762780154, "learning_rate": 1.55238522592789e-05, "loss": 0.7832, "step": 4370 }, { "epoch": 0.6688599846977812, "grad_norm": 1.9618299176729825, "learning_rate": 1.5521786303635483e-05, "loss": 0.6293, "step": 4371 }, { "epoch": 0.6690130068859985, "grad_norm": 2.225950898436061, "learning_rate": 1.5519720008877567e-05, "loss": 0.7209, "step": 4372 }, { "epoch": 0.6691660290742157, "grad_norm": 1.992624936802358, "learning_rate": 1.5517653375132055e-05, "loss": 0.7138, "step": 4373 }, { "epoch": 0.6693190512624331, "grad_norm": 1.8752156297552323, "learning_rate": 1.551558640252586e-05, "loss": 0.6508, "step": 4374 }, { "epoch": 0.6694720734506503, "grad_norm": 2.3301325690600945, "learning_rate": 1.5513519091185934e-05, "loss": 0.5898, "step": 4375 }, { "epoch": 0.6696250956388676, "grad_norm": 2.3208751688365576, "learning_rate": 1.5511451441239227e-05, "loss": 0.6817, "step": 4376 }, { "epoch": 0.669778117827085, "grad_norm": 2.3173083606568396, "learning_rate": 1.5509383452812725e-05, "loss": 0.5693, "step": 4377 }, { "epoch": 0.6699311400153022, "grad_norm": 2.383092745146042, "learning_rate": 1.5507315126033435e-05, "loss": 0.6256, "step": 4378 }, { "epoch": 0.6700841622035195, "grad_norm": 1.8645480695198386, "learning_rate": 1.550524646102838e-05, "loss": 0.526, "step": 4379 }, { "epoch": 0.6702371843917369, "grad_norm": 2.4363367750243805, "learning_rate": 1.5503177457924597e-05, "loss": 0.7734, "step": 4380 }, { "epoch": 0.6703902065799541, "grad_norm": 2.5537622593414415, "learning_rate": 1.5501108116849162e-05, "loss": 0.7684, "step": 4381 }, { "epoch": 0.6705432287681714, "grad_norm": 2.226941029837064, "learning_rate": 1.549903843792916e-05, "loss": 0.7214, "step": 4382 }, { "epoch": 0.6706962509563886, "grad_norm": 2.4219083185269015, "learning_rate": 1.5496968421291687e-05, "loss": 0.8276, "step": 4383 }, { "epoch": 0.670849273144606, "grad_norm": 2.160286832500492, "learning_rate": 1.5494898067063885e-05, "loss": 0.7445, "step": 4384 }, { "epoch": 0.6710022953328233, "grad_norm": 2.207589097594382, "learning_rate": 1.5492827375372895e-05, "loss": 0.5768, "step": 4385 }, { "epoch": 0.6711553175210405, "grad_norm": 2.163120547285754, "learning_rate": 1.5490756346345887e-05, "loss": 0.7213, "step": 4386 }, { "epoch": 0.6713083397092579, "grad_norm": 2.2884691646975153, "learning_rate": 1.5488684980110052e-05, "loss": 0.7249, "step": 4387 }, { "epoch": 0.6714613618974752, "grad_norm": 2.302420265586536, "learning_rate": 1.5486613276792604e-05, "loss": 0.7526, "step": 4388 }, { "epoch": 0.6716143840856924, "grad_norm": 2.2770861895161585, "learning_rate": 1.5484541236520767e-05, "loss": 0.6862, "step": 4389 }, { "epoch": 0.6717674062739097, "grad_norm": 2.1419356393123854, "learning_rate": 1.54824688594218e-05, "loss": 0.6584, "step": 4390 }, { "epoch": 0.671920428462127, "grad_norm": 2.29317049827774, "learning_rate": 1.548039614562297e-05, "loss": 0.7182, "step": 4391 }, { "epoch": 0.6720734506503443, "grad_norm": 2.432437397742068, "learning_rate": 1.5478323095251572e-05, "loss": 0.7444, "step": 4392 }, { "epoch": 0.6722264728385616, "grad_norm": 2.129156587913408, "learning_rate": 1.5476249708434928e-05, "loss": 0.7134, "step": 4393 }, { "epoch": 0.6723794950267789, "grad_norm": 2.4526432928617687, "learning_rate": 1.547417598530036e-05, "loss": 0.8522, "step": 4394 }, { "epoch": 0.6725325172149962, "grad_norm": 1.9197792032530787, "learning_rate": 1.5472101925975232e-05, "loss": 0.6823, "step": 4395 }, { "epoch": 0.6726855394032135, "grad_norm": 2.276953404476131, "learning_rate": 1.5470027530586917e-05, "loss": 0.7254, "step": 4396 }, { "epoch": 0.6728385615914307, "grad_norm": 1.8933823182228429, "learning_rate": 1.546795279926281e-05, "loss": 0.5938, "step": 4397 }, { "epoch": 0.6729915837796481, "grad_norm": 2.573327299964346, "learning_rate": 1.5465877732130334e-05, "loss": 0.7998, "step": 4398 }, { "epoch": 0.6731446059678653, "grad_norm": 2.0314553890273146, "learning_rate": 1.5463802329316925e-05, "loss": 0.7874, "step": 4399 }, { "epoch": 0.6732976281560826, "grad_norm": 2.5070632781376814, "learning_rate": 1.5461726590950038e-05, "loss": 0.8208, "step": 4400 }, { "epoch": 0.6734506503443, "grad_norm": 2.3025849045760953, "learning_rate": 1.5459650517157155e-05, "loss": 0.7712, "step": 4401 }, { "epoch": 0.6736036725325172, "grad_norm": 2.7271924988858642, "learning_rate": 1.545757410806578e-05, "loss": 0.8297, "step": 4402 }, { "epoch": 0.6737566947207345, "grad_norm": 2.4592991194582554, "learning_rate": 1.545549736380342e-05, "loss": 0.755, "step": 4403 }, { "epoch": 0.6739097169089519, "grad_norm": 2.2928675487770684, "learning_rate": 1.545342028449763e-05, "loss": 0.7825, "step": 4404 }, { "epoch": 0.6740627390971691, "grad_norm": 2.396197246986, "learning_rate": 1.5451342870275966e-05, "loss": 0.7941, "step": 4405 }, { "epoch": 0.6742157612853864, "grad_norm": 2.388805239861892, "learning_rate": 1.5449265121266013e-05, "loss": 0.7196, "step": 4406 }, { "epoch": 0.6743687834736036, "grad_norm": 1.9013526614221556, "learning_rate": 1.5447187037595365e-05, "loss": 0.6383, "step": 4407 }, { "epoch": 0.674521805661821, "grad_norm": 2.2240279181316263, "learning_rate": 1.5445108619391658e-05, "loss": 0.7109, "step": 4408 }, { "epoch": 0.6746748278500383, "grad_norm": 2.197479041596447, "learning_rate": 1.5443029866782527e-05, "loss": 0.7536, "step": 4409 }, { "epoch": 0.6748278500382555, "grad_norm": 2.2551420831296647, "learning_rate": 1.544095077989564e-05, "loss": 0.7972, "step": 4410 }, { "epoch": 0.6749808722264728, "grad_norm": 2.0258059419407903, "learning_rate": 1.5438871358858677e-05, "loss": 0.6498, "step": 4411 }, { "epoch": 0.6751338944146902, "grad_norm": 2.09550472938151, "learning_rate": 1.543679160379935e-05, "loss": 0.7254, "step": 4412 }, { "epoch": 0.6752869166029074, "grad_norm": 2.5330437432780735, "learning_rate": 1.543471151484538e-05, "loss": 0.7032, "step": 4413 }, { "epoch": 0.6754399387911247, "grad_norm": 2.348682051814609, "learning_rate": 1.5432631092124516e-05, "loss": 0.8294, "step": 4414 }, { "epoch": 0.675592960979342, "grad_norm": 2.1542786517889905, "learning_rate": 1.5430550335764522e-05, "loss": 0.5664, "step": 4415 }, { "epoch": 0.6757459831675593, "grad_norm": 2.690330238546623, "learning_rate": 1.542846924589319e-05, "loss": 0.7578, "step": 4416 }, { "epoch": 0.6758990053557766, "grad_norm": 2.398820373779532, "learning_rate": 1.5426387822638323e-05, "loss": 0.7756, "step": 4417 }, { "epoch": 0.6760520275439938, "grad_norm": 2.583573843262456, "learning_rate": 1.5424306066127754e-05, "loss": 0.6986, "step": 4418 }, { "epoch": 0.6762050497322112, "grad_norm": 2.935202140832654, "learning_rate": 1.542222397648933e-05, "loss": 0.8748, "step": 4419 }, { "epoch": 0.6763580719204285, "grad_norm": 2.119216436906614, "learning_rate": 1.5420141553850914e-05, "loss": 0.6931, "step": 4420 }, { "epoch": 0.6765110941086457, "grad_norm": 2.2188152996527304, "learning_rate": 1.541805879834041e-05, "loss": 0.7145, "step": 4421 }, { "epoch": 0.6766641162968631, "grad_norm": 2.2188544191352104, "learning_rate": 1.5415975710085715e-05, "loss": 0.6896, "step": 4422 }, { "epoch": 0.6768171384850803, "grad_norm": 2.1994570146008567, "learning_rate": 1.5413892289214765e-05, "loss": 0.7304, "step": 4423 }, { "epoch": 0.6769701606732976, "grad_norm": 2.031793893241159, "learning_rate": 1.5411808535855508e-05, "loss": 0.7114, "step": 4424 }, { "epoch": 0.677123182861515, "grad_norm": 2.2744688009146516, "learning_rate": 1.540972445013592e-05, "loss": 0.7738, "step": 4425 }, { "epoch": 0.6772762050497322, "grad_norm": 1.9530244694556822, "learning_rate": 1.5407640032183993e-05, "loss": 0.5981, "step": 4426 }, { "epoch": 0.6774292272379495, "grad_norm": 2.2978859459660987, "learning_rate": 1.5405555282127732e-05, "loss": 0.6649, "step": 4427 }, { "epoch": 0.6775822494261668, "grad_norm": 2.279440938423363, "learning_rate": 1.5403470200095176e-05, "loss": 0.7574, "step": 4428 }, { "epoch": 0.6777352716143841, "grad_norm": 2.40984717918197, "learning_rate": 1.5401384786214377e-05, "loss": 0.7694, "step": 4429 }, { "epoch": 0.6778882938026014, "grad_norm": 2.3415709152188033, "learning_rate": 1.5399299040613408e-05, "loss": 0.7097, "step": 4430 }, { "epoch": 0.6780413159908186, "grad_norm": 2.270028435261307, "learning_rate": 1.539721296342036e-05, "loss": 0.7187, "step": 4431 }, { "epoch": 0.678194338179036, "grad_norm": 2.231886593812637, "learning_rate": 1.5395126554763357e-05, "loss": 0.7461, "step": 4432 }, { "epoch": 0.6783473603672533, "grad_norm": 2.030871074521151, "learning_rate": 1.539303981477052e-05, "loss": 0.6325, "step": 4433 }, { "epoch": 0.6785003825554705, "grad_norm": 2.3190377138045912, "learning_rate": 1.5390952743570015e-05, "loss": 0.6715, "step": 4434 }, { "epoch": 0.6786534047436878, "grad_norm": 2.2574521183138123, "learning_rate": 1.538886534129001e-05, "loss": 0.7406, "step": 4435 }, { "epoch": 0.6788064269319051, "grad_norm": 2.1855521286184527, "learning_rate": 1.5386777608058702e-05, "loss": 0.7811, "step": 4436 }, { "epoch": 0.6789594491201224, "grad_norm": 2.4386053936890324, "learning_rate": 1.5384689544004307e-05, "loss": 0.8023, "step": 4437 }, { "epoch": 0.6791124713083397, "grad_norm": 2.424761529872376, "learning_rate": 1.5382601149255063e-05, "loss": 0.7948, "step": 4438 }, { "epoch": 0.679265493496557, "grad_norm": 2.14136896989551, "learning_rate": 1.5380512423939227e-05, "loss": 0.7419, "step": 4439 }, { "epoch": 0.6794185156847743, "grad_norm": 2.1760452971611994, "learning_rate": 1.5378423368185074e-05, "loss": 0.7354, "step": 4440 }, { "epoch": 0.6795715378729916, "grad_norm": 2.4588643253323603, "learning_rate": 1.53763339821209e-05, "loss": 0.7851, "step": 4441 }, { "epoch": 0.6797245600612088, "grad_norm": 2.3166074058614443, "learning_rate": 1.5374244265875026e-05, "loss": 0.7038, "step": 4442 }, { "epoch": 0.6798775822494262, "grad_norm": 2.1635072983925836, "learning_rate": 1.5372154219575788e-05, "loss": 0.8344, "step": 4443 }, { "epoch": 0.6800306044376434, "grad_norm": 2.2860730808637797, "learning_rate": 1.5370063843351538e-05, "loss": 0.7131, "step": 4444 }, { "epoch": 0.6801836266258607, "grad_norm": 2.2691362821289847, "learning_rate": 1.5367973137330667e-05, "loss": 0.7036, "step": 4445 }, { "epoch": 0.6803366488140781, "grad_norm": 2.2305641983243927, "learning_rate": 1.536588210164156e-05, "loss": 0.6088, "step": 4446 }, { "epoch": 0.6804896710022953, "grad_norm": 2.2145056979037303, "learning_rate": 1.5363790736412646e-05, "loss": 0.8274, "step": 4447 }, { "epoch": 0.6806426931905126, "grad_norm": 2.8007677770733492, "learning_rate": 1.5361699041772358e-05, "loss": 0.8732, "step": 4448 }, { "epoch": 0.68079571537873, "grad_norm": 2.3126948601147457, "learning_rate": 1.5359607017849156e-05, "loss": 0.8034, "step": 4449 }, { "epoch": 0.6809487375669472, "grad_norm": 2.2147887174883616, "learning_rate": 1.535751466477152e-05, "loss": 0.7773, "step": 4450 }, { "epoch": 0.6811017597551645, "grad_norm": 2.4159115687576618, "learning_rate": 1.535542198266795e-05, "loss": 0.7059, "step": 4451 }, { "epoch": 0.6812547819433817, "grad_norm": 2.0860891506089665, "learning_rate": 1.535332897166697e-05, "loss": 0.7719, "step": 4452 }, { "epoch": 0.6814078041315991, "grad_norm": 1.9432480744857157, "learning_rate": 1.535123563189711e-05, "loss": 0.681, "step": 4453 }, { "epoch": 0.6815608263198164, "grad_norm": 2.4927618482640583, "learning_rate": 1.5349141963486934e-05, "loss": 0.7504, "step": 4454 }, { "epoch": 0.6817138485080336, "grad_norm": 2.5913353045572856, "learning_rate": 1.5347047966565026e-05, "loss": 0.8291, "step": 4455 }, { "epoch": 0.681866870696251, "grad_norm": 2.083645177191506, "learning_rate": 1.534495364125999e-05, "loss": 0.73, "step": 4456 }, { "epoch": 0.6820198928844683, "grad_norm": 2.319715812091627, "learning_rate": 1.5342858987700434e-05, "loss": 0.8116, "step": 4457 }, { "epoch": 0.6821729150726855, "grad_norm": 2.2755235463056542, "learning_rate": 1.534076400601501e-05, "loss": 0.7882, "step": 4458 }, { "epoch": 0.6823259372609028, "grad_norm": 2.262029634001563, "learning_rate": 1.5338668696332374e-05, "loss": 0.6669, "step": 4459 }, { "epoch": 0.6824789594491201, "grad_norm": 2.6664918202648633, "learning_rate": 1.533657305878121e-05, "loss": 0.7102, "step": 4460 }, { "epoch": 0.6826319816373374, "grad_norm": 2.127031830300462, "learning_rate": 1.5334477093490215e-05, "loss": 0.7144, "step": 4461 }, { "epoch": 0.6827850038255547, "grad_norm": 2.4017809639453396, "learning_rate": 1.5332380800588116e-05, "loss": 0.7166, "step": 4462 }, { "epoch": 0.682938026013772, "grad_norm": 2.5503010827973833, "learning_rate": 1.5330284180203648e-05, "loss": 0.8424, "step": 4463 }, { "epoch": 0.6830910482019893, "grad_norm": 2.31586137036302, "learning_rate": 1.532818723246558e-05, "loss": 0.8279, "step": 4464 }, { "epoch": 0.6832440703902066, "grad_norm": 2.1965797282146737, "learning_rate": 1.5326089957502688e-05, "loss": 0.7076, "step": 4465 }, { "epoch": 0.6833970925784238, "grad_norm": 2.3924228128071867, "learning_rate": 1.532399235544378e-05, "loss": 0.7191, "step": 4466 }, { "epoch": 0.6835501147666412, "grad_norm": 2.1377788155877355, "learning_rate": 1.5321894426417667e-05, "loss": 0.6807, "step": 4467 }, { "epoch": 0.6837031369548584, "grad_norm": 2.2934918162416698, "learning_rate": 1.5319796170553202e-05, "loss": 0.7232, "step": 4468 }, { "epoch": 0.6838561591430757, "grad_norm": 2.130277739327962, "learning_rate": 1.5317697587979243e-05, "loss": 0.638, "step": 4469 }, { "epoch": 0.6840091813312931, "grad_norm": 2.312260633732955, "learning_rate": 1.531559867882467e-05, "loss": 0.8274, "step": 4470 }, { "epoch": 0.6841622035195103, "grad_norm": 2.586845946207191, "learning_rate": 1.5313499443218388e-05, "loss": 0.7508, "step": 4471 }, { "epoch": 0.6843152257077276, "grad_norm": 2.378424806595788, "learning_rate": 1.5311399881289322e-05, "loss": 0.7575, "step": 4472 }, { "epoch": 0.684468247895945, "grad_norm": 2.1522295879865427, "learning_rate": 1.5309299993166405e-05, "loss": 0.6589, "step": 4473 }, { "epoch": 0.6846212700841622, "grad_norm": 2.3097812688810784, "learning_rate": 1.530719977897861e-05, "loss": 0.7054, "step": 4474 }, { "epoch": 0.6847742922723795, "grad_norm": 2.3172326880772736, "learning_rate": 1.5305099238854913e-05, "loss": 0.6894, "step": 4475 }, { "epoch": 0.6849273144605967, "grad_norm": 2.1003335405866106, "learning_rate": 1.530299837292432e-05, "loss": 0.7624, "step": 4476 }, { "epoch": 0.6850803366488141, "grad_norm": 2.599385998272937, "learning_rate": 1.5300897181315852e-05, "loss": 0.7463, "step": 4477 }, { "epoch": 0.6852333588370314, "grad_norm": 2.3900101225352395, "learning_rate": 1.5298795664158547e-05, "loss": 0.8077, "step": 4478 }, { "epoch": 0.6853863810252486, "grad_norm": 2.191784966065849, "learning_rate": 1.5296693821581474e-05, "loss": 0.6986, "step": 4479 }, { "epoch": 0.685539403213466, "grad_norm": 2.6280784977891147, "learning_rate": 1.529459165371371e-05, "loss": 0.7687, "step": 4480 }, { "epoch": 0.6856924254016833, "grad_norm": 2.471333458938139, "learning_rate": 1.5292489160684362e-05, "loss": 0.8365, "step": 4481 }, { "epoch": 0.6858454475899005, "grad_norm": 2.3021022470545582, "learning_rate": 1.529038634262255e-05, "loss": 0.8477, "step": 4482 }, { "epoch": 0.6859984697781178, "grad_norm": 2.3246002041267304, "learning_rate": 1.528828319965742e-05, "loss": 0.8146, "step": 4483 }, { "epoch": 0.6861514919663351, "grad_norm": 2.1854019951696757, "learning_rate": 1.5286179731918126e-05, "loss": 0.6378, "step": 4484 }, { "epoch": 0.6863045141545524, "grad_norm": 1.9833151918210372, "learning_rate": 1.5284075939533854e-05, "loss": 0.8069, "step": 4485 }, { "epoch": 0.6864575363427697, "grad_norm": 2.324187336487013, "learning_rate": 1.5281971822633812e-05, "loss": 0.858, "step": 4486 }, { "epoch": 0.686610558530987, "grad_norm": 2.151761922724606, "learning_rate": 1.527986738134721e-05, "loss": 0.63, "step": 4487 }, { "epoch": 0.6867635807192043, "grad_norm": 1.9302796780045488, "learning_rate": 1.5277762615803308e-05, "loss": 0.7935, "step": 4488 }, { "epoch": 0.6869166029074216, "grad_norm": 2.0743268312554854, "learning_rate": 1.527565752613135e-05, "loss": 0.7011, "step": 4489 }, { "epoch": 0.6870696250956388, "grad_norm": 2.5266338890936133, "learning_rate": 1.5273552112460626e-05, "loss": 0.8576, "step": 4490 }, { "epoch": 0.6872226472838562, "grad_norm": 2.0054601689178035, "learning_rate": 1.5271446374920435e-05, "loss": 0.6995, "step": 4491 }, { "epoch": 0.6873756694720734, "grad_norm": 2.208190332410164, "learning_rate": 1.5269340313640104e-05, "loss": 0.6487, "step": 4492 }, { "epoch": 0.6875286916602907, "grad_norm": 2.083733044038154, "learning_rate": 1.526723392874897e-05, "loss": 0.782, "step": 4493 }, { "epoch": 0.6876817138485081, "grad_norm": 2.2792750232808676, "learning_rate": 1.526512722037639e-05, "loss": 0.726, "step": 4494 }, { "epoch": 0.6878347360367253, "grad_norm": 2.089507560978964, "learning_rate": 1.526302018865175e-05, "loss": 0.7222, "step": 4495 }, { "epoch": 0.6879877582249426, "grad_norm": 2.2579331803512193, "learning_rate": 1.526091283370446e-05, "loss": 0.7362, "step": 4496 }, { "epoch": 0.68814078041316, "grad_norm": 2.2197350362031933, "learning_rate": 1.5258805155663924e-05, "loss": 0.7617, "step": 4497 }, { "epoch": 0.6882938026013772, "grad_norm": 2.4317255785428267, "learning_rate": 1.5256697154659593e-05, "loss": 0.7737, "step": 4498 }, { "epoch": 0.6884468247895945, "grad_norm": 2.28504107301684, "learning_rate": 1.5254588830820925e-05, "loss": 0.7777, "step": 4499 }, { "epoch": 0.6885998469778117, "grad_norm": 2.272161786994709, "learning_rate": 1.5252480184277403e-05, "loss": 0.8356, "step": 4500 }, { "epoch": 0.6887528691660291, "grad_norm": 2.335814379960226, "learning_rate": 1.5250371215158523e-05, "loss": 0.841, "step": 4501 }, { "epoch": 0.6889058913542464, "grad_norm": 2.168986813520893, "learning_rate": 1.5248261923593805e-05, "loss": 0.7132, "step": 4502 }, { "epoch": 0.6890589135424636, "grad_norm": 2.1288853465211113, "learning_rate": 1.5246152309712795e-05, "loss": 0.7998, "step": 4503 }, { "epoch": 0.689211935730681, "grad_norm": 2.319619776523877, "learning_rate": 1.5244042373645047e-05, "loss": 0.802, "step": 4504 }, { "epoch": 0.6893649579188983, "grad_norm": 2.2306663348214166, "learning_rate": 1.5241932115520142e-05, "loss": 0.6982, "step": 4505 }, { "epoch": 0.6895179801071155, "grad_norm": 1.9740335208345237, "learning_rate": 1.523982153546768e-05, "loss": 0.6945, "step": 4506 }, { "epoch": 0.6896710022953328, "grad_norm": 2.3212474725289, "learning_rate": 1.5237710633617278e-05, "loss": 0.7454, "step": 4507 }, { "epoch": 0.6898240244835501, "grad_norm": 2.181356444869256, "learning_rate": 1.5235599410098576e-05, "loss": 0.6968, "step": 4508 }, { "epoch": 0.6899770466717674, "grad_norm": 2.4681098588473676, "learning_rate": 1.5233487865041237e-05, "loss": 0.7217, "step": 4509 }, { "epoch": 0.6901300688599847, "grad_norm": 2.2233164003664068, "learning_rate": 1.5231375998574929e-05, "loss": 0.6686, "step": 4510 }, { "epoch": 0.690283091048202, "grad_norm": 2.1293830198002177, "learning_rate": 1.5229263810829355e-05, "loss": 0.7067, "step": 4511 }, { "epoch": 0.6904361132364193, "grad_norm": 2.333776572385844, "learning_rate": 1.5227151301934235e-05, "loss": 0.7336, "step": 4512 }, { "epoch": 0.6905891354246366, "grad_norm": 2.488107023266574, "learning_rate": 1.522503847201931e-05, "loss": 0.8592, "step": 4513 }, { "epoch": 0.6907421576128538, "grad_norm": 2.2582773023106526, "learning_rate": 1.5222925321214326e-05, "loss": 0.6915, "step": 4514 }, { "epoch": 0.6908951798010712, "grad_norm": 2.222243925719928, "learning_rate": 1.5220811849649066e-05, "loss": 0.7331, "step": 4515 }, { "epoch": 0.6910482019892884, "grad_norm": 2.0850055482377776, "learning_rate": 1.521869805745333e-05, "loss": 0.7538, "step": 4516 }, { "epoch": 0.6912012241775057, "grad_norm": 2.221052204851177, "learning_rate": 1.5216583944756927e-05, "loss": 0.6374, "step": 4517 }, { "epoch": 0.6913542463657231, "grad_norm": 2.1796358857455043, "learning_rate": 1.5214469511689698e-05, "loss": 0.6842, "step": 4518 }, { "epoch": 0.6915072685539403, "grad_norm": 2.405770425488477, "learning_rate": 1.5212354758381498e-05, "loss": 0.7566, "step": 4519 }, { "epoch": 0.6916602907421576, "grad_norm": 2.5722939852043254, "learning_rate": 1.5210239684962197e-05, "loss": 0.7809, "step": 4520 }, { "epoch": 0.691813312930375, "grad_norm": 2.465616131274673, "learning_rate": 1.5208124291561702e-05, "loss": 0.78, "step": 4521 }, { "epoch": 0.6919663351185922, "grad_norm": 2.3138649418442356, "learning_rate": 1.5206008578309917e-05, "loss": 0.73, "step": 4522 }, { "epoch": 0.6921193573068095, "grad_norm": 2.3167740039306755, "learning_rate": 1.5203892545336776e-05, "loss": 0.703, "step": 4523 }, { "epoch": 0.6922723794950267, "grad_norm": 2.670744482743038, "learning_rate": 1.5201776192772237e-05, "loss": 0.7674, "step": 4524 }, { "epoch": 0.6924254016832441, "grad_norm": 2.217729480121748, "learning_rate": 1.5199659520746275e-05, "loss": 0.7415, "step": 4525 }, { "epoch": 0.6925784238714614, "grad_norm": 2.001448225210971, "learning_rate": 1.5197542529388878e-05, "loss": 0.6448, "step": 4526 }, { "epoch": 0.6927314460596786, "grad_norm": 2.238292825597769, "learning_rate": 1.5195425218830063e-05, "loss": 0.7611, "step": 4527 }, { "epoch": 0.692884468247896, "grad_norm": 2.7725948227174784, "learning_rate": 1.5193307589199862e-05, "loss": 0.8477, "step": 4528 }, { "epoch": 0.6930374904361133, "grad_norm": 2.110571689962769, "learning_rate": 1.519118964062832e-05, "loss": 0.6644, "step": 4529 }, { "epoch": 0.6931905126243305, "grad_norm": 2.2568105040470257, "learning_rate": 1.5189071373245521e-05, "loss": 0.6753, "step": 4530 }, { "epoch": 0.6933435348125478, "grad_norm": 2.5626809993301594, "learning_rate": 1.5186952787181546e-05, "loss": 0.8011, "step": 4531 }, { "epoch": 0.6934965570007651, "grad_norm": 2.2221418431380155, "learning_rate": 1.5184833882566507e-05, "loss": 0.764, "step": 4532 }, { "epoch": 0.6936495791889824, "grad_norm": 2.4274947741070005, "learning_rate": 1.518271465953054e-05, "loss": 0.7462, "step": 4533 }, { "epoch": 0.6938026013771997, "grad_norm": 2.270795029988415, "learning_rate": 1.5180595118203791e-05, "loss": 0.6212, "step": 4534 }, { "epoch": 0.693955623565417, "grad_norm": 2.3794007844331966, "learning_rate": 1.5178475258716426e-05, "loss": 0.7506, "step": 4535 }, { "epoch": 0.6941086457536343, "grad_norm": 2.404191858909533, "learning_rate": 1.5176355081198638e-05, "loss": 0.7967, "step": 4536 }, { "epoch": 0.6942616679418516, "grad_norm": 2.272161701559529, "learning_rate": 1.5174234585780633e-05, "loss": 0.8067, "step": 4537 }, { "epoch": 0.6944146901300688, "grad_norm": 2.16796211094527, "learning_rate": 1.5172113772592645e-05, "loss": 0.8289, "step": 4538 }, { "epoch": 0.6945677123182862, "grad_norm": 2.7916757266297894, "learning_rate": 1.5169992641764913e-05, "loss": 0.7694, "step": 4539 }, { "epoch": 0.6947207345065034, "grad_norm": 2.2621760884957345, "learning_rate": 1.5167871193427709e-05, "loss": 0.7442, "step": 4540 }, { "epoch": 0.6948737566947207, "grad_norm": 2.2540368894976583, "learning_rate": 1.5165749427711318e-05, "loss": 0.8203, "step": 4541 }, { "epoch": 0.6950267788829381, "grad_norm": 2.2972220573536957, "learning_rate": 1.5163627344746045e-05, "loss": 0.818, "step": 4542 }, { "epoch": 0.6951798010711553, "grad_norm": 2.482793009436285, "learning_rate": 1.516150494466222e-05, "loss": 0.8353, "step": 4543 }, { "epoch": 0.6953328232593726, "grad_norm": 2.3515004139625897, "learning_rate": 1.515938222759018e-05, "loss": 0.778, "step": 4544 }, { "epoch": 0.6954858454475898, "grad_norm": 2.2361132567474615, "learning_rate": 1.5157259193660295e-05, "loss": 0.7126, "step": 4545 }, { "epoch": 0.6956388676358072, "grad_norm": 2.5681665991598286, "learning_rate": 1.5155135843002948e-05, "loss": 0.8872, "step": 4546 }, { "epoch": 0.6957918898240245, "grad_norm": 2.3461733455885727, "learning_rate": 1.5153012175748542e-05, "loss": 0.7425, "step": 4547 }, { "epoch": 0.6959449120122417, "grad_norm": 2.1546909441667124, "learning_rate": 1.5150888192027502e-05, "loss": 0.5938, "step": 4548 }, { "epoch": 0.6960979342004591, "grad_norm": 2.1532119909978937, "learning_rate": 1.5148763891970264e-05, "loss": 0.6435, "step": 4549 }, { "epoch": 0.6962509563886764, "grad_norm": 2.141592558646377, "learning_rate": 1.5146639275707298e-05, "loss": 0.6884, "step": 4550 }, { "epoch": 0.6964039785768936, "grad_norm": 2.5681176533512216, "learning_rate": 1.5144514343369075e-05, "loss": 0.9509, "step": 4551 }, { "epoch": 0.696557000765111, "grad_norm": 2.3532875826075554, "learning_rate": 1.5142389095086103e-05, "loss": 0.7296, "step": 4552 }, { "epoch": 0.6967100229533282, "grad_norm": 2.464874640315048, "learning_rate": 1.5140263530988898e-05, "loss": 0.8494, "step": 4553 }, { "epoch": 0.6968630451415455, "grad_norm": 2.3773010925552773, "learning_rate": 1.5138137651208004e-05, "loss": 0.6996, "step": 4554 }, { "epoch": 0.6970160673297628, "grad_norm": 1.9930665522309956, "learning_rate": 1.5136011455873973e-05, "loss": 0.7253, "step": 4555 }, { "epoch": 0.6971690895179801, "grad_norm": 2.287025093599191, "learning_rate": 1.513388494511739e-05, "loss": 0.6763, "step": 4556 }, { "epoch": 0.6973221117061974, "grad_norm": 2.141087425346586, "learning_rate": 1.5131758119068844e-05, "loss": 0.6891, "step": 4557 }, { "epoch": 0.6974751338944147, "grad_norm": 1.9646395637835101, "learning_rate": 1.512963097785896e-05, "loss": 0.6165, "step": 4558 }, { "epoch": 0.697628156082632, "grad_norm": 2.0897594617619903, "learning_rate": 1.5127503521618371e-05, "loss": 0.5849, "step": 4559 }, { "epoch": 0.6977811782708493, "grad_norm": 2.305818341701876, "learning_rate": 1.5125375750477731e-05, "loss": 0.7084, "step": 4560 }, { "epoch": 0.6979342004590665, "grad_norm": 2.5024862701767345, "learning_rate": 1.512324766456771e-05, "loss": 0.8031, "step": 4561 }, { "epoch": 0.6980872226472838, "grad_norm": 2.4597919538294217, "learning_rate": 1.5121119264019015e-05, "loss": 0.8691, "step": 4562 }, { "epoch": 0.6982402448355012, "grad_norm": 2.2910984541415385, "learning_rate": 1.511899054896235e-05, "loss": 0.6815, "step": 4563 }, { "epoch": 0.6983932670237184, "grad_norm": 2.364384147976584, "learning_rate": 1.5116861519528447e-05, "loss": 0.7718, "step": 4564 }, { "epoch": 0.6985462892119357, "grad_norm": 2.4982226125776226, "learning_rate": 1.5114732175848064e-05, "loss": 0.7911, "step": 4565 }, { "epoch": 0.6986993114001531, "grad_norm": 2.6777365242008986, "learning_rate": 1.5112602518051971e-05, "loss": 0.9079, "step": 4566 }, { "epoch": 0.6988523335883703, "grad_norm": 2.3841456314000538, "learning_rate": 1.5110472546270952e-05, "loss": 0.6749, "step": 4567 }, { "epoch": 0.6990053557765876, "grad_norm": 2.150933316605576, "learning_rate": 1.5108342260635821e-05, "loss": 0.576, "step": 4568 }, { "epoch": 0.6991583779648048, "grad_norm": 2.383057686481416, "learning_rate": 1.5106211661277412e-05, "loss": 0.7526, "step": 4569 }, { "epoch": 0.6993114001530222, "grad_norm": 2.273681917930905, "learning_rate": 1.5104080748326568e-05, "loss": 0.817, "step": 4570 }, { "epoch": 0.6994644223412395, "grad_norm": 2.223542529079718, "learning_rate": 1.510194952191416e-05, "loss": 0.7082, "step": 4571 }, { "epoch": 0.6996174445294567, "grad_norm": 2.3811340188781487, "learning_rate": 1.509981798217107e-05, "loss": 0.7807, "step": 4572 }, { "epoch": 0.6997704667176741, "grad_norm": 2.278119095067911, "learning_rate": 1.5097686129228208e-05, "loss": 0.6676, "step": 4573 }, { "epoch": 0.6999234889058914, "grad_norm": 2.2273074977858, "learning_rate": 1.5095553963216496e-05, "loss": 0.6651, "step": 4574 }, { "epoch": 0.7000765110941086, "grad_norm": 2.483082682130355, "learning_rate": 1.5093421484266885e-05, "loss": 0.6142, "step": 4575 }, { "epoch": 0.700229533282326, "grad_norm": 2.1945222620117533, "learning_rate": 1.5091288692510334e-05, "loss": 0.6586, "step": 4576 }, { "epoch": 0.7003825554705432, "grad_norm": 2.3719793672214244, "learning_rate": 1.5089155588077824e-05, "loss": 0.814, "step": 4577 }, { "epoch": 0.7005355776587605, "grad_norm": 2.4060459274427783, "learning_rate": 1.5087022171100362e-05, "loss": 0.7127, "step": 4578 }, { "epoch": 0.7006885998469778, "grad_norm": 2.309202456137207, "learning_rate": 1.5084888441708972e-05, "loss": 0.6896, "step": 4579 }, { "epoch": 0.7008416220351951, "grad_norm": 2.3124390047154764, "learning_rate": 1.5082754400034687e-05, "loss": 0.8279, "step": 4580 }, { "epoch": 0.7009946442234124, "grad_norm": 2.0904361229422763, "learning_rate": 1.508062004620857e-05, "loss": 0.6954, "step": 4581 }, { "epoch": 0.7011476664116297, "grad_norm": 2.094692589966736, "learning_rate": 1.5078485380361702e-05, "loss": 0.6429, "step": 4582 }, { "epoch": 0.701300688599847, "grad_norm": 2.4664976414160535, "learning_rate": 1.507635040262518e-05, "loss": 0.734, "step": 4583 }, { "epoch": 0.7014537107880643, "grad_norm": 2.230209940959713, "learning_rate": 1.507421511313012e-05, "loss": 0.7579, "step": 4584 }, { "epoch": 0.7016067329762815, "grad_norm": 2.129917064589851, "learning_rate": 1.5072079512007661e-05, "loss": 0.7132, "step": 4585 }, { "epoch": 0.7017597551644988, "grad_norm": 2.6035639966486883, "learning_rate": 1.5069943599388952e-05, "loss": 0.8368, "step": 4586 }, { "epoch": 0.7019127773527162, "grad_norm": 2.0753617962083895, "learning_rate": 1.506780737540518e-05, "loss": 0.6343, "step": 4587 }, { "epoch": 0.7020657995409334, "grad_norm": 2.2775042401770023, "learning_rate": 1.5065670840187527e-05, "loss": 0.7708, "step": 4588 }, { "epoch": 0.7022188217291507, "grad_norm": 2.1526579899134552, "learning_rate": 1.5063533993867209e-05, "loss": 0.6628, "step": 4589 }, { "epoch": 0.7023718439173681, "grad_norm": 2.176150777690883, "learning_rate": 1.5061396836575463e-05, "loss": 0.7387, "step": 4590 }, { "epoch": 0.7025248661055853, "grad_norm": 2.129854293577214, "learning_rate": 1.5059259368443541e-05, "loss": 0.6796, "step": 4591 }, { "epoch": 0.7026778882938026, "grad_norm": 2.207031757536129, "learning_rate": 1.5057121589602703e-05, "loss": 0.6714, "step": 4592 }, { "epoch": 0.7028309104820198, "grad_norm": 2.148029900786362, "learning_rate": 1.5054983500184245e-05, "loss": 0.8089, "step": 4593 }, { "epoch": 0.7029839326702372, "grad_norm": 2.138588131175154, "learning_rate": 1.5052845100319478e-05, "loss": 0.6533, "step": 4594 }, { "epoch": 0.7031369548584545, "grad_norm": 2.350243331629356, "learning_rate": 1.5050706390139724e-05, "loss": 0.7045, "step": 4595 }, { "epoch": 0.7032899770466717, "grad_norm": 2.4869590482805126, "learning_rate": 1.5048567369776334e-05, "loss": 0.8078, "step": 4596 }, { "epoch": 0.7034429992348891, "grad_norm": 2.367666383463255, "learning_rate": 1.5046428039360673e-05, "loss": 0.6646, "step": 4597 }, { "epoch": 0.7035960214231064, "grad_norm": 2.06402576755447, "learning_rate": 1.5044288399024121e-05, "loss": 0.8041, "step": 4598 }, { "epoch": 0.7037490436113236, "grad_norm": 2.15944555184331, "learning_rate": 1.5042148448898086e-05, "loss": 0.6758, "step": 4599 }, { "epoch": 0.703902065799541, "grad_norm": 2.5051434873442933, "learning_rate": 1.504000818911399e-05, "loss": 0.8164, "step": 4600 }, { "epoch": 0.7040550879877582, "grad_norm": 2.3479880611298487, "learning_rate": 1.5037867619803276e-05, "loss": 0.8284, "step": 4601 }, { "epoch": 0.7042081101759755, "grad_norm": 2.1908232486121, "learning_rate": 1.5035726741097398e-05, "loss": 0.7382, "step": 4602 }, { "epoch": 0.7043611323641928, "grad_norm": 2.3218035445254226, "learning_rate": 1.5033585553127848e-05, "loss": 0.6095, "step": 4603 }, { "epoch": 0.7045141545524101, "grad_norm": 2.2092916242716907, "learning_rate": 1.5031444056026114e-05, "loss": 0.8147, "step": 4604 }, { "epoch": 0.7046671767406274, "grad_norm": 2.217187498878475, "learning_rate": 1.5029302249923716e-05, "loss": 0.7882, "step": 4605 }, { "epoch": 0.7048201989288447, "grad_norm": 2.2389718404752834, "learning_rate": 1.5027160134952197e-05, "loss": 0.6402, "step": 4606 }, { "epoch": 0.704973221117062, "grad_norm": 2.4569438697426893, "learning_rate": 1.5025017711243104e-05, "loss": 0.8244, "step": 4607 }, { "epoch": 0.7051262433052793, "grad_norm": 2.1755220170600422, "learning_rate": 1.5022874978928015e-05, "loss": 0.6957, "step": 4608 }, { "epoch": 0.7052792654934965, "grad_norm": 2.1244164995728982, "learning_rate": 1.5020731938138523e-05, "loss": 0.6932, "step": 4609 }, { "epoch": 0.7054322876817138, "grad_norm": 2.2433322410932757, "learning_rate": 1.5018588589006247e-05, "loss": 0.7543, "step": 4610 }, { "epoch": 0.7055853098699312, "grad_norm": 2.118332433021445, "learning_rate": 1.501644493166281e-05, "loss": 0.8065, "step": 4611 }, { "epoch": 0.7057383320581484, "grad_norm": 2.1458348809316914, "learning_rate": 1.5014300966239863e-05, "loss": 0.741, "step": 4612 }, { "epoch": 0.7058913542463657, "grad_norm": 2.2114252595349178, "learning_rate": 1.501215669286908e-05, "loss": 0.7222, "step": 4613 }, { "epoch": 0.7060443764345831, "grad_norm": 2.114569239497734, "learning_rate": 1.5010012111682146e-05, "loss": 0.6805, "step": 4614 }, { "epoch": 0.7061973986228003, "grad_norm": 2.065733198288359, "learning_rate": 1.5007867222810769e-05, "loss": 0.6061, "step": 4615 }, { "epoch": 0.7063504208110176, "grad_norm": 2.214125841483104, "learning_rate": 1.5005722026386675e-05, "loss": 0.6689, "step": 4616 }, { "epoch": 0.7065034429992348, "grad_norm": 2.2882545330622057, "learning_rate": 1.5003576522541609e-05, "loss": 0.6518, "step": 4617 }, { "epoch": 0.7066564651874522, "grad_norm": 2.3663303914915375, "learning_rate": 1.5001430711407335e-05, "loss": 0.6424, "step": 4618 }, { "epoch": 0.7068094873756695, "grad_norm": 2.431556481342764, "learning_rate": 1.4999284593115633e-05, "loss": 0.8046, "step": 4619 }, { "epoch": 0.7069625095638867, "grad_norm": 2.389676632888257, "learning_rate": 1.499713816779831e-05, "loss": 0.7433, "step": 4620 }, { "epoch": 0.7071155317521041, "grad_norm": 2.2577603438639184, "learning_rate": 1.4994991435587181e-05, "loss": 0.7294, "step": 4621 }, { "epoch": 0.7072685539403214, "grad_norm": 2.309661103988358, "learning_rate": 1.499284439661409e-05, "loss": 0.769, "step": 4622 }, { "epoch": 0.7074215761285386, "grad_norm": 2.117249156808084, "learning_rate": 1.499069705101089e-05, "loss": 0.7223, "step": 4623 }, { "epoch": 0.707574598316756, "grad_norm": 2.2679542217853284, "learning_rate": 1.4988549398909461e-05, "loss": 0.7463, "step": 4624 }, { "epoch": 0.7077276205049732, "grad_norm": 2.4233667275174873, "learning_rate": 1.4986401440441698e-05, "loss": 0.6929, "step": 4625 }, { "epoch": 0.7078806426931905, "grad_norm": 2.4387312401102728, "learning_rate": 1.4984253175739516e-05, "loss": 0.7185, "step": 4626 }, { "epoch": 0.7080336648814078, "grad_norm": 2.1516389780097813, "learning_rate": 1.4982104604934847e-05, "loss": 0.6914, "step": 4627 }, { "epoch": 0.7081866870696251, "grad_norm": 2.1911271817443976, "learning_rate": 1.4979955728159648e-05, "loss": 0.7076, "step": 4628 }, { "epoch": 0.7083397092578424, "grad_norm": 2.2764653082340605, "learning_rate": 1.4977806545545882e-05, "loss": 0.7879, "step": 4629 }, { "epoch": 0.7084927314460597, "grad_norm": 2.258450187406121, "learning_rate": 1.4975657057225541e-05, "loss": 0.8093, "step": 4630 }, { "epoch": 0.708645753634277, "grad_norm": 2.102515060129002, "learning_rate": 1.497350726333064e-05, "loss": 0.6645, "step": 4631 }, { "epoch": 0.7087987758224943, "grad_norm": 2.3893729812773725, "learning_rate": 1.4971357163993201e-05, "loss": 0.7671, "step": 4632 }, { "epoch": 0.7089517980107115, "grad_norm": 1.9505505804431376, "learning_rate": 1.4969206759345268e-05, "loss": 0.6499, "step": 4633 }, { "epoch": 0.7091048201989288, "grad_norm": 2.2384013451963143, "learning_rate": 1.4967056049518909e-05, "loss": 0.7929, "step": 4634 }, { "epoch": 0.7092578423871462, "grad_norm": 2.4005614517986893, "learning_rate": 1.4964905034646207e-05, "loss": 0.7999, "step": 4635 }, { "epoch": 0.7094108645753634, "grad_norm": 2.0678028281202065, "learning_rate": 1.4962753714859265e-05, "loss": 0.7242, "step": 4636 }, { "epoch": 0.7095638867635807, "grad_norm": 2.462845353036398, "learning_rate": 1.4960602090290201e-05, "loss": 0.8538, "step": 4637 }, { "epoch": 0.7097169089517981, "grad_norm": 2.3117301226127798, "learning_rate": 1.495845016107116e-05, "loss": 0.7825, "step": 4638 }, { "epoch": 0.7098699311400153, "grad_norm": 2.2646133688524297, "learning_rate": 1.4956297927334293e-05, "loss": 0.7812, "step": 4639 }, { "epoch": 0.7100229533282326, "grad_norm": 2.2227803644605286, "learning_rate": 1.4954145389211783e-05, "loss": 0.6609, "step": 4640 }, { "epoch": 0.7101759755164498, "grad_norm": 2.282965177589414, "learning_rate": 1.4951992546835822e-05, "loss": 0.7945, "step": 4641 }, { "epoch": 0.7103289977046672, "grad_norm": 2.4988127974075436, "learning_rate": 1.494983940033863e-05, "loss": 0.7835, "step": 4642 }, { "epoch": 0.7104820198928845, "grad_norm": 2.0580200836857627, "learning_rate": 1.4947685949852433e-05, "loss": 0.7276, "step": 4643 }, { "epoch": 0.7106350420811017, "grad_norm": 2.115513884581086, "learning_rate": 1.4945532195509489e-05, "loss": 0.8458, "step": 4644 }, { "epoch": 0.7107880642693191, "grad_norm": 2.1312885230885694, "learning_rate": 1.4943378137442066e-05, "loss": 0.6838, "step": 4645 }, { "epoch": 0.7109410864575364, "grad_norm": 2.0192826011804788, "learning_rate": 1.494122377578245e-05, "loss": 0.6525, "step": 4646 }, { "epoch": 0.7110941086457536, "grad_norm": 2.14333559550761, "learning_rate": 1.4939069110662953e-05, "loss": 0.7441, "step": 4647 }, { "epoch": 0.711247130833971, "grad_norm": 2.735799505033225, "learning_rate": 1.4936914142215901e-05, "loss": 0.8995, "step": 4648 }, { "epoch": 0.7114001530221882, "grad_norm": 2.3028298562489193, "learning_rate": 1.4934758870573638e-05, "loss": 0.8153, "step": 4649 }, { "epoch": 0.7115531752104055, "grad_norm": 2.0552740637304434, "learning_rate": 1.4932603295868529e-05, "loss": 0.7292, "step": 4650 }, { "epoch": 0.7117061973986228, "grad_norm": 2.0362932635684676, "learning_rate": 1.4930447418232954e-05, "loss": 0.7156, "step": 4651 }, { "epoch": 0.7118592195868401, "grad_norm": 2.3297210965014323, "learning_rate": 1.4928291237799316e-05, "loss": 0.7321, "step": 4652 }, { "epoch": 0.7120122417750574, "grad_norm": 2.3448837720146587, "learning_rate": 1.4926134754700033e-05, "loss": 0.7765, "step": 4653 }, { "epoch": 0.7121652639632746, "grad_norm": 2.090763497741587, "learning_rate": 1.4923977969067542e-05, "loss": 0.7825, "step": 4654 }, { "epoch": 0.712318286151492, "grad_norm": 2.1568103828538137, "learning_rate": 1.4921820881034303e-05, "loss": 0.6317, "step": 4655 }, { "epoch": 0.7124713083397093, "grad_norm": 2.1517165425688307, "learning_rate": 1.4919663490732788e-05, "loss": 0.7669, "step": 4656 }, { "epoch": 0.7126243305279265, "grad_norm": 2.348912710534796, "learning_rate": 1.4917505798295496e-05, "loss": 0.6615, "step": 4657 }, { "epoch": 0.7127773527161438, "grad_norm": 2.3188303433517197, "learning_rate": 1.491534780385493e-05, "loss": 0.6957, "step": 4658 }, { "epoch": 0.7129303749043612, "grad_norm": 2.31676216581471, "learning_rate": 1.4913189507543629e-05, "loss": 0.7658, "step": 4659 }, { "epoch": 0.7130833970925784, "grad_norm": 2.3905665163661616, "learning_rate": 1.491103090949414e-05, "loss": 0.8188, "step": 4660 }, { "epoch": 0.7132364192807957, "grad_norm": 2.331333999855849, "learning_rate": 1.4908872009839032e-05, "loss": 0.7859, "step": 4661 }, { "epoch": 0.713389441469013, "grad_norm": 2.383095717161027, "learning_rate": 1.4906712808710887e-05, "loss": 0.7431, "step": 4662 }, { "epoch": 0.7135424636572303, "grad_norm": 2.225256917614341, "learning_rate": 1.4904553306242315e-05, "loss": 0.7684, "step": 4663 }, { "epoch": 0.7136954858454476, "grad_norm": 2.313231003189659, "learning_rate": 1.4902393502565938e-05, "loss": 0.7094, "step": 4664 }, { "epoch": 0.7138485080336648, "grad_norm": 2.602060352447604, "learning_rate": 1.49002333978144e-05, "loss": 0.7902, "step": 4665 }, { "epoch": 0.7140015302218822, "grad_norm": 2.279485041471543, "learning_rate": 1.4898072992120354e-05, "loss": 0.6993, "step": 4666 }, { "epoch": 0.7141545524100995, "grad_norm": 2.3635218759370136, "learning_rate": 1.4895912285616488e-05, "loss": 0.6515, "step": 4667 }, { "epoch": 0.7143075745983167, "grad_norm": 2.096166872095998, "learning_rate": 1.489375127843549e-05, "loss": 0.6639, "step": 4668 }, { "epoch": 0.7144605967865341, "grad_norm": 2.3381009761630818, "learning_rate": 1.489158997071009e-05, "loss": 0.7515, "step": 4669 }, { "epoch": 0.7146136189747513, "grad_norm": 2.195710905149429, "learning_rate": 1.488942836257301e-05, "loss": 0.6466, "step": 4670 }, { "epoch": 0.7147666411629686, "grad_norm": 2.1753748778774877, "learning_rate": 1.4887266454157005e-05, "loss": 0.6547, "step": 4671 }, { "epoch": 0.714919663351186, "grad_norm": 2.122937116378295, "learning_rate": 1.488510424559485e-05, "loss": 0.7748, "step": 4672 }, { "epoch": 0.7150726855394032, "grad_norm": 2.406407272988174, "learning_rate": 1.4882941737019334e-05, "loss": 0.8305, "step": 4673 }, { "epoch": 0.7152257077276205, "grad_norm": 2.2145862249870514, "learning_rate": 1.488077892856326e-05, "loss": 0.6078, "step": 4674 }, { "epoch": 0.7153787299158378, "grad_norm": 2.1791170205115225, "learning_rate": 1.487861582035946e-05, "loss": 0.6832, "step": 4675 }, { "epoch": 0.7155317521040551, "grad_norm": 2.21390173825772, "learning_rate": 1.4876452412540778e-05, "loss": 0.7003, "step": 4676 }, { "epoch": 0.7156847742922724, "grad_norm": 2.276605772799139, "learning_rate": 1.4874288705240077e-05, "loss": 0.739, "step": 4677 }, { "epoch": 0.7158377964804896, "grad_norm": 2.14951981367647, "learning_rate": 1.4872124698590239e-05, "loss": 0.6115, "step": 4678 }, { "epoch": 0.715990818668707, "grad_norm": 2.3320357024227785, "learning_rate": 1.4869960392724162e-05, "loss": 0.7127, "step": 4679 }, { "epoch": 0.7161438408569243, "grad_norm": 2.3126591024809047, "learning_rate": 1.4867795787774766e-05, "loss": 0.7032, "step": 4680 }, { "epoch": 0.7162968630451415, "grad_norm": 2.4048082026718247, "learning_rate": 1.4865630883874983e-05, "loss": 0.7534, "step": 4681 }, { "epoch": 0.7164498852333588, "grad_norm": 2.451316391581056, "learning_rate": 1.486346568115778e-05, "loss": 0.7529, "step": 4682 }, { "epoch": 0.7166029074215762, "grad_norm": 2.053071504108291, "learning_rate": 1.4861300179756122e-05, "loss": 0.6541, "step": 4683 }, { "epoch": 0.7167559296097934, "grad_norm": 2.3417044813298715, "learning_rate": 1.4859134379803e-05, "loss": 0.7603, "step": 4684 }, { "epoch": 0.7169089517980107, "grad_norm": 2.3473788742841015, "learning_rate": 1.4856968281431428e-05, "loss": 0.6605, "step": 4685 }, { "epoch": 0.717061973986228, "grad_norm": 2.2490942525781707, "learning_rate": 1.4854801884774435e-05, "loss": 0.6712, "step": 4686 }, { "epoch": 0.7172149961744453, "grad_norm": 2.231128453832111, "learning_rate": 1.4852635189965063e-05, "loss": 0.6849, "step": 4687 }, { "epoch": 0.7173680183626626, "grad_norm": 2.0474238341958215, "learning_rate": 1.485046819713638e-05, "loss": 0.7608, "step": 4688 }, { "epoch": 0.7175210405508798, "grad_norm": 2.178205755401646, "learning_rate": 1.4848300906421473e-05, "loss": 0.8481, "step": 4689 }, { "epoch": 0.7176740627390972, "grad_norm": 2.205174854802361, "learning_rate": 1.4846133317953441e-05, "loss": 0.7059, "step": 4690 }, { "epoch": 0.7178270849273145, "grad_norm": 2.3020851967727944, "learning_rate": 1.4843965431865401e-05, "loss": 0.7218, "step": 4691 }, { "epoch": 0.7179801071155317, "grad_norm": 2.4590759700573046, "learning_rate": 1.4841797248290494e-05, "loss": 0.8537, "step": 4692 }, { "epoch": 0.7181331293037491, "grad_norm": 2.1277534202106225, "learning_rate": 1.483962876736188e-05, "loss": 0.6644, "step": 4693 }, { "epoch": 0.7182861514919663, "grad_norm": 2.3080061593741434, "learning_rate": 1.4837459989212728e-05, "loss": 0.6971, "step": 4694 }, { "epoch": 0.7184391736801836, "grad_norm": 2.014506867651919, "learning_rate": 1.4835290913976237e-05, "loss": 0.7153, "step": 4695 }, { "epoch": 0.718592195868401, "grad_norm": 2.3493421964319565, "learning_rate": 1.4833121541785612e-05, "loss": 0.8454, "step": 4696 }, { "epoch": 0.7187452180566182, "grad_norm": 2.5341913901501676, "learning_rate": 1.4830951872774084e-05, "loss": 0.8979, "step": 4697 }, { "epoch": 0.7188982402448355, "grad_norm": 1.9746285755318667, "learning_rate": 1.4828781907074907e-05, "loss": 0.6924, "step": 4698 }, { "epoch": 0.7190512624330528, "grad_norm": 2.3123766427327763, "learning_rate": 1.4826611644821342e-05, "loss": 0.7356, "step": 4699 }, { "epoch": 0.7192042846212701, "grad_norm": 2.079211608687565, "learning_rate": 1.4824441086146673e-05, "loss": 0.6949, "step": 4700 }, { "epoch": 0.7193573068094874, "grad_norm": 2.0936906022949326, "learning_rate": 1.4822270231184202e-05, "loss": 0.6237, "step": 4701 }, { "epoch": 0.7195103289977046, "grad_norm": 2.2139872126331377, "learning_rate": 1.4820099080067256e-05, "loss": 0.8631, "step": 4702 }, { "epoch": 0.719663351185922, "grad_norm": 2.8509554055876634, "learning_rate": 1.4817927632929166e-05, "loss": 0.865, "step": 4703 }, { "epoch": 0.7198163733741393, "grad_norm": 2.203551816183465, "learning_rate": 1.4815755889903292e-05, "loss": 0.8114, "step": 4704 }, { "epoch": 0.7199693955623565, "grad_norm": 2.2383720960632463, "learning_rate": 1.481358385112301e-05, "loss": 0.7976, "step": 4705 }, { "epoch": 0.7201224177505738, "grad_norm": 2.066559424344547, "learning_rate": 1.4811411516721713e-05, "loss": 0.7183, "step": 4706 }, { "epoch": 0.7202754399387912, "grad_norm": 2.104497394187465, "learning_rate": 1.480923888683281e-05, "loss": 0.7099, "step": 4707 }, { "epoch": 0.7204284621270084, "grad_norm": 2.111770796173033, "learning_rate": 1.4807065961589737e-05, "loss": 0.6569, "step": 4708 }, { "epoch": 0.7205814843152257, "grad_norm": 2.147742730612102, "learning_rate": 1.4804892741125934e-05, "loss": 0.7724, "step": 4709 }, { "epoch": 0.720734506503443, "grad_norm": 2.246631746401689, "learning_rate": 1.4802719225574876e-05, "loss": 0.8219, "step": 4710 }, { "epoch": 0.7208875286916603, "grad_norm": 2.13309088287406, "learning_rate": 1.4800545415070037e-05, "loss": 0.6694, "step": 4711 }, { "epoch": 0.7210405508798776, "grad_norm": 2.0313301699605866, "learning_rate": 1.4798371309744925e-05, "loss": 0.6445, "step": 4712 }, { "epoch": 0.7211935730680948, "grad_norm": 2.530711557247044, "learning_rate": 1.4796196909733063e-05, "loss": 0.7704, "step": 4713 }, { "epoch": 0.7213465952563122, "grad_norm": 2.47685892445677, "learning_rate": 1.4794022215167983e-05, "loss": 0.8267, "step": 4714 }, { "epoch": 0.7214996174445295, "grad_norm": 2.0937083712741194, "learning_rate": 1.4791847226183245e-05, "loss": 0.6132, "step": 4715 }, { "epoch": 0.7216526396327467, "grad_norm": 2.189520413249951, "learning_rate": 1.4789671942912424e-05, "loss": 0.6962, "step": 4716 }, { "epoch": 0.7218056618209641, "grad_norm": 2.39097362182118, "learning_rate": 1.4787496365489112e-05, "loss": 0.6877, "step": 4717 }, { "epoch": 0.7219586840091813, "grad_norm": 2.3351690545072827, "learning_rate": 1.4785320494046917e-05, "loss": 0.7894, "step": 4718 }, { "epoch": 0.7221117061973986, "grad_norm": 2.061697603752519, "learning_rate": 1.4783144328719471e-05, "loss": 0.6861, "step": 4719 }, { "epoch": 0.722264728385616, "grad_norm": 2.2225612629930938, "learning_rate": 1.4780967869640424e-05, "loss": 0.6203, "step": 4720 }, { "epoch": 0.7224177505738332, "grad_norm": 2.573723966712967, "learning_rate": 1.4778791116943432e-05, "loss": 0.7715, "step": 4721 }, { "epoch": 0.7225707727620505, "grad_norm": 2.594210095762241, "learning_rate": 1.4776614070762183e-05, "loss": 0.6993, "step": 4722 }, { "epoch": 0.7227237949502678, "grad_norm": 2.371055805718302, "learning_rate": 1.477443673123038e-05, "loss": 0.8131, "step": 4723 }, { "epoch": 0.7228768171384851, "grad_norm": 2.096628347839013, "learning_rate": 1.4772259098481741e-05, "loss": 0.7364, "step": 4724 }, { "epoch": 0.7230298393267024, "grad_norm": 2.3398331654499103, "learning_rate": 1.4770081172649995e-05, "loss": 0.8091, "step": 4725 }, { "epoch": 0.7231828615149196, "grad_norm": 2.1132524686931795, "learning_rate": 1.4767902953868908e-05, "loss": 0.8064, "step": 4726 }, { "epoch": 0.723335883703137, "grad_norm": 2.0520566030415286, "learning_rate": 1.4765724442272252e-05, "loss": 0.6772, "step": 4727 }, { "epoch": 0.7234889058913543, "grad_norm": 2.30000431071158, "learning_rate": 1.4763545637993808e-05, "loss": 0.7558, "step": 4728 }, { "epoch": 0.7236419280795715, "grad_norm": 2.632293513300626, "learning_rate": 1.4761366541167394e-05, "loss": 0.746, "step": 4729 }, { "epoch": 0.7237949502677888, "grad_norm": 2.379979644133256, "learning_rate": 1.4759187151926833e-05, "loss": 0.8553, "step": 4730 }, { "epoch": 0.7239479724560062, "grad_norm": 2.2346609604101344, "learning_rate": 1.4757007470405973e-05, "loss": 0.6299, "step": 4731 }, { "epoch": 0.7241009946442234, "grad_norm": 2.27430307026169, "learning_rate": 1.4754827496738672e-05, "loss": 0.6518, "step": 4732 }, { "epoch": 0.7242540168324407, "grad_norm": 2.1413328730472427, "learning_rate": 1.4752647231058816e-05, "loss": 0.6566, "step": 4733 }, { "epoch": 0.724407039020658, "grad_norm": 2.3859874983347202, "learning_rate": 1.47504666735003e-05, "loss": 0.7186, "step": 4734 }, { "epoch": 0.7245600612088753, "grad_norm": 2.3047056874773912, "learning_rate": 1.4748285824197042e-05, "loss": 0.866, "step": 4735 }, { "epoch": 0.7247130833970926, "grad_norm": 2.1601134195962137, "learning_rate": 1.4746104683282978e-05, "loss": 0.6606, "step": 4736 }, { "epoch": 0.7248661055853098, "grad_norm": 2.416663587369718, "learning_rate": 1.4743923250892056e-05, "loss": 0.8479, "step": 4737 }, { "epoch": 0.7250191277735272, "grad_norm": 2.4091001521362276, "learning_rate": 1.4741741527158246e-05, "loss": 0.7452, "step": 4738 }, { "epoch": 0.7251721499617445, "grad_norm": 2.1203901701257464, "learning_rate": 1.4739559512215546e-05, "loss": 0.6542, "step": 4739 }, { "epoch": 0.7253251721499617, "grad_norm": 2.132147928958173, "learning_rate": 1.4737377206197951e-05, "loss": 0.7092, "step": 4740 }, { "epoch": 0.7254781943381791, "grad_norm": 2.208144698192683, "learning_rate": 1.4735194609239487e-05, "loss": 0.7039, "step": 4741 }, { "epoch": 0.7256312165263963, "grad_norm": 2.505747937898946, "learning_rate": 1.4733011721474202e-05, "loss": 0.8391, "step": 4742 }, { "epoch": 0.7257842387146136, "grad_norm": 2.04294489654917, "learning_rate": 1.4730828543036151e-05, "loss": 0.6893, "step": 4743 }, { "epoch": 0.725937260902831, "grad_norm": 2.2859908932549153, "learning_rate": 1.4728645074059409e-05, "loss": 0.7034, "step": 4744 }, { "epoch": 0.7260902830910482, "grad_norm": 2.0581272816435408, "learning_rate": 1.4726461314678075e-05, "loss": 0.6576, "step": 4745 }, { "epoch": 0.7262433052792655, "grad_norm": 2.1795007417010406, "learning_rate": 1.4724277265026263e-05, "loss": 0.7988, "step": 4746 }, { "epoch": 0.7263963274674828, "grad_norm": 2.2960157182927365, "learning_rate": 1.4722092925238106e-05, "loss": 0.7517, "step": 4747 }, { "epoch": 0.7265493496557001, "grad_norm": 2.623740200196195, "learning_rate": 1.4719908295447745e-05, "loss": 0.8784, "step": 4748 }, { "epoch": 0.7267023718439174, "grad_norm": 2.360442706550242, "learning_rate": 1.4717723375789353e-05, "loss": 0.8329, "step": 4749 }, { "epoch": 0.7268553940321346, "grad_norm": 2.4744425533825245, "learning_rate": 1.4715538166397109e-05, "loss": 0.6913, "step": 4750 }, { "epoch": 0.727008416220352, "grad_norm": 2.2088002606216492, "learning_rate": 1.4713352667405222e-05, "loss": 0.6413, "step": 4751 }, { "epoch": 0.7271614384085693, "grad_norm": 2.1576929521842128, "learning_rate": 1.4711166878947911e-05, "loss": 0.7331, "step": 4752 }, { "epoch": 0.7273144605967865, "grad_norm": 2.1404435433709343, "learning_rate": 1.470898080115941e-05, "loss": 0.7478, "step": 4753 }, { "epoch": 0.7274674827850038, "grad_norm": 2.0884135256005627, "learning_rate": 1.4706794434173974e-05, "loss": 0.6151, "step": 4754 }, { "epoch": 0.7276205049732212, "grad_norm": 2.296005837056293, "learning_rate": 1.470460777812588e-05, "loss": 0.7234, "step": 4755 }, { "epoch": 0.7277735271614384, "grad_norm": 2.2279102958195995, "learning_rate": 1.470242083314942e-05, "loss": 0.7359, "step": 4756 }, { "epoch": 0.7279265493496557, "grad_norm": 2.4351625984189536, "learning_rate": 1.47002335993789e-05, "loss": 0.7718, "step": 4757 }, { "epoch": 0.728079571537873, "grad_norm": 2.4439542483712335, "learning_rate": 1.4698046076948647e-05, "loss": 0.7665, "step": 4758 }, { "epoch": 0.7282325937260903, "grad_norm": 2.157585434584162, "learning_rate": 1.4695858265993005e-05, "loss": 0.7102, "step": 4759 }, { "epoch": 0.7283856159143076, "grad_norm": 2.38408004828983, "learning_rate": 1.4693670166646337e-05, "loss": 0.7396, "step": 4760 }, { "epoch": 0.7285386381025248, "grad_norm": 2.2861670459333525, "learning_rate": 1.4691481779043022e-05, "loss": 0.7067, "step": 4761 }, { "epoch": 0.7286916602907422, "grad_norm": 2.4361904893888395, "learning_rate": 1.4689293103317456e-05, "loss": 0.7469, "step": 4762 }, { "epoch": 0.7288446824789594, "grad_norm": 2.031659778568357, "learning_rate": 1.4687104139604058e-05, "loss": 0.6313, "step": 4763 }, { "epoch": 0.7289977046671767, "grad_norm": 2.0556376938872942, "learning_rate": 1.468491488803726e-05, "loss": 0.8087, "step": 4764 }, { "epoch": 0.7291507268553941, "grad_norm": 2.565523844865347, "learning_rate": 1.468272534875151e-05, "loss": 0.8579, "step": 4765 }, { "epoch": 0.7293037490436113, "grad_norm": 2.363770120524518, "learning_rate": 1.4680535521881277e-05, "loss": 0.7409, "step": 4766 }, { "epoch": 0.7294567712318286, "grad_norm": 2.318947990020625, "learning_rate": 1.4678345407561046e-05, "loss": 0.7089, "step": 4767 }, { "epoch": 0.729609793420046, "grad_norm": 2.1665273836897954, "learning_rate": 1.4676155005925323e-05, "loss": 0.6989, "step": 4768 }, { "epoch": 0.7297628156082632, "grad_norm": 2.282864821091944, "learning_rate": 1.4673964317108627e-05, "loss": 0.6375, "step": 4769 }, { "epoch": 0.7299158377964805, "grad_norm": 2.1550424071289473, "learning_rate": 1.4671773341245499e-05, "loss": 0.7207, "step": 4770 }, { "epoch": 0.7300688599846977, "grad_norm": 2.1784007795790505, "learning_rate": 1.4669582078470494e-05, "loss": 0.6942, "step": 4771 }, { "epoch": 0.7302218821729151, "grad_norm": 2.2050045587066025, "learning_rate": 1.4667390528918186e-05, "loss": 0.7414, "step": 4772 }, { "epoch": 0.7303749043611324, "grad_norm": 2.107735164826548, "learning_rate": 1.4665198692723166e-05, "loss": 0.7075, "step": 4773 }, { "epoch": 0.7305279265493496, "grad_norm": 2.145877315751559, "learning_rate": 1.4663006570020044e-05, "loss": 0.7179, "step": 4774 }, { "epoch": 0.730680948737567, "grad_norm": 2.282915199457753, "learning_rate": 1.4660814160943448e-05, "loss": 0.7671, "step": 4775 }, { "epoch": 0.7308339709257843, "grad_norm": 2.0724299524788115, "learning_rate": 1.465862146562802e-05, "loss": 0.6496, "step": 4776 }, { "epoch": 0.7309869931140015, "grad_norm": 2.36617080210269, "learning_rate": 1.4656428484208423e-05, "loss": 0.7133, "step": 4777 }, { "epoch": 0.7311400153022188, "grad_norm": 2.3154604337397346, "learning_rate": 1.4654235216819337e-05, "loss": 0.6548, "step": 4778 }, { "epoch": 0.7312930374904361, "grad_norm": 2.284140303142969, "learning_rate": 1.4652041663595457e-05, "loss": 0.6584, "step": 4779 }, { "epoch": 0.7314460596786534, "grad_norm": 2.549272830669625, "learning_rate": 1.4649847824671503e-05, "loss": 0.7738, "step": 4780 }, { "epoch": 0.7315990818668707, "grad_norm": 2.3716627589861474, "learning_rate": 1.4647653700182203e-05, "loss": 0.6948, "step": 4781 }, { "epoch": 0.731752104055088, "grad_norm": 2.254070886426423, "learning_rate": 1.4645459290262304e-05, "loss": 0.7778, "step": 4782 }, { "epoch": 0.7319051262433053, "grad_norm": 2.4077618806749204, "learning_rate": 1.464326459504658e-05, "loss": 0.7467, "step": 4783 }, { "epoch": 0.7320581484315226, "grad_norm": 2.4854494866694794, "learning_rate": 1.4641069614669814e-05, "loss": 0.799, "step": 4784 }, { "epoch": 0.7322111706197398, "grad_norm": 2.097663474137104, "learning_rate": 1.4638874349266806e-05, "loss": 0.7088, "step": 4785 }, { "epoch": 0.7323641928079572, "grad_norm": 2.2978940941367743, "learning_rate": 1.4636678798972374e-05, "loss": 0.6974, "step": 4786 }, { "epoch": 0.7325172149961744, "grad_norm": 2.2559176492956676, "learning_rate": 1.4634482963921362e-05, "loss": 0.7634, "step": 4787 }, { "epoch": 0.7326702371843917, "grad_norm": 2.29409352342819, "learning_rate": 1.4632286844248618e-05, "loss": 0.677, "step": 4788 }, { "epoch": 0.7328232593726091, "grad_norm": 2.186287939404946, "learning_rate": 1.463009044008902e-05, "loss": 0.7605, "step": 4789 }, { "epoch": 0.7329762815608263, "grad_norm": 2.487897472414095, "learning_rate": 1.4627893751577454e-05, "loss": 0.7555, "step": 4790 }, { "epoch": 0.7331293037490436, "grad_norm": 2.1811604114846115, "learning_rate": 1.4625696778848826e-05, "loss": 0.7005, "step": 4791 }, { "epoch": 0.733282325937261, "grad_norm": 2.344426014732196, "learning_rate": 1.4623499522038064e-05, "loss": 0.7748, "step": 4792 }, { "epoch": 0.7334353481254782, "grad_norm": 2.344890853831977, "learning_rate": 1.4621301981280112e-05, "loss": 0.8042, "step": 4793 }, { "epoch": 0.7335883703136955, "grad_norm": 1.9922549418646183, "learning_rate": 1.4619104156709924e-05, "loss": 0.6517, "step": 4794 }, { "epoch": 0.7337413925019127, "grad_norm": 2.0746510135183365, "learning_rate": 1.461690604846248e-05, "loss": 0.5896, "step": 4795 }, { "epoch": 0.7338944146901301, "grad_norm": 2.168429526773284, "learning_rate": 1.4614707656672775e-05, "loss": 0.7213, "step": 4796 }, { "epoch": 0.7340474368783474, "grad_norm": 2.317615047962928, "learning_rate": 1.4612508981475819e-05, "loss": 0.7598, "step": 4797 }, { "epoch": 0.7342004590665646, "grad_norm": 2.000689298049806, "learning_rate": 1.4610310023006643e-05, "loss": 0.67, "step": 4798 }, { "epoch": 0.734353481254782, "grad_norm": 2.340228400641039, "learning_rate": 1.4608110781400293e-05, "loss": 0.7711, "step": 4799 }, { "epoch": 0.7345065034429993, "grad_norm": 2.044964571461997, "learning_rate": 1.4605911256791831e-05, "loss": 0.6814, "step": 4800 }, { "epoch": 0.7346595256312165, "grad_norm": 2.3241346893015478, "learning_rate": 1.4603711449316342e-05, "loss": 0.7522, "step": 4801 }, { "epoch": 0.7348125478194338, "grad_norm": 2.3624868394174325, "learning_rate": 1.4601511359108924e-05, "loss": 0.7272, "step": 4802 }, { "epoch": 0.7349655700076511, "grad_norm": 2.475081517037485, "learning_rate": 1.4599310986304691e-05, "loss": 0.735, "step": 4803 }, { "epoch": 0.7351185921958684, "grad_norm": 2.2359477222481865, "learning_rate": 1.4597110331038775e-05, "loss": 0.6041, "step": 4804 }, { "epoch": 0.7352716143840857, "grad_norm": 2.0780513139500374, "learning_rate": 1.4594909393446334e-05, "loss": 0.7102, "step": 4805 }, { "epoch": 0.735424636572303, "grad_norm": 2.0920809638081943, "learning_rate": 1.459270817366253e-05, "loss": 0.7028, "step": 4806 }, { "epoch": 0.7355776587605203, "grad_norm": 2.222222916454537, "learning_rate": 1.459050667182255e-05, "loss": 0.5553, "step": 4807 }, { "epoch": 0.7357306809487376, "grad_norm": 2.2204854583000886, "learning_rate": 1.4588304888061597e-05, "loss": 0.6874, "step": 4808 }, { "epoch": 0.7358837031369548, "grad_norm": 2.2720313280348208, "learning_rate": 1.4586102822514896e-05, "loss": 0.7076, "step": 4809 }, { "epoch": 0.7360367253251722, "grad_norm": 2.387822079584415, "learning_rate": 1.458390047531767e-05, "loss": 0.7585, "step": 4810 }, { "epoch": 0.7361897475133894, "grad_norm": 2.428520327068487, "learning_rate": 1.4581697846605192e-05, "loss": 0.7851, "step": 4811 }, { "epoch": 0.7363427697016067, "grad_norm": 2.1643869130717475, "learning_rate": 1.4579494936512722e-05, "loss": 0.7187, "step": 4812 }, { "epoch": 0.7364957918898241, "grad_norm": 2.2809620466631193, "learning_rate": 1.4577291745175555e-05, "loss": 0.6697, "step": 4813 }, { "epoch": 0.7366488140780413, "grad_norm": 2.015931630069814, "learning_rate": 1.4575088272728992e-05, "loss": 0.7241, "step": 4814 }, { "epoch": 0.7368018362662586, "grad_norm": 2.48264062580387, "learning_rate": 1.4572884519308363e-05, "loss": 0.7079, "step": 4815 }, { "epoch": 0.736954858454476, "grad_norm": 2.5497171898501088, "learning_rate": 1.4570680485049007e-05, "loss": 0.7401, "step": 4816 }, { "epoch": 0.7371078806426932, "grad_norm": 2.2285617237271182, "learning_rate": 1.4568476170086281e-05, "loss": 0.6802, "step": 4817 }, { "epoch": 0.7372609028309105, "grad_norm": 2.4025875558068925, "learning_rate": 1.4566271574555559e-05, "loss": 0.6301, "step": 4818 }, { "epoch": 0.7374139250191277, "grad_norm": 2.4112017394461644, "learning_rate": 1.4564066698592238e-05, "loss": 0.6759, "step": 4819 }, { "epoch": 0.7375669472073451, "grad_norm": 2.195257542503888, "learning_rate": 1.4561861542331725e-05, "loss": 0.6785, "step": 4820 }, { "epoch": 0.7377199693955624, "grad_norm": 2.134046236215962, "learning_rate": 1.4559656105909449e-05, "loss": 0.66, "step": 4821 }, { "epoch": 0.7378729915837796, "grad_norm": 1.7848717620286476, "learning_rate": 1.4557450389460859e-05, "loss": 0.592, "step": 4822 }, { "epoch": 0.738026013771997, "grad_norm": 2.1999375705173656, "learning_rate": 1.4555244393121406e-05, "loss": 0.6376, "step": 4823 }, { "epoch": 0.7381790359602143, "grad_norm": 2.192595297089319, "learning_rate": 1.4553038117026577e-05, "loss": 0.6949, "step": 4824 }, { "epoch": 0.7383320581484315, "grad_norm": 2.5214156134793764, "learning_rate": 1.4550831561311864e-05, "loss": 0.8018, "step": 4825 }, { "epoch": 0.7384850803366488, "grad_norm": 2.8632598195698655, "learning_rate": 1.4548624726112782e-05, "loss": 0.7681, "step": 4826 }, { "epoch": 0.7386381025248661, "grad_norm": 2.310980789411813, "learning_rate": 1.4546417611564864e-05, "loss": 0.7135, "step": 4827 }, { "epoch": 0.7387911247130834, "grad_norm": 2.324914918749278, "learning_rate": 1.4544210217803651e-05, "loss": 0.6451, "step": 4828 }, { "epoch": 0.7389441469013007, "grad_norm": 2.2455011773034212, "learning_rate": 1.4542002544964713e-05, "loss": 0.7251, "step": 4829 }, { "epoch": 0.739097169089518, "grad_norm": 2.3997081873057353, "learning_rate": 1.4539794593183634e-05, "loss": 0.6291, "step": 4830 }, { "epoch": 0.7392501912777353, "grad_norm": 2.4566939192312356, "learning_rate": 1.4537586362596005e-05, "loss": 0.7435, "step": 4831 }, { "epoch": 0.7394032134659526, "grad_norm": 2.4455663976068145, "learning_rate": 1.4535377853337448e-05, "loss": 0.6491, "step": 4832 }, { "epoch": 0.7395562356541698, "grad_norm": 2.1715569659281044, "learning_rate": 1.4533169065543593e-05, "loss": 0.6749, "step": 4833 }, { "epoch": 0.7397092578423872, "grad_norm": 2.291412184131578, "learning_rate": 1.4530959999350095e-05, "loss": 0.643, "step": 4834 }, { "epoch": 0.7398622800306044, "grad_norm": 2.204427629847068, "learning_rate": 1.4528750654892614e-05, "loss": 0.8016, "step": 4835 }, { "epoch": 0.7400153022188217, "grad_norm": 1.9802730994342306, "learning_rate": 1.452654103230684e-05, "loss": 0.7306, "step": 4836 }, { "epoch": 0.7401683244070391, "grad_norm": 2.4833975120925977, "learning_rate": 1.4524331131728472e-05, "loss": 0.7693, "step": 4837 }, { "epoch": 0.7403213465952563, "grad_norm": 2.2973589798555634, "learning_rate": 1.4522120953293233e-05, "loss": 0.6689, "step": 4838 }, { "epoch": 0.7404743687834736, "grad_norm": 2.025223238613172, "learning_rate": 1.4519910497136856e-05, "loss": 0.6473, "step": 4839 }, { "epoch": 0.740627390971691, "grad_norm": 2.348915095079573, "learning_rate": 1.451769976339509e-05, "loss": 0.8239, "step": 4840 }, { "epoch": 0.7407804131599082, "grad_norm": 2.309855719910271, "learning_rate": 1.4515488752203713e-05, "loss": 0.7992, "step": 4841 }, { "epoch": 0.7409334353481255, "grad_norm": 2.206226950757133, "learning_rate": 1.4513277463698503e-05, "loss": 0.6737, "step": 4842 }, { "epoch": 0.7410864575363427, "grad_norm": 2.2352741297777756, "learning_rate": 1.4511065898015269e-05, "loss": 0.7212, "step": 4843 }, { "epoch": 0.7412394797245601, "grad_norm": 2.38240113752834, "learning_rate": 1.450885405528983e-05, "loss": 0.8639, "step": 4844 }, { "epoch": 0.7413925019127774, "grad_norm": 2.2151203809554496, "learning_rate": 1.4506641935658023e-05, "loss": 0.7709, "step": 4845 }, { "epoch": 0.7415455241009946, "grad_norm": 2.156453587248878, "learning_rate": 1.4504429539255708e-05, "loss": 0.7863, "step": 4846 }, { "epoch": 0.741698546289212, "grad_norm": 2.139602715597007, "learning_rate": 1.4502216866218755e-05, "loss": 0.7202, "step": 4847 }, { "epoch": 0.7418515684774293, "grad_norm": 2.2784471326319156, "learning_rate": 1.4500003916683047e-05, "loss": 0.8374, "step": 4848 }, { "epoch": 0.7420045906656465, "grad_norm": 2.3563875101559217, "learning_rate": 1.4497790690784498e-05, "loss": 0.6719, "step": 4849 }, { "epoch": 0.7421576128538638, "grad_norm": 2.2202100939612825, "learning_rate": 1.4495577188659028e-05, "loss": 0.6286, "step": 4850 }, { "epoch": 0.7423106350420811, "grad_norm": 2.5782599669858586, "learning_rate": 1.449336341044257e-05, "loss": 0.7125, "step": 4851 }, { "epoch": 0.7424636572302984, "grad_norm": 2.374270686758012, "learning_rate": 1.4491149356271092e-05, "loss": 0.6643, "step": 4852 }, { "epoch": 0.7426166794185157, "grad_norm": 2.5637372901568094, "learning_rate": 1.4488935026280561e-05, "loss": 0.6725, "step": 4853 }, { "epoch": 0.742769701606733, "grad_norm": 2.1071033399459136, "learning_rate": 1.448672042060697e-05, "loss": 0.676, "step": 4854 }, { "epoch": 0.7429227237949503, "grad_norm": 2.4993248270173747, "learning_rate": 1.4484505539386324e-05, "loss": 0.7052, "step": 4855 }, { "epoch": 0.7430757459831676, "grad_norm": 2.076983925023967, "learning_rate": 1.448229038275465e-05, "loss": 0.6189, "step": 4856 }, { "epoch": 0.7432287681713848, "grad_norm": 2.433934235940275, "learning_rate": 1.4480074950847991e-05, "loss": 0.8599, "step": 4857 }, { "epoch": 0.7433817903596022, "grad_norm": 2.2124457746802637, "learning_rate": 1.4477859243802401e-05, "loss": 0.7823, "step": 4858 }, { "epoch": 0.7435348125478194, "grad_norm": 2.570932508736398, "learning_rate": 1.447564326175396e-05, "loss": 0.8181, "step": 4859 }, { "epoch": 0.7436878347360367, "grad_norm": 2.4117017713360904, "learning_rate": 1.4473427004838754e-05, "loss": 0.7226, "step": 4860 }, { "epoch": 0.7438408569242541, "grad_norm": 2.176505200952037, "learning_rate": 1.4471210473192896e-05, "loss": 0.7055, "step": 4861 }, { "epoch": 0.7439938791124713, "grad_norm": 2.1857445379147755, "learning_rate": 1.4468993666952512e-05, "loss": 0.6834, "step": 4862 }, { "epoch": 0.7441469013006886, "grad_norm": 2.635693368966689, "learning_rate": 1.4466776586253745e-05, "loss": 0.8412, "step": 4863 }, { "epoch": 0.744299923488906, "grad_norm": 2.0451933975323193, "learning_rate": 1.446455923123275e-05, "loss": 0.6854, "step": 4864 }, { "epoch": 0.7444529456771232, "grad_norm": 2.275358040160873, "learning_rate": 1.4462341602025714e-05, "loss": 0.7035, "step": 4865 }, { "epoch": 0.7446059678653405, "grad_norm": 2.4710038242202956, "learning_rate": 1.4460123698768817e-05, "loss": 0.8401, "step": 4866 }, { "epoch": 0.7447589900535577, "grad_norm": 2.4289044690905897, "learning_rate": 1.4457905521598279e-05, "loss": 0.7351, "step": 4867 }, { "epoch": 0.7449120122417751, "grad_norm": 2.31025768005515, "learning_rate": 1.4455687070650324e-05, "loss": 0.782, "step": 4868 }, { "epoch": 0.7450650344299924, "grad_norm": 2.1544459445175232, "learning_rate": 1.4453468346061193e-05, "loss": 0.7421, "step": 4869 }, { "epoch": 0.7452180566182096, "grad_norm": 2.3447105605966807, "learning_rate": 1.445124934796715e-05, "loss": 0.7559, "step": 4870 }, { "epoch": 0.745371078806427, "grad_norm": 2.239958777557601, "learning_rate": 1.444903007650447e-05, "loss": 0.6981, "step": 4871 }, { "epoch": 0.7455241009946443, "grad_norm": 2.249734548223549, "learning_rate": 1.444681053180945e-05, "loss": 0.751, "step": 4872 }, { "epoch": 0.7456771231828615, "grad_norm": 2.340819325628896, "learning_rate": 1.4444590714018398e-05, "loss": 0.7303, "step": 4873 }, { "epoch": 0.7458301453710788, "grad_norm": 2.34414004865152, "learning_rate": 1.4442370623267643e-05, "loss": 0.7181, "step": 4874 }, { "epoch": 0.7459831675592961, "grad_norm": 3.0286066038526562, "learning_rate": 1.444015025969353e-05, "loss": 0.831, "step": 4875 }, { "epoch": 0.7461361897475134, "grad_norm": 2.425379330178627, "learning_rate": 1.4437929623432418e-05, "loss": 0.7453, "step": 4876 }, { "epoch": 0.7462892119357307, "grad_norm": 2.228493718435368, "learning_rate": 1.443570871462069e-05, "loss": 0.6826, "step": 4877 }, { "epoch": 0.746442234123948, "grad_norm": 2.1177422595743685, "learning_rate": 1.4433487533394734e-05, "loss": 0.6157, "step": 4878 }, { "epoch": 0.7465952563121653, "grad_norm": 2.0833369615704513, "learning_rate": 1.4431266079890964e-05, "loss": 0.5789, "step": 4879 }, { "epoch": 0.7467482785003825, "grad_norm": 2.8278316899269873, "learning_rate": 1.4429044354245812e-05, "loss": 0.7478, "step": 4880 }, { "epoch": 0.7469013006885998, "grad_norm": 2.478404645618318, "learning_rate": 1.4426822356595718e-05, "loss": 0.6872, "step": 4881 }, { "epoch": 0.7470543228768172, "grad_norm": 2.3329379350497508, "learning_rate": 1.4424600087077148e-05, "loss": 0.7983, "step": 4882 }, { "epoch": 0.7472073450650344, "grad_norm": 2.2267621885651003, "learning_rate": 1.4422377545826574e-05, "loss": 0.7222, "step": 4883 }, { "epoch": 0.7473603672532517, "grad_norm": 2.4536454346196424, "learning_rate": 1.4420154732980493e-05, "loss": 0.7625, "step": 4884 }, { "epoch": 0.7475133894414691, "grad_norm": 2.022708265593278, "learning_rate": 1.4417931648675423e-05, "loss": 0.7153, "step": 4885 }, { "epoch": 0.7476664116296863, "grad_norm": 2.256460123168098, "learning_rate": 1.4415708293047884e-05, "loss": 0.7022, "step": 4886 }, { "epoch": 0.7478194338179036, "grad_norm": 2.136078642899449, "learning_rate": 1.4413484666234427e-05, "loss": 0.6409, "step": 4887 }, { "epoch": 0.7479724560061208, "grad_norm": 2.2325055404407994, "learning_rate": 1.4411260768371609e-05, "loss": 0.729, "step": 4888 }, { "epoch": 0.7481254781943382, "grad_norm": 2.3585569399038695, "learning_rate": 1.440903659959601e-05, "loss": 0.7476, "step": 4889 }, { "epoch": 0.7482785003825555, "grad_norm": 2.3183366182918888, "learning_rate": 1.4406812160044226e-05, "loss": 0.7876, "step": 4890 }, { "epoch": 0.7484315225707727, "grad_norm": 2.4632969026460687, "learning_rate": 1.4404587449852865e-05, "loss": 0.7183, "step": 4891 }, { "epoch": 0.7485845447589901, "grad_norm": 2.265010236192547, "learning_rate": 1.4402362469158562e-05, "loss": 0.7735, "step": 4892 }, { "epoch": 0.7487375669472074, "grad_norm": 2.2999201065686266, "learning_rate": 1.4400137218097956e-05, "loss": 0.7304, "step": 4893 }, { "epoch": 0.7488905891354246, "grad_norm": 2.2799436177017216, "learning_rate": 1.4397911696807708e-05, "loss": 0.7187, "step": 4894 }, { "epoch": 0.749043611323642, "grad_norm": 2.133201532746618, "learning_rate": 1.43956859054245e-05, "loss": 0.735, "step": 4895 }, { "epoch": 0.7491966335118592, "grad_norm": 2.3822318079150535, "learning_rate": 1.4393459844085027e-05, "loss": 0.751, "step": 4896 }, { "epoch": 0.7493496557000765, "grad_norm": 2.276622654679582, "learning_rate": 1.4391233512925994e-05, "loss": 0.6863, "step": 4897 }, { "epoch": 0.7495026778882938, "grad_norm": 2.4161787604889193, "learning_rate": 1.4389006912084136e-05, "loss": 0.8374, "step": 4898 }, { "epoch": 0.7496557000765111, "grad_norm": 2.27888042381393, "learning_rate": 1.4386780041696189e-05, "loss": 0.6775, "step": 4899 }, { "epoch": 0.7498087222647284, "grad_norm": 2.520476013025121, "learning_rate": 1.438455290189892e-05, "loss": 0.7428, "step": 4900 }, { "epoch": 0.7499617444529457, "grad_norm": 2.232440986785114, "learning_rate": 1.4382325492829108e-05, "loss": 0.6929, "step": 4901 }, { "epoch": 0.7501147666411629, "grad_norm": 2.2194515852033816, "learning_rate": 1.4380097814623539e-05, "loss": 0.7759, "step": 4902 }, { "epoch": 0.7502677888293803, "grad_norm": 2.493029827031643, "learning_rate": 1.437786986741903e-05, "loss": 0.7942, "step": 4903 }, { "epoch": 0.7504208110175975, "grad_norm": 2.532507217587678, "learning_rate": 1.4375641651352408e-05, "loss": 0.7536, "step": 4904 }, { "epoch": 0.7505738332058148, "grad_norm": 2.1324616550429343, "learning_rate": 1.4373413166560512e-05, "loss": 0.6899, "step": 4905 }, { "epoch": 0.7507268553940322, "grad_norm": 2.404454010344963, "learning_rate": 1.4371184413180205e-05, "loss": 0.7995, "step": 4906 }, { "epoch": 0.7508798775822494, "grad_norm": 2.212175857528018, "learning_rate": 1.4368955391348366e-05, "loss": 0.5926, "step": 4907 }, { "epoch": 0.7510328997704667, "grad_norm": 2.155592680667354, "learning_rate": 1.436672610120188e-05, "loss": 0.6284, "step": 4908 }, { "epoch": 0.751185921958684, "grad_norm": 2.3741727185360832, "learning_rate": 1.4364496542877663e-05, "loss": 0.7462, "step": 4909 }, { "epoch": 0.7513389441469013, "grad_norm": 2.1469671874999325, "learning_rate": 1.4362266716512643e-05, "loss": 0.7229, "step": 4910 }, { "epoch": 0.7514919663351186, "grad_norm": 2.121527275206317, "learning_rate": 1.4360036622243754e-05, "loss": 0.7239, "step": 4911 }, { "epoch": 0.7516449885233358, "grad_norm": 2.2376730417194812, "learning_rate": 1.4357806260207962e-05, "loss": 0.696, "step": 4912 }, { "epoch": 0.7517980107115532, "grad_norm": 2.2821150028836574, "learning_rate": 1.435557563054224e-05, "loss": 0.785, "step": 4913 }, { "epoch": 0.7519510328997705, "grad_norm": 2.0970063152782066, "learning_rate": 1.4353344733383576e-05, "loss": 0.7399, "step": 4914 }, { "epoch": 0.7521040550879877, "grad_norm": 2.083207329954238, "learning_rate": 1.4351113568868987e-05, "loss": 0.5428, "step": 4915 }, { "epoch": 0.752257077276205, "grad_norm": 2.245970612654284, "learning_rate": 1.434888213713549e-05, "loss": 0.7392, "step": 4916 }, { "epoch": 0.7524100994644224, "grad_norm": 2.3609964289459113, "learning_rate": 1.4346650438320124e-05, "loss": 0.6814, "step": 4917 }, { "epoch": 0.7525631216526396, "grad_norm": 2.1525419459253556, "learning_rate": 1.4344418472559956e-05, "loss": 0.6854, "step": 4918 }, { "epoch": 0.7527161438408569, "grad_norm": 2.330687789914893, "learning_rate": 1.434218623999205e-05, "loss": 0.6747, "step": 4919 }, { "epoch": 0.7528691660290742, "grad_norm": 2.337433181523013, "learning_rate": 1.4339953740753502e-05, "loss": 0.6729, "step": 4920 }, { "epoch": 0.7530221882172915, "grad_norm": 2.165023230727938, "learning_rate": 1.4337720974981417e-05, "loss": 0.6102, "step": 4921 }, { "epoch": 0.7531752104055088, "grad_norm": 2.095474532830923, "learning_rate": 1.4335487942812919e-05, "loss": 0.7267, "step": 4922 }, { "epoch": 0.753328232593726, "grad_norm": 2.1802082090993777, "learning_rate": 1.4333254644385144e-05, "loss": 0.6848, "step": 4923 }, { "epoch": 0.7534812547819434, "grad_norm": 2.6591708663481715, "learning_rate": 1.4331021079835249e-05, "loss": 0.8153, "step": 4924 }, { "epoch": 0.7536342769701607, "grad_norm": 2.3136679846972665, "learning_rate": 1.432878724930041e-05, "loss": 0.69, "step": 4925 }, { "epoch": 0.7537872991583779, "grad_norm": 2.2807435166362113, "learning_rate": 1.4326553152917808e-05, "loss": 0.6869, "step": 4926 }, { "epoch": 0.7539403213465953, "grad_norm": 2.1228252902705402, "learning_rate": 1.432431879082465e-05, "loss": 0.7477, "step": 4927 }, { "epoch": 0.7540933435348125, "grad_norm": 2.4278219077579473, "learning_rate": 1.432208416315816e-05, "loss": 0.7974, "step": 4928 }, { "epoch": 0.7542463657230298, "grad_norm": 2.5232419497439396, "learning_rate": 1.4319849270055576e-05, "loss": 0.732, "step": 4929 }, { "epoch": 0.7543993879112472, "grad_norm": 2.0991061394057406, "learning_rate": 1.4317614111654145e-05, "loss": 0.7423, "step": 4930 }, { "epoch": 0.7545524100994644, "grad_norm": 2.3815083789734164, "learning_rate": 1.4315378688091143e-05, "loss": 0.9114, "step": 4931 }, { "epoch": 0.7547054322876817, "grad_norm": 2.185730306642332, "learning_rate": 1.4313142999503855e-05, "loss": 0.7058, "step": 4932 }, { "epoch": 0.754858454475899, "grad_norm": 2.1376989838870113, "learning_rate": 1.4310907046029581e-05, "loss": 0.702, "step": 4933 }, { "epoch": 0.7550114766641163, "grad_norm": 2.185315720970579, "learning_rate": 1.4308670827805642e-05, "loss": 0.6436, "step": 4934 }, { "epoch": 0.7551644988523336, "grad_norm": 2.325819141198931, "learning_rate": 1.430643434496937e-05, "loss": 0.7432, "step": 4935 }, { "epoch": 0.7553175210405508, "grad_norm": 2.022900910011311, "learning_rate": 1.4304197597658119e-05, "loss": 0.7569, "step": 4936 }, { "epoch": 0.7554705432287682, "grad_norm": 2.2978717235749917, "learning_rate": 1.4301960586009255e-05, "loss": 0.8398, "step": 4937 }, { "epoch": 0.7556235654169855, "grad_norm": 2.0323787781258114, "learning_rate": 1.4299723310160163e-05, "loss": 0.8232, "step": 4938 }, { "epoch": 0.7557765876052027, "grad_norm": 2.4687308360656344, "learning_rate": 1.429748577024824e-05, "loss": 0.7811, "step": 4939 }, { "epoch": 0.75592960979342, "grad_norm": 2.416728876586808, "learning_rate": 1.4295247966410903e-05, "loss": 0.7862, "step": 4940 }, { "epoch": 0.7560826319816374, "grad_norm": 2.430436233578812, "learning_rate": 1.429300989878559e-05, "loss": 0.7025, "step": 4941 }, { "epoch": 0.7562356541698546, "grad_norm": 2.2106978733410556, "learning_rate": 1.4290771567509745e-05, "loss": 0.6891, "step": 4942 }, { "epoch": 0.7563886763580719, "grad_norm": 2.4306496637568484, "learning_rate": 1.4288532972720825e-05, "loss": 0.8306, "step": 4943 }, { "epoch": 0.7565416985462892, "grad_norm": 2.469829207837791, "learning_rate": 1.4286294114556325e-05, "loss": 0.692, "step": 4944 }, { "epoch": 0.7566947207345065, "grad_norm": 2.4309218792187273, "learning_rate": 1.4284054993153735e-05, "loss": 0.7137, "step": 4945 }, { "epoch": 0.7568477429227238, "grad_norm": 2.579777669684564, "learning_rate": 1.4281815608650565e-05, "loss": 0.7477, "step": 4946 }, { "epoch": 0.757000765110941, "grad_norm": 2.0971978445223227, "learning_rate": 1.4279575961184348e-05, "loss": 0.6919, "step": 4947 }, { "epoch": 0.7571537872991584, "grad_norm": 2.2553951720139422, "learning_rate": 1.4277336050892631e-05, "loss": 0.7135, "step": 4948 }, { "epoch": 0.7573068094873757, "grad_norm": 2.149299088970806, "learning_rate": 1.4275095877912976e-05, "loss": 0.6381, "step": 4949 }, { "epoch": 0.7574598316755929, "grad_norm": 2.1629746985410523, "learning_rate": 1.4272855442382957e-05, "loss": 0.6806, "step": 4950 }, { "epoch": 0.7576128538638103, "grad_norm": 2.3261663770030157, "learning_rate": 1.427061474444017e-05, "loss": 0.6916, "step": 4951 }, { "epoch": 0.7577658760520275, "grad_norm": 2.1499157181080566, "learning_rate": 1.4268373784222225e-05, "loss": 0.676, "step": 4952 }, { "epoch": 0.7579188982402448, "grad_norm": 2.4744089486881293, "learning_rate": 1.4266132561866747e-05, "loss": 0.8978, "step": 4953 }, { "epoch": 0.7580719204284622, "grad_norm": 2.168743668047201, "learning_rate": 1.4263891077511383e-05, "loss": 0.6623, "step": 4954 }, { "epoch": 0.7582249426166794, "grad_norm": 2.3449514837783063, "learning_rate": 1.4261649331293781e-05, "loss": 0.6634, "step": 4955 }, { "epoch": 0.7583779648048967, "grad_norm": 2.5034072035723023, "learning_rate": 1.4259407323351626e-05, "loss": 0.7541, "step": 4956 }, { "epoch": 0.758530986993114, "grad_norm": 2.3696350755990485, "learning_rate": 1.4257165053822605e-05, "loss": 0.6219, "step": 4957 }, { "epoch": 0.7586840091813313, "grad_norm": 2.3382755188679742, "learning_rate": 1.425492252284442e-05, "loss": 0.7696, "step": 4958 }, { "epoch": 0.7588370313695486, "grad_norm": 2.135467705544331, "learning_rate": 1.4252679730554801e-05, "loss": 0.7307, "step": 4959 }, { "epoch": 0.7589900535577658, "grad_norm": 2.408284072064693, "learning_rate": 1.4250436677091482e-05, "loss": 0.7607, "step": 4960 }, { "epoch": 0.7591430757459832, "grad_norm": 2.2487141060628337, "learning_rate": 1.424819336259222e-05, "loss": 0.7047, "step": 4961 }, { "epoch": 0.7592960979342005, "grad_norm": 2.174625864768563, "learning_rate": 1.4245949787194783e-05, "loss": 0.7431, "step": 4962 }, { "epoch": 0.7594491201224177, "grad_norm": 2.3367292934749084, "learning_rate": 1.4243705951036961e-05, "loss": 0.7039, "step": 4963 }, { "epoch": 0.759602142310635, "grad_norm": 2.3698750474828425, "learning_rate": 1.4241461854256553e-05, "loss": 0.692, "step": 4964 }, { "epoch": 0.7597551644988524, "grad_norm": 2.3837948494422525, "learning_rate": 1.423921749699138e-05, "loss": 0.682, "step": 4965 }, { "epoch": 0.7599081866870696, "grad_norm": 1.9568026488800232, "learning_rate": 1.423697287937928e-05, "loss": 0.5177, "step": 4966 }, { "epoch": 0.7600612088752869, "grad_norm": 2.011750007271983, "learning_rate": 1.4234728001558098e-05, "loss": 0.7043, "step": 4967 }, { "epoch": 0.7602142310635042, "grad_norm": 2.523991868179148, "learning_rate": 1.42324828636657e-05, "loss": 0.8005, "step": 4968 }, { "epoch": 0.7603672532517215, "grad_norm": 1.9438899634689, "learning_rate": 1.4230237465839975e-05, "loss": 0.6676, "step": 4969 }, { "epoch": 0.7605202754399388, "grad_norm": 2.224899265987228, "learning_rate": 1.422799180821882e-05, "loss": 0.6246, "step": 4970 }, { "epoch": 0.760673297628156, "grad_norm": 2.2936026183785567, "learning_rate": 1.4225745890940145e-05, "loss": 0.7018, "step": 4971 }, { "epoch": 0.7608263198163734, "grad_norm": 2.283389788103775, "learning_rate": 1.4223499714141885e-05, "loss": 0.7678, "step": 4972 }, { "epoch": 0.7609793420045907, "grad_norm": 2.1905560444534546, "learning_rate": 1.4221253277961987e-05, "loss": 0.696, "step": 4973 }, { "epoch": 0.7611323641928079, "grad_norm": 2.2357422103556504, "learning_rate": 1.4219006582538409e-05, "loss": 0.7681, "step": 4974 }, { "epoch": 0.7612853863810253, "grad_norm": 2.104374866205293, "learning_rate": 1.4216759628009132e-05, "loss": 0.6724, "step": 4975 }, { "epoch": 0.7614384085692425, "grad_norm": 2.376649516428986, "learning_rate": 1.421451241451215e-05, "loss": 0.7457, "step": 4976 }, { "epoch": 0.7615914307574598, "grad_norm": 2.5026682947159773, "learning_rate": 1.4212264942185473e-05, "loss": 0.7707, "step": 4977 }, { "epoch": 0.7617444529456772, "grad_norm": 2.339951422144216, "learning_rate": 1.421001721116713e-05, "loss": 0.7215, "step": 4978 }, { "epoch": 0.7618974751338944, "grad_norm": 2.5119998784613524, "learning_rate": 1.4207769221595157e-05, "loss": 0.8083, "step": 4979 }, { "epoch": 0.7620504973221117, "grad_norm": 2.503174353934065, "learning_rate": 1.4205520973607618e-05, "loss": 0.6887, "step": 4980 }, { "epoch": 0.762203519510329, "grad_norm": 2.4742332404941387, "learning_rate": 1.4203272467342582e-05, "loss": 0.7633, "step": 4981 }, { "epoch": 0.7623565416985463, "grad_norm": 2.4392406082909655, "learning_rate": 1.4201023702938143e-05, "loss": 0.6757, "step": 4982 }, { "epoch": 0.7625095638867636, "grad_norm": 2.3098987205477064, "learning_rate": 1.4198774680532403e-05, "loss": 0.7513, "step": 4983 }, { "epoch": 0.7626625860749808, "grad_norm": 2.258614987166009, "learning_rate": 1.4196525400263482e-05, "loss": 0.7191, "step": 4984 }, { "epoch": 0.7628156082631982, "grad_norm": 2.3404733220525196, "learning_rate": 1.419427586226952e-05, "loss": 0.799, "step": 4985 }, { "epoch": 0.7629686304514155, "grad_norm": 2.2779284684505354, "learning_rate": 1.4192026066688673e-05, "loss": 0.8147, "step": 4986 }, { "epoch": 0.7631216526396327, "grad_norm": 2.31643846075932, "learning_rate": 1.41897760136591e-05, "loss": 0.7568, "step": 4987 }, { "epoch": 0.76327467482785, "grad_norm": 2.2663948410577337, "learning_rate": 1.4187525703318996e-05, "loss": 0.6588, "step": 4988 }, { "epoch": 0.7634276970160673, "grad_norm": 2.295491923173907, "learning_rate": 1.4185275135806555e-05, "loss": 0.8117, "step": 4989 }, { "epoch": 0.7635807192042846, "grad_norm": 2.353699212264283, "learning_rate": 1.4183024311259997e-05, "loss": 0.6288, "step": 4990 }, { "epoch": 0.7637337413925019, "grad_norm": 2.1939178575915124, "learning_rate": 1.4180773229817548e-05, "loss": 0.6682, "step": 4991 }, { "epoch": 0.7638867635807192, "grad_norm": 2.2979720867197377, "learning_rate": 1.4178521891617462e-05, "loss": 0.7026, "step": 4992 }, { "epoch": 0.7640397857689365, "grad_norm": 2.4221273394850487, "learning_rate": 1.4176270296797998e-05, "loss": 0.7267, "step": 4993 }, { "epoch": 0.7641928079571538, "grad_norm": 2.3662257487773797, "learning_rate": 1.4174018445497439e-05, "loss": 0.7745, "step": 4994 }, { "epoch": 0.764345830145371, "grad_norm": 2.2759670548636133, "learning_rate": 1.4171766337854083e-05, "loss": 0.6579, "step": 4995 }, { "epoch": 0.7644988523335884, "grad_norm": 1.9781887539285967, "learning_rate": 1.416951397400623e-05, "loss": 0.6649, "step": 4996 }, { "epoch": 0.7646518745218056, "grad_norm": 2.2189360667232414, "learning_rate": 1.4167261354092214e-05, "loss": 0.7571, "step": 4997 }, { "epoch": 0.7648048967100229, "grad_norm": 2.280749037561788, "learning_rate": 1.4165008478250377e-05, "loss": 0.717, "step": 4998 }, { "epoch": 0.7649579188982403, "grad_norm": 2.362059249926859, "learning_rate": 1.4162755346619075e-05, "loss": 0.6807, "step": 4999 }, { "epoch": 0.7651109410864575, "grad_norm": 2.2248228523984084, "learning_rate": 1.4160501959336684e-05, "loss": 0.6791, "step": 5000 }, { "epoch": 0.7652639632746748, "grad_norm": 2.2767134208389717, "learning_rate": 1.415824831654159e-05, "loss": 0.7279, "step": 5001 }, { "epoch": 0.7654169854628922, "grad_norm": 2.259109443503308, "learning_rate": 1.41559944183722e-05, "loss": 0.6783, "step": 5002 }, { "epoch": 0.7655700076511094, "grad_norm": 2.119358016956952, "learning_rate": 1.4153740264966935e-05, "loss": 0.7382, "step": 5003 }, { "epoch": 0.7657230298393267, "grad_norm": 2.2868498026059507, "learning_rate": 1.4151485856464231e-05, "loss": 0.728, "step": 5004 }, { "epoch": 0.7658760520275439, "grad_norm": 2.3259370090069074, "learning_rate": 1.414923119300254e-05, "loss": 0.7322, "step": 5005 }, { "epoch": 0.7660290742157613, "grad_norm": 2.0952658010872325, "learning_rate": 1.4146976274720325e-05, "loss": 0.6611, "step": 5006 }, { "epoch": 0.7661820964039786, "grad_norm": 2.2507490230036615, "learning_rate": 1.414472110175608e-05, "loss": 0.6851, "step": 5007 }, { "epoch": 0.7663351185921958, "grad_norm": 2.3596475176429577, "learning_rate": 1.4142465674248295e-05, "loss": 0.7103, "step": 5008 }, { "epoch": 0.7664881407804132, "grad_norm": 2.2428654694936165, "learning_rate": 1.4140209992335488e-05, "loss": 0.7419, "step": 5009 }, { "epoch": 0.7666411629686305, "grad_norm": 2.412219562600667, "learning_rate": 1.4137954056156189e-05, "loss": 0.822, "step": 5010 }, { "epoch": 0.7667941851568477, "grad_norm": 2.469683847966789, "learning_rate": 1.4135697865848945e-05, "loss": 0.821, "step": 5011 }, { "epoch": 0.766947207345065, "grad_norm": 2.380874074809561, "learning_rate": 1.4133441421552312e-05, "loss": 0.7444, "step": 5012 }, { "epoch": 0.7671002295332823, "grad_norm": 2.54268340852517, "learning_rate": 1.4131184723404876e-05, "loss": 0.8583, "step": 5013 }, { "epoch": 0.7672532517214996, "grad_norm": 2.1625190284078863, "learning_rate": 1.4128927771545222e-05, "loss": 0.6829, "step": 5014 }, { "epoch": 0.7674062739097169, "grad_norm": 2.2366367904796176, "learning_rate": 1.4126670566111962e-05, "loss": 0.7066, "step": 5015 }, { "epoch": 0.7675592960979342, "grad_norm": 2.2869932661750005, "learning_rate": 1.4124413107243718e-05, "loss": 0.7291, "step": 5016 }, { "epoch": 0.7677123182861515, "grad_norm": 2.461913309036648, "learning_rate": 1.4122155395079132e-05, "loss": 0.7526, "step": 5017 }, { "epoch": 0.7678653404743688, "grad_norm": 2.4062027749990755, "learning_rate": 1.4119897429756855e-05, "loss": 0.742, "step": 5018 }, { "epoch": 0.768018362662586, "grad_norm": 2.1281105958816373, "learning_rate": 1.4117639211415561e-05, "loss": 0.6535, "step": 5019 }, { "epoch": 0.7681713848508034, "grad_norm": 2.442149676668343, "learning_rate": 1.4115380740193936e-05, "loss": 0.6923, "step": 5020 }, { "epoch": 0.7683244070390206, "grad_norm": 2.3490210282430426, "learning_rate": 1.4113122016230678e-05, "loss": 0.7034, "step": 5021 }, { "epoch": 0.7684774292272379, "grad_norm": 2.18584937837184, "learning_rate": 1.4110863039664506e-05, "loss": 0.654, "step": 5022 }, { "epoch": 0.7686304514154553, "grad_norm": 2.132444677822337, "learning_rate": 1.4108603810634157e-05, "loss": 0.6469, "step": 5023 }, { "epoch": 0.7687834736036725, "grad_norm": 2.2211029054390634, "learning_rate": 1.4106344329278372e-05, "loss": 0.7746, "step": 5024 }, { "epoch": 0.7689364957918898, "grad_norm": 2.5642196111069344, "learning_rate": 1.4104084595735916e-05, "loss": 0.7419, "step": 5025 }, { "epoch": 0.7690895179801072, "grad_norm": 2.2008072174788715, "learning_rate": 1.410182461014557e-05, "loss": 0.7026, "step": 5026 }, { "epoch": 0.7692425401683244, "grad_norm": 2.1514935715003087, "learning_rate": 1.4099564372646132e-05, "loss": 0.7478, "step": 5027 }, { "epoch": 0.7693955623565417, "grad_norm": 2.1472496992000836, "learning_rate": 1.4097303883376405e-05, "loss": 0.7736, "step": 5028 }, { "epoch": 0.7695485845447589, "grad_norm": 2.1083158311876997, "learning_rate": 1.409504314247522e-05, "loss": 0.7575, "step": 5029 }, { "epoch": 0.7697016067329763, "grad_norm": 2.3859948402117506, "learning_rate": 1.4092782150081415e-05, "loss": 0.8054, "step": 5030 }, { "epoch": 0.7698546289211936, "grad_norm": 2.2696789787921308, "learning_rate": 1.4090520906333844e-05, "loss": 0.6825, "step": 5031 }, { "epoch": 0.7700076511094108, "grad_norm": 2.5043142859591954, "learning_rate": 1.4088259411371388e-05, "loss": 0.7305, "step": 5032 }, { "epoch": 0.7701606732976282, "grad_norm": 2.8269162934283716, "learning_rate": 1.4085997665332925e-05, "loss": 0.6334, "step": 5033 }, { "epoch": 0.7703136954858455, "grad_norm": 2.363998579657459, "learning_rate": 1.4083735668357359e-05, "loss": 0.717, "step": 5034 }, { "epoch": 0.7704667176740627, "grad_norm": 2.308357524429582, "learning_rate": 1.4081473420583612e-05, "loss": 0.7569, "step": 5035 }, { "epoch": 0.77061973986228, "grad_norm": 2.1405563499565625, "learning_rate": 1.4079210922150615e-05, "loss": 0.7978, "step": 5036 }, { "epoch": 0.7707727620504973, "grad_norm": 2.2014674249133113, "learning_rate": 1.4076948173197316e-05, "loss": 0.7363, "step": 5037 }, { "epoch": 0.7709257842387146, "grad_norm": 2.5632111129243795, "learning_rate": 1.4074685173862684e-05, "loss": 0.7573, "step": 5038 }, { "epoch": 0.7710788064269319, "grad_norm": 2.440105505427757, "learning_rate": 1.4072421924285693e-05, "loss": 0.7406, "step": 5039 }, { "epoch": 0.7712318286151492, "grad_norm": 2.328805243013259, "learning_rate": 1.4070158424605338e-05, "loss": 0.7432, "step": 5040 }, { "epoch": 0.7713848508033665, "grad_norm": 2.265426601466516, "learning_rate": 1.4067894674960637e-05, "loss": 0.6968, "step": 5041 }, { "epoch": 0.7715378729915838, "grad_norm": 2.268614568748264, "learning_rate": 1.4065630675490605e-05, "loss": 0.6453, "step": 5042 }, { "epoch": 0.771690895179801, "grad_norm": 2.2010842207373305, "learning_rate": 1.4063366426334293e-05, "loss": 0.7092, "step": 5043 }, { "epoch": 0.7718439173680184, "grad_norm": 2.0805941230225424, "learning_rate": 1.4061101927630749e-05, "loss": 0.5924, "step": 5044 }, { "epoch": 0.7719969395562356, "grad_norm": 2.1821898010478353, "learning_rate": 1.405883717951905e-05, "loss": 0.6982, "step": 5045 }, { "epoch": 0.7721499617444529, "grad_norm": 2.093645143185147, "learning_rate": 1.4056572182138281e-05, "loss": 0.6132, "step": 5046 }, { "epoch": 0.7723029839326703, "grad_norm": 2.253233064275212, "learning_rate": 1.4054306935627544e-05, "loss": 0.7959, "step": 5047 }, { "epoch": 0.7724560061208875, "grad_norm": 2.2821754594763073, "learning_rate": 1.405204144012596e-05, "loss": 0.7634, "step": 5048 }, { "epoch": 0.7726090283091048, "grad_norm": 2.5505280501375576, "learning_rate": 1.404977569577266e-05, "loss": 0.775, "step": 5049 }, { "epoch": 0.7727620504973222, "grad_norm": 2.1972396897268847, "learning_rate": 1.404750970270679e-05, "loss": 0.6405, "step": 5050 }, { "epoch": 0.7729150726855394, "grad_norm": 2.0449200641167264, "learning_rate": 1.4045243461067514e-05, "loss": 0.6206, "step": 5051 }, { "epoch": 0.7730680948737567, "grad_norm": 2.386168396663282, "learning_rate": 1.4042976970994015e-05, "loss": 0.6859, "step": 5052 }, { "epoch": 0.7732211170619739, "grad_norm": 2.514122014683401, "learning_rate": 1.4040710232625481e-05, "loss": 0.7822, "step": 5053 }, { "epoch": 0.7733741392501913, "grad_norm": 2.5342070484797476, "learning_rate": 1.4038443246101125e-05, "loss": 0.6788, "step": 5054 }, { "epoch": 0.7735271614384086, "grad_norm": 2.345617758130968, "learning_rate": 1.4036176011560172e-05, "loss": 0.8571, "step": 5055 }, { "epoch": 0.7736801836266258, "grad_norm": 2.18626517984038, "learning_rate": 1.403390852914186e-05, "loss": 0.5995, "step": 5056 }, { "epoch": 0.7738332058148432, "grad_norm": 2.23239353461061, "learning_rate": 1.4031640798985446e-05, "loss": 0.7064, "step": 5057 }, { "epoch": 0.7739862280030605, "grad_norm": 2.1588061241982417, "learning_rate": 1.4029372821230196e-05, "loss": 0.622, "step": 5058 }, { "epoch": 0.7741392501912777, "grad_norm": 2.321759106687769, "learning_rate": 1.40271045960154e-05, "loss": 0.746, "step": 5059 }, { "epoch": 0.774292272379495, "grad_norm": 2.3401183762900266, "learning_rate": 1.4024836123480356e-05, "loss": 0.7694, "step": 5060 }, { "epoch": 0.7744452945677123, "grad_norm": 2.353276037005396, "learning_rate": 1.402256740376438e-05, "loss": 0.7548, "step": 5061 }, { "epoch": 0.7745983167559296, "grad_norm": 2.2155907355110984, "learning_rate": 1.4020298437006803e-05, "loss": 0.6635, "step": 5062 }, { "epoch": 0.7747513389441469, "grad_norm": 2.254233811440101, "learning_rate": 1.4018029223346972e-05, "loss": 0.5666, "step": 5063 }, { "epoch": 0.7749043611323642, "grad_norm": 2.2481526640750764, "learning_rate": 1.4015759762924246e-05, "loss": 0.6752, "step": 5064 }, { "epoch": 0.7750573833205815, "grad_norm": 2.148794153992223, "learning_rate": 1.4013490055878008e-05, "loss": 0.7136, "step": 5065 }, { "epoch": 0.7752104055087988, "grad_norm": 2.3975560671750022, "learning_rate": 1.401122010234764e-05, "loss": 0.7725, "step": 5066 }, { "epoch": 0.775363427697016, "grad_norm": 2.2347633781944407, "learning_rate": 1.4008949902472554e-05, "loss": 0.637, "step": 5067 }, { "epoch": 0.7755164498852334, "grad_norm": 2.285143070661817, "learning_rate": 1.4006679456392174e-05, "loss": 0.7154, "step": 5068 }, { "epoch": 0.7756694720734506, "grad_norm": 2.2031275325618553, "learning_rate": 1.4004408764245934e-05, "loss": 0.7259, "step": 5069 }, { "epoch": 0.7758224942616679, "grad_norm": 2.315668163634487, "learning_rate": 1.4002137826173286e-05, "loss": 0.7905, "step": 5070 }, { "epoch": 0.7759755164498853, "grad_norm": 2.541307986854028, "learning_rate": 1.3999866642313698e-05, "loss": 0.6911, "step": 5071 }, { "epoch": 0.7761285386381025, "grad_norm": 2.27496047507574, "learning_rate": 1.3997595212806648e-05, "loss": 0.6393, "step": 5072 }, { "epoch": 0.7762815608263198, "grad_norm": 2.247769499454639, "learning_rate": 1.3995323537791643e-05, "loss": 0.76, "step": 5073 }, { "epoch": 0.7764345830145372, "grad_norm": 2.2888853081384974, "learning_rate": 1.3993051617408186e-05, "loss": 0.7552, "step": 5074 }, { "epoch": 0.7765876052027544, "grad_norm": 2.2001010956721037, "learning_rate": 1.3990779451795808e-05, "loss": 0.6797, "step": 5075 }, { "epoch": 0.7767406273909717, "grad_norm": 2.4115578977098164, "learning_rate": 1.3988507041094055e-05, "loss": 0.7154, "step": 5076 }, { "epoch": 0.7768936495791889, "grad_norm": 2.1463792083802273, "learning_rate": 1.3986234385442481e-05, "loss": 0.6909, "step": 5077 }, { "epoch": 0.7770466717674063, "grad_norm": 2.0897406819197863, "learning_rate": 1.3983961484980656e-05, "loss": 0.751, "step": 5078 }, { "epoch": 0.7771996939556236, "grad_norm": 2.1608247723805687, "learning_rate": 1.3981688339848174e-05, "loss": 0.6893, "step": 5079 }, { "epoch": 0.7773527161438408, "grad_norm": 2.882363155314608, "learning_rate": 1.3979414950184632e-05, "loss": 0.6289, "step": 5080 }, { "epoch": 0.7775057383320582, "grad_norm": 2.5337887564427883, "learning_rate": 1.3977141316129653e-05, "loss": 0.7907, "step": 5081 }, { "epoch": 0.7776587605202755, "grad_norm": 2.368738369887408, "learning_rate": 1.3974867437822866e-05, "loss": 0.777, "step": 5082 }, { "epoch": 0.7778117827084927, "grad_norm": 2.3466429855351505, "learning_rate": 1.3972593315403919e-05, "loss": 0.748, "step": 5083 }, { "epoch": 0.77796480489671, "grad_norm": 2.269874457550218, "learning_rate": 1.3970318949012475e-05, "loss": 0.7797, "step": 5084 }, { "epoch": 0.7781178270849273, "grad_norm": 2.284119463233079, "learning_rate": 1.3968044338788216e-05, "loss": 0.7871, "step": 5085 }, { "epoch": 0.7782708492731446, "grad_norm": 2.2696960329330813, "learning_rate": 1.3965769484870829e-05, "loss": 0.6889, "step": 5086 }, { "epoch": 0.7784238714613619, "grad_norm": 2.420011775174571, "learning_rate": 1.3963494387400023e-05, "loss": 0.6882, "step": 5087 }, { "epoch": 0.7785768936495792, "grad_norm": 2.1957556052360916, "learning_rate": 1.3961219046515519e-05, "loss": 0.7425, "step": 5088 }, { "epoch": 0.7787299158377965, "grad_norm": 2.119582758763422, "learning_rate": 1.3958943462357065e-05, "loss": 0.6109, "step": 5089 }, { "epoch": 0.7788829380260138, "grad_norm": 2.6124106631663984, "learning_rate": 1.3956667635064398e-05, "loss": 0.7887, "step": 5090 }, { "epoch": 0.779035960214231, "grad_norm": 2.2893517601019107, "learning_rate": 1.3954391564777295e-05, "loss": 0.6306, "step": 5091 }, { "epoch": 0.7791889824024484, "grad_norm": 2.1777151683208444, "learning_rate": 1.3952115251635536e-05, "loss": 0.7121, "step": 5092 }, { "epoch": 0.7793420045906656, "grad_norm": 2.2899237702858746, "learning_rate": 1.3949838695778921e-05, "loss": 0.7505, "step": 5093 }, { "epoch": 0.7794950267788829, "grad_norm": 2.42586559436103, "learning_rate": 1.3947561897347257e-05, "loss": 0.7946, "step": 5094 }, { "epoch": 0.7796480489671003, "grad_norm": 1.9766272067621482, "learning_rate": 1.3945284856480376e-05, "loss": 0.6901, "step": 5095 }, { "epoch": 0.7798010711553175, "grad_norm": 2.156148901403677, "learning_rate": 1.3943007573318117e-05, "loss": 0.7859, "step": 5096 }, { "epoch": 0.7799540933435348, "grad_norm": 2.5202800450554865, "learning_rate": 1.3940730048000338e-05, "loss": 0.7896, "step": 5097 }, { "epoch": 0.780107115531752, "grad_norm": 2.4171048421326207, "learning_rate": 1.3938452280666909e-05, "loss": 0.7879, "step": 5098 }, { "epoch": 0.7802601377199694, "grad_norm": 2.4109847004996174, "learning_rate": 1.3936174271457721e-05, "loss": 0.8669, "step": 5099 }, { "epoch": 0.7804131599081867, "grad_norm": 2.0993258641086228, "learning_rate": 1.3933896020512671e-05, "loss": 0.727, "step": 5100 }, { "epoch": 0.7805661820964039, "grad_norm": 2.027143937230042, "learning_rate": 1.3931617527971674e-05, "loss": 0.6448, "step": 5101 }, { "epoch": 0.7807192042846213, "grad_norm": 1.984107315848014, "learning_rate": 1.3929338793974671e-05, "loss": 0.6419, "step": 5102 }, { "epoch": 0.7808722264728386, "grad_norm": 1.9833282224258104, "learning_rate": 1.3927059818661596e-05, "loss": 0.5596, "step": 5103 }, { "epoch": 0.7810252486610558, "grad_norm": 2.1670221414876756, "learning_rate": 1.3924780602172413e-05, "loss": 0.717, "step": 5104 }, { "epoch": 0.7811782708492732, "grad_norm": 2.2583166303336237, "learning_rate": 1.3922501144647105e-05, "loss": 0.7974, "step": 5105 }, { "epoch": 0.7813312930374904, "grad_norm": 2.380217191342555, "learning_rate": 1.3920221446225654e-05, "loss": 0.7113, "step": 5106 }, { "epoch": 0.7814843152257077, "grad_norm": 2.220484377761435, "learning_rate": 1.3917941507048068e-05, "loss": 0.7526, "step": 5107 }, { "epoch": 0.781637337413925, "grad_norm": 2.298676239650065, "learning_rate": 1.3915661327254367e-05, "loss": 0.7043, "step": 5108 }, { "epoch": 0.7817903596021423, "grad_norm": 2.1637013431269447, "learning_rate": 1.3913380906984586e-05, "loss": 0.7548, "step": 5109 }, { "epoch": 0.7819433817903596, "grad_norm": 2.1322701567201006, "learning_rate": 1.3911100246378775e-05, "loss": 0.758, "step": 5110 }, { "epoch": 0.7820964039785769, "grad_norm": 2.192596038504427, "learning_rate": 1.3908819345576996e-05, "loss": 0.6943, "step": 5111 }, { "epoch": 0.7822494261667942, "grad_norm": 2.2990794845292317, "learning_rate": 1.3906538204719329e-05, "loss": 0.6867, "step": 5112 }, { "epoch": 0.7824024483550115, "grad_norm": 2.3025056442381735, "learning_rate": 1.3904256823945868e-05, "loss": 0.7531, "step": 5113 }, { "epoch": 0.7825554705432287, "grad_norm": 2.318696680589184, "learning_rate": 1.3901975203396724e-05, "loss": 0.6702, "step": 5114 }, { "epoch": 0.782708492731446, "grad_norm": 2.0791679798970897, "learning_rate": 1.389969334321202e-05, "loss": 0.7031, "step": 5115 }, { "epoch": 0.7828615149196634, "grad_norm": 2.2745500029402694, "learning_rate": 1.3897411243531886e-05, "loss": 0.6795, "step": 5116 }, { "epoch": 0.7830145371078806, "grad_norm": 2.4518559288103554, "learning_rate": 1.3895128904496486e-05, "loss": 0.7113, "step": 5117 }, { "epoch": 0.7831675592960979, "grad_norm": 2.2388389113594553, "learning_rate": 1.3892846326245984e-05, "loss": 0.7826, "step": 5118 }, { "epoch": 0.7833205814843153, "grad_norm": 2.0864895233637775, "learning_rate": 1.3890563508920554e-05, "loss": 0.7918, "step": 5119 }, { "epoch": 0.7834736036725325, "grad_norm": 2.250652586337949, "learning_rate": 1.3888280452660401e-05, "loss": 0.8105, "step": 5120 }, { "epoch": 0.7836266258607498, "grad_norm": 2.5348100337657438, "learning_rate": 1.3885997157605737e-05, "loss": 0.7097, "step": 5121 }, { "epoch": 0.783779648048967, "grad_norm": 2.2521060385487277, "learning_rate": 1.3883713623896782e-05, "loss": 0.8175, "step": 5122 }, { "epoch": 0.7839326702371844, "grad_norm": 2.4476407046502446, "learning_rate": 1.3881429851673781e-05, "loss": 0.7518, "step": 5123 }, { "epoch": 0.7840856924254017, "grad_norm": 2.512045611078853, "learning_rate": 1.3879145841076991e-05, "loss": 0.7752, "step": 5124 }, { "epoch": 0.7842387146136189, "grad_norm": 2.278880822195872, "learning_rate": 1.3876861592246678e-05, "loss": 0.7291, "step": 5125 }, { "epoch": 0.7843917368018363, "grad_norm": 1.9592367955208152, "learning_rate": 1.3874577105323127e-05, "loss": 0.5866, "step": 5126 }, { "epoch": 0.7845447589900536, "grad_norm": 2.1204944930182466, "learning_rate": 1.3872292380446641e-05, "loss": 0.6941, "step": 5127 }, { "epoch": 0.7846977811782708, "grad_norm": 2.436918071377942, "learning_rate": 1.3870007417757529e-05, "loss": 0.7549, "step": 5128 }, { "epoch": 0.7848508033664882, "grad_norm": 2.049946768306998, "learning_rate": 1.3867722217396122e-05, "loss": 0.7674, "step": 5129 }, { "epoch": 0.7850038255547054, "grad_norm": 2.5063329923729554, "learning_rate": 1.3865436779502767e-05, "loss": 0.7467, "step": 5130 }, { "epoch": 0.7851568477429227, "grad_norm": 1.9886281059847166, "learning_rate": 1.3863151104217816e-05, "loss": 0.6673, "step": 5131 }, { "epoch": 0.78530986993114, "grad_norm": 2.1184066490210727, "learning_rate": 1.3860865191681639e-05, "loss": 0.6644, "step": 5132 }, { "epoch": 0.7854628921193573, "grad_norm": 2.2524087014750336, "learning_rate": 1.385857904203463e-05, "loss": 0.8056, "step": 5133 }, { "epoch": 0.7856159143075746, "grad_norm": 2.1941572669081326, "learning_rate": 1.3856292655417187e-05, "loss": 0.7362, "step": 5134 }, { "epoch": 0.7857689364957919, "grad_norm": 2.1730871002177836, "learning_rate": 1.3854006031969727e-05, "loss": 0.6781, "step": 5135 }, { "epoch": 0.7859219586840092, "grad_norm": 2.4598351761040256, "learning_rate": 1.3851719171832678e-05, "loss": 0.8042, "step": 5136 }, { "epoch": 0.7860749808722265, "grad_norm": 2.38153534428331, "learning_rate": 1.3849432075146485e-05, "loss": 0.7745, "step": 5137 }, { "epoch": 0.7862280030604437, "grad_norm": 2.1360826368406354, "learning_rate": 1.3847144742051613e-05, "loss": 0.8381, "step": 5138 }, { "epoch": 0.786381025248661, "grad_norm": 2.85778086002553, "learning_rate": 1.3844857172688531e-05, "loss": 0.8439, "step": 5139 }, { "epoch": 0.7865340474368784, "grad_norm": 2.2874345985423896, "learning_rate": 1.3842569367197726e-05, "loss": 0.7498, "step": 5140 }, { "epoch": 0.7866870696250956, "grad_norm": 2.132202408900261, "learning_rate": 1.3840281325719708e-05, "loss": 0.6593, "step": 5141 }, { "epoch": 0.7868400918133129, "grad_norm": 2.224486844047528, "learning_rate": 1.3837993048394988e-05, "loss": 0.7379, "step": 5142 }, { "epoch": 0.7869931140015303, "grad_norm": 2.2894622734424734, "learning_rate": 1.3835704535364103e-05, "loss": 0.7231, "step": 5143 }, { "epoch": 0.7871461361897475, "grad_norm": 2.100328372449939, "learning_rate": 1.3833415786767596e-05, "loss": 0.6338, "step": 5144 }, { "epoch": 0.7872991583779648, "grad_norm": 2.4355056805050217, "learning_rate": 1.3831126802746026e-05, "loss": 0.6756, "step": 5145 }, { "epoch": 0.787452180566182, "grad_norm": 2.2133365219845618, "learning_rate": 1.3828837583439975e-05, "loss": 0.7254, "step": 5146 }, { "epoch": 0.7876052027543994, "grad_norm": 2.286610066393195, "learning_rate": 1.3826548128990031e-05, "loss": 0.7471, "step": 5147 }, { "epoch": 0.7877582249426167, "grad_norm": 2.545738125909192, "learning_rate": 1.3824258439536793e-05, "loss": 0.7792, "step": 5148 }, { "epoch": 0.7879112471308339, "grad_norm": 2.1321283437860665, "learning_rate": 1.3821968515220885e-05, "loss": 0.6472, "step": 5149 }, { "epoch": 0.7880642693190513, "grad_norm": 2.7061865458512995, "learning_rate": 1.381967835618294e-05, "loss": 0.791, "step": 5150 }, { "epoch": 0.7882172915072686, "grad_norm": 2.5000426098798427, "learning_rate": 1.3817387962563605e-05, "loss": 0.7419, "step": 5151 }, { "epoch": 0.7883703136954858, "grad_norm": 2.1413176672534, "learning_rate": 1.381509733450354e-05, "loss": 0.6886, "step": 5152 }, { "epoch": 0.7885233358837032, "grad_norm": 2.0937281811946438, "learning_rate": 1.3812806472143423e-05, "loss": 0.6495, "step": 5153 }, { "epoch": 0.7886763580719204, "grad_norm": 2.350309442778104, "learning_rate": 1.3810515375623944e-05, "loss": 0.6501, "step": 5154 }, { "epoch": 0.7888293802601377, "grad_norm": 2.152716288815514, "learning_rate": 1.3808224045085812e-05, "loss": 0.7354, "step": 5155 }, { "epoch": 0.788982402448355, "grad_norm": 2.15154503852824, "learning_rate": 1.3805932480669739e-05, "loss": 0.7494, "step": 5156 }, { "epoch": 0.7891354246365723, "grad_norm": 2.0973581972487745, "learning_rate": 1.3803640682516466e-05, "loss": 0.7602, "step": 5157 }, { "epoch": 0.7892884468247896, "grad_norm": 2.711881599173585, "learning_rate": 1.3801348650766739e-05, "loss": 0.6601, "step": 5158 }, { "epoch": 0.7894414690130069, "grad_norm": 2.184034530375545, "learning_rate": 1.379905638556132e-05, "loss": 0.6898, "step": 5159 }, { "epoch": 0.7895944912012242, "grad_norm": 2.095943948257656, "learning_rate": 1.3796763887040987e-05, "loss": 0.7355, "step": 5160 }, { "epoch": 0.7897475133894415, "grad_norm": 2.2534986734403595, "learning_rate": 1.3794471155346529e-05, "loss": 0.7118, "step": 5161 }, { "epoch": 0.7899005355776587, "grad_norm": 2.1206432371445545, "learning_rate": 1.3792178190618754e-05, "loss": 0.6405, "step": 5162 }, { "epoch": 0.790053557765876, "grad_norm": 1.9695127305630176, "learning_rate": 1.3789884992998484e-05, "loss": 0.6778, "step": 5163 }, { "epoch": 0.7902065799540934, "grad_norm": 2.3350051931180227, "learning_rate": 1.3787591562626545e-05, "loss": 0.8381, "step": 5164 }, { "epoch": 0.7903596021423106, "grad_norm": 2.52231424297307, "learning_rate": 1.3785297899643797e-05, "loss": 0.8362, "step": 5165 }, { "epoch": 0.7905126243305279, "grad_norm": 2.1652917778433753, "learning_rate": 1.3783004004191095e-05, "loss": 0.825, "step": 5166 }, { "epoch": 0.7906656465187453, "grad_norm": 2.156387877386113, "learning_rate": 1.3780709876409315e-05, "loss": 0.5602, "step": 5167 }, { "epoch": 0.7908186687069625, "grad_norm": 2.42839264979406, "learning_rate": 1.3778415516439352e-05, "loss": 0.7292, "step": 5168 }, { "epoch": 0.7909716908951798, "grad_norm": 2.4513243449235134, "learning_rate": 1.3776120924422114e-05, "loss": 0.8545, "step": 5169 }, { "epoch": 0.791124713083397, "grad_norm": 2.2296488916634756, "learning_rate": 1.3773826100498512e-05, "loss": 0.641, "step": 5170 }, { "epoch": 0.7912777352716144, "grad_norm": 2.2384319518642677, "learning_rate": 1.377153104480949e-05, "loss": 0.7482, "step": 5171 }, { "epoch": 0.7914307574598317, "grad_norm": 2.2378257415185385, "learning_rate": 1.3769235757495994e-05, "loss": 0.7268, "step": 5172 }, { "epoch": 0.7915837796480489, "grad_norm": 2.1791575768084375, "learning_rate": 1.3766940238698983e-05, "loss": 0.6738, "step": 5173 }, { "epoch": 0.7917368018362663, "grad_norm": 2.214967735911992, "learning_rate": 1.3764644488559433e-05, "loss": 0.6761, "step": 5174 }, { "epoch": 0.7918898240244836, "grad_norm": 2.0880221650456083, "learning_rate": 1.3762348507218342e-05, "loss": 0.701, "step": 5175 }, { "epoch": 0.7920428462127008, "grad_norm": 1.9249158885011985, "learning_rate": 1.3760052294816708e-05, "loss": 0.6566, "step": 5176 }, { "epoch": 0.7921958684009182, "grad_norm": 2.304539470343536, "learning_rate": 1.3757755851495553e-05, "loss": 0.7316, "step": 5177 }, { "epoch": 0.7923488905891354, "grad_norm": 2.040611815803622, "learning_rate": 1.3755459177395911e-05, "loss": 0.7166, "step": 5178 }, { "epoch": 0.7925019127773527, "grad_norm": 2.3706391834263942, "learning_rate": 1.3753162272658832e-05, "loss": 0.6723, "step": 5179 }, { "epoch": 0.79265493496557, "grad_norm": 2.1589457526111877, "learning_rate": 1.3750865137425371e-05, "loss": 0.7957, "step": 5180 }, { "epoch": 0.7928079571537873, "grad_norm": 2.357150090202171, "learning_rate": 1.3748567771836612e-05, "loss": 0.7333, "step": 5181 }, { "epoch": 0.7929609793420046, "grad_norm": 2.035270416220051, "learning_rate": 1.374627017603364e-05, "loss": 0.6303, "step": 5182 }, { "epoch": 0.7931140015302219, "grad_norm": 2.310334406515395, "learning_rate": 1.374397235015756e-05, "loss": 0.6799, "step": 5183 }, { "epoch": 0.7932670237184392, "grad_norm": 2.5329925277281378, "learning_rate": 1.3741674294349494e-05, "loss": 0.7163, "step": 5184 }, { "epoch": 0.7934200459066565, "grad_norm": 2.3209415935722078, "learning_rate": 1.373937600875057e-05, "loss": 0.6513, "step": 5185 }, { "epoch": 0.7935730680948737, "grad_norm": 2.2392121898655044, "learning_rate": 1.3737077493501939e-05, "loss": 0.6232, "step": 5186 }, { "epoch": 0.793726090283091, "grad_norm": 2.053324943153721, "learning_rate": 1.373477874874476e-05, "loss": 0.6207, "step": 5187 }, { "epoch": 0.7938791124713084, "grad_norm": 2.3153062662700026, "learning_rate": 1.3732479774620206e-05, "loss": 0.7312, "step": 5188 }, { "epoch": 0.7940321346595256, "grad_norm": 2.202811664872602, "learning_rate": 1.3730180571269465e-05, "loss": 0.736, "step": 5189 }, { "epoch": 0.7941851568477429, "grad_norm": 2.095998770625852, "learning_rate": 1.3727881138833746e-05, "loss": 0.6731, "step": 5190 }, { "epoch": 0.7943381790359603, "grad_norm": 1.841345240252431, "learning_rate": 1.3725581477454262e-05, "loss": 0.5946, "step": 5191 }, { "epoch": 0.7944912012241775, "grad_norm": 2.390734382027951, "learning_rate": 1.3723281587272243e-05, "loss": 0.6396, "step": 5192 }, { "epoch": 0.7946442234123948, "grad_norm": 2.2841584319362758, "learning_rate": 1.3720981468428938e-05, "loss": 0.7108, "step": 5193 }, { "epoch": 0.794797245600612, "grad_norm": 2.3818717367728075, "learning_rate": 1.3718681121065605e-05, "loss": 0.7718, "step": 5194 }, { "epoch": 0.7949502677888294, "grad_norm": 2.5859476893129165, "learning_rate": 1.3716380545323516e-05, "loss": 0.7855, "step": 5195 }, { "epoch": 0.7951032899770467, "grad_norm": 2.3940293661363654, "learning_rate": 1.371407974134396e-05, "loss": 0.7146, "step": 5196 }, { "epoch": 0.7952563121652639, "grad_norm": 2.2903896079503316, "learning_rate": 1.3711778709268235e-05, "loss": 0.7184, "step": 5197 }, { "epoch": 0.7954093343534813, "grad_norm": 2.01581293118149, "learning_rate": 1.3709477449237661e-05, "loss": 0.6431, "step": 5198 }, { "epoch": 0.7955623565416986, "grad_norm": 1.8836934333964197, "learning_rate": 1.3707175961393564e-05, "loss": 0.616, "step": 5199 }, { "epoch": 0.7957153787299158, "grad_norm": 2.2744019198062815, "learning_rate": 1.370487424587729e-05, "loss": 0.6745, "step": 5200 }, { "epoch": 0.7958684009181332, "grad_norm": 2.117675246910853, "learning_rate": 1.3702572302830194e-05, "loss": 0.6382, "step": 5201 }, { "epoch": 0.7960214231063504, "grad_norm": 2.125438352072235, "learning_rate": 1.370027013239365e-05, "loss": 0.6448, "step": 5202 }, { "epoch": 0.7961744452945677, "grad_norm": 2.174518702017577, "learning_rate": 1.369796773470904e-05, "loss": 0.5696, "step": 5203 }, { "epoch": 0.796327467482785, "grad_norm": 2.6649237891451216, "learning_rate": 1.3695665109917764e-05, "loss": 0.7357, "step": 5204 }, { "epoch": 0.7964804896710023, "grad_norm": 2.130917757031721, "learning_rate": 1.3693362258161239e-05, "loss": 0.7103, "step": 5205 }, { "epoch": 0.7966335118592196, "grad_norm": 2.308278946221109, "learning_rate": 1.3691059179580888e-05, "loss": 0.7996, "step": 5206 }, { "epoch": 0.7967865340474368, "grad_norm": 2.2292739591762567, "learning_rate": 1.3688755874318154e-05, "loss": 0.7414, "step": 5207 }, { "epoch": 0.7969395562356542, "grad_norm": 2.3250780563176474, "learning_rate": 1.3686452342514486e-05, "loss": 0.6129, "step": 5208 }, { "epoch": 0.7970925784238715, "grad_norm": 2.0736440654210644, "learning_rate": 1.3684148584311365e-05, "loss": 0.6746, "step": 5209 }, { "epoch": 0.7972456006120887, "grad_norm": 2.3603589942528678, "learning_rate": 1.3681844599850265e-05, "loss": 0.7034, "step": 5210 }, { "epoch": 0.797398622800306, "grad_norm": 2.349702397627265, "learning_rate": 1.3679540389272683e-05, "loss": 0.6011, "step": 5211 }, { "epoch": 0.7975516449885234, "grad_norm": 2.366788617289836, "learning_rate": 1.3677235952720132e-05, "loss": 0.6788, "step": 5212 }, { "epoch": 0.7977046671767406, "grad_norm": 2.0913280489885233, "learning_rate": 1.3674931290334137e-05, "loss": 0.6981, "step": 5213 }, { "epoch": 0.7978576893649579, "grad_norm": 2.5304173735852045, "learning_rate": 1.3672626402256233e-05, "loss": 0.7291, "step": 5214 }, { "epoch": 0.7980107115531752, "grad_norm": 2.3922657289236673, "learning_rate": 1.3670321288627975e-05, "loss": 0.7516, "step": 5215 }, { "epoch": 0.7981637337413925, "grad_norm": 2.198486199079782, "learning_rate": 1.3668015949590929e-05, "loss": 0.6851, "step": 5216 }, { "epoch": 0.7983167559296098, "grad_norm": 2.3466456024492053, "learning_rate": 1.3665710385286674e-05, "loss": 0.6804, "step": 5217 }, { "epoch": 0.798469778117827, "grad_norm": 2.166891644272971, "learning_rate": 1.3663404595856804e-05, "loss": 0.7362, "step": 5218 }, { "epoch": 0.7986228003060444, "grad_norm": 2.3442111181100946, "learning_rate": 1.3661098581442924e-05, "loss": 0.7576, "step": 5219 }, { "epoch": 0.7987758224942617, "grad_norm": 2.2049727285082357, "learning_rate": 1.3658792342186662e-05, "loss": 0.632, "step": 5220 }, { "epoch": 0.7989288446824789, "grad_norm": 2.4747037218029666, "learning_rate": 1.3656485878229646e-05, "loss": 0.8662, "step": 5221 }, { "epoch": 0.7990818668706963, "grad_norm": 2.154683342466119, "learning_rate": 1.365417918971353e-05, "loss": 0.7382, "step": 5222 }, { "epoch": 0.7992348890589135, "grad_norm": 2.3297008480161994, "learning_rate": 1.3651872276779975e-05, "loss": 0.7879, "step": 5223 }, { "epoch": 0.7993879112471308, "grad_norm": 2.678458704148208, "learning_rate": 1.3649565139570653e-05, "loss": 0.8128, "step": 5224 }, { "epoch": 0.7995409334353482, "grad_norm": 2.4406095933985035, "learning_rate": 1.3647257778227263e-05, "loss": 0.6893, "step": 5225 }, { "epoch": 0.7996939556235654, "grad_norm": 2.533590774082434, "learning_rate": 1.3644950192891502e-05, "loss": 0.8111, "step": 5226 }, { "epoch": 0.7998469778117827, "grad_norm": 2.1737368302955056, "learning_rate": 1.3642642383705092e-05, "loss": 0.7153, "step": 5227 }, { "epoch": 0.8, "grad_norm": 2.033682378494723, "learning_rate": 1.3640334350809763e-05, "loss": 0.7538, "step": 5228 }, { "epoch": 0.8001530221882173, "grad_norm": 2.2980150041237635, "learning_rate": 1.3638026094347261e-05, "loss": 0.7585, "step": 5229 }, { "epoch": 0.8003060443764346, "grad_norm": 2.139295198153623, "learning_rate": 1.3635717614459342e-05, "loss": 0.6941, "step": 5230 }, { "epoch": 0.8004590665646518, "grad_norm": 2.331590755207613, "learning_rate": 1.3633408911287785e-05, "loss": 0.7219, "step": 5231 }, { "epoch": 0.8006120887528692, "grad_norm": 2.132296410347176, "learning_rate": 1.363109998497437e-05, "loss": 0.742, "step": 5232 }, { "epoch": 0.8007651109410865, "grad_norm": 2.2753739670012743, "learning_rate": 1.3628790835660901e-05, "loss": 0.6547, "step": 5233 }, { "epoch": 0.8009181331293037, "grad_norm": 2.213855353645808, "learning_rate": 1.3626481463489189e-05, "loss": 0.6624, "step": 5234 }, { "epoch": 0.801071155317521, "grad_norm": 2.3897025233653797, "learning_rate": 1.3624171868601066e-05, "loss": 0.7068, "step": 5235 }, { "epoch": 0.8012241775057384, "grad_norm": 2.161798259768198, "learning_rate": 1.3621862051138368e-05, "loss": 0.6786, "step": 5236 }, { "epoch": 0.8013771996939556, "grad_norm": 2.063352788797973, "learning_rate": 1.3619552011242956e-05, "loss": 0.6605, "step": 5237 }, { "epoch": 0.8015302218821729, "grad_norm": 2.191978633874912, "learning_rate": 1.3617241749056693e-05, "loss": 0.6603, "step": 5238 }, { "epoch": 0.8016832440703902, "grad_norm": 2.236976470780728, "learning_rate": 1.3614931264721464e-05, "loss": 0.7457, "step": 5239 }, { "epoch": 0.8018362662586075, "grad_norm": 2.348713734882274, "learning_rate": 1.3612620558379164e-05, "loss": 0.788, "step": 5240 }, { "epoch": 0.8019892884468248, "grad_norm": 2.1540503355147136, "learning_rate": 1.3610309630171703e-05, "loss": 0.6961, "step": 5241 }, { "epoch": 0.802142310635042, "grad_norm": 2.2481983626353297, "learning_rate": 1.3607998480241005e-05, "loss": 0.7852, "step": 5242 }, { "epoch": 0.8022953328232594, "grad_norm": 2.223866399302239, "learning_rate": 1.3605687108729005e-05, "loss": 0.7054, "step": 5243 }, { "epoch": 0.8024483550114767, "grad_norm": 2.2526593082756587, "learning_rate": 1.3603375515777654e-05, "loss": 0.7195, "step": 5244 }, { "epoch": 0.8026013771996939, "grad_norm": 2.1762612602974545, "learning_rate": 1.3601063701528916e-05, "loss": 0.7121, "step": 5245 }, { "epoch": 0.8027543993879113, "grad_norm": 2.19828314276474, "learning_rate": 1.359875166612477e-05, "loss": 0.6465, "step": 5246 }, { "epoch": 0.8029074215761285, "grad_norm": 2.1003393586466186, "learning_rate": 1.3596439409707205e-05, "loss": 0.6349, "step": 5247 }, { "epoch": 0.8030604437643458, "grad_norm": 2.481715046553168, "learning_rate": 1.3594126932418226e-05, "loss": 0.7672, "step": 5248 }, { "epoch": 0.8032134659525632, "grad_norm": 2.055351929826298, "learning_rate": 1.359181423439985e-05, "loss": 0.6777, "step": 5249 }, { "epoch": 0.8033664881407804, "grad_norm": 1.9408465558535808, "learning_rate": 1.3589501315794115e-05, "loss": 0.5616, "step": 5250 }, { "epoch": 0.8035195103289977, "grad_norm": 2.168856933193797, "learning_rate": 1.358718817674306e-05, "loss": 0.7286, "step": 5251 }, { "epoch": 0.803672532517215, "grad_norm": 2.2388441038555706, "learning_rate": 1.3584874817388744e-05, "loss": 0.6583, "step": 5252 }, { "epoch": 0.8038255547054323, "grad_norm": 2.638753746731687, "learning_rate": 1.3582561237873244e-05, "loss": 0.7871, "step": 5253 }, { "epoch": 0.8039785768936496, "grad_norm": 2.173253012550754, "learning_rate": 1.3580247438338643e-05, "loss": 0.6701, "step": 5254 }, { "epoch": 0.8041315990818668, "grad_norm": 2.373102282978098, "learning_rate": 1.3577933418927039e-05, "loss": 0.8085, "step": 5255 }, { "epoch": 0.8042846212700842, "grad_norm": 2.1330407791676613, "learning_rate": 1.3575619179780549e-05, "loss": 0.747, "step": 5256 }, { "epoch": 0.8044376434583015, "grad_norm": 2.1281332913564484, "learning_rate": 1.3573304721041294e-05, "loss": 0.7252, "step": 5257 }, { "epoch": 0.8045906656465187, "grad_norm": 2.1458569602813062, "learning_rate": 1.3570990042851419e-05, "loss": 0.7456, "step": 5258 }, { "epoch": 0.804743687834736, "grad_norm": 2.3701157521990894, "learning_rate": 1.3568675145353076e-05, "loss": 0.8018, "step": 5259 }, { "epoch": 0.8048967100229534, "grad_norm": 2.478264328190266, "learning_rate": 1.3566360028688432e-05, "loss": 0.7114, "step": 5260 }, { "epoch": 0.8050497322111706, "grad_norm": 2.2845510559676074, "learning_rate": 1.3564044692999667e-05, "loss": 0.7534, "step": 5261 }, { "epoch": 0.8052027543993879, "grad_norm": 2.0041111447825783, "learning_rate": 1.3561729138428974e-05, "loss": 0.6483, "step": 5262 }, { "epoch": 0.8053557765876052, "grad_norm": 2.419128035536536, "learning_rate": 1.3559413365118563e-05, "loss": 0.6902, "step": 5263 }, { "epoch": 0.8055087987758225, "grad_norm": 2.154837398243689, "learning_rate": 1.3557097373210651e-05, "loss": 0.7418, "step": 5264 }, { "epoch": 0.8056618209640398, "grad_norm": 2.6319673770565983, "learning_rate": 1.3554781162847472e-05, "loss": 0.7431, "step": 5265 }, { "epoch": 0.805814843152257, "grad_norm": 2.1323745296359387, "learning_rate": 1.355246473417128e-05, "loss": 0.6952, "step": 5266 }, { "epoch": 0.8059678653404744, "grad_norm": 2.1242915789939865, "learning_rate": 1.3550148087324329e-05, "loss": 0.7627, "step": 5267 }, { "epoch": 0.8061208875286917, "grad_norm": 2.72482323781392, "learning_rate": 1.3547831222448893e-05, "loss": 0.789, "step": 5268 }, { "epoch": 0.8062739097169089, "grad_norm": 2.357414573913534, "learning_rate": 1.3545514139687264e-05, "loss": 0.7448, "step": 5269 }, { "epoch": 0.8064269319051263, "grad_norm": 2.169457925241073, "learning_rate": 1.3543196839181745e-05, "loss": 0.6584, "step": 5270 }, { "epoch": 0.8065799540933435, "grad_norm": 2.036547827305924, "learning_rate": 1.3540879321074642e-05, "loss": 0.5961, "step": 5271 }, { "epoch": 0.8067329762815608, "grad_norm": 2.3964724268378363, "learning_rate": 1.3538561585508292e-05, "loss": 0.8957, "step": 5272 }, { "epoch": 0.8068859984697782, "grad_norm": 2.3452218195555137, "learning_rate": 1.353624363262503e-05, "loss": 0.6594, "step": 5273 }, { "epoch": 0.8070390206579954, "grad_norm": 2.1038872850157735, "learning_rate": 1.3533925462567213e-05, "loss": 0.6078, "step": 5274 }, { "epoch": 0.8071920428462127, "grad_norm": 2.356954054341592, "learning_rate": 1.3531607075477209e-05, "loss": 0.7797, "step": 5275 }, { "epoch": 0.80734506503443, "grad_norm": 2.0115247230251816, "learning_rate": 1.3529288471497399e-05, "loss": 0.5769, "step": 5276 }, { "epoch": 0.8074980872226473, "grad_norm": 2.276904775969954, "learning_rate": 1.3526969650770175e-05, "loss": 0.7503, "step": 5277 }, { "epoch": 0.8076511094108646, "grad_norm": 2.167203694480653, "learning_rate": 1.3524650613437948e-05, "loss": 0.6904, "step": 5278 }, { "epoch": 0.8078041315990818, "grad_norm": 1.9616634696078104, "learning_rate": 1.3522331359643141e-05, "loss": 0.6396, "step": 5279 }, { "epoch": 0.8079571537872992, "grad_norm": 2.4533852708261406, "learning_rate": 1.3520011889528185e-05, "loss": 0.7826, "step": 5280 }, { "epoch": 0.8081101759755165, "grad_norm": 2.5398426188480645, "learning_rate": 1.3517692203235526e-05, "loss": 0.7062, "step": 5281 }, { "epoch": 0.8082631981637337, "grad_norm": 2.0377247734089594, "learning_rate": 1.351537230090763e-05, "loss": 0.6129, "step": 5282 }, { "epoch": 0.808416220351951, "grad_norm": 2.5926738747286517, "learning_rate": 1.3513052182686968e-05, "loss": 0.7073, "step": 5283 }, { "epoch": 0.8085692425401684, "grad_norm": 2.104975770665454, "learning_rate": 1.3510731848716028e-05, "loss": 0.8069, "step": 5284 }, { "epoch": 0.8087222647283856, "grad_norm": 2.1377050518059746, "learning_rate": 1.350841129913731e-05, "loss": 0.717, "step": 5285 }, { "epoch": 0.8088752869166029, "grad_norm": 2.2039599414867874, "learning_rate": 1.350609053409333e-05, "loss": 0.6571, "step": 5286 }, { "epoch": 0.8090283091048202, "grad_norm": 2.1174602800748397, "learning_rate": 1.3503769553726616e-05, "loss": 0.6212, "step": 5287 }, { "epoch": 0.8091813312930375, "grad_norm": 2.547319193448088, "learning_rate": 1.3501448358179705e-05, "loss": 0.8006, "step": 5288 }, { "epoch": 0.8093343534812548, "grad_norm": 2.3880553798277693, "learning_rate": 1.3499126947595154e-05, "loss": 0.6515, "step": 5289 }, { "epoch": 0.809487375669472, "grad_norm": 2.220555933056926, "learning_rate": 1.3496805322115525e-05, "loss": 0.7402, "step": 5290 }, { "epoch": 0.8096403978576894, "grad_norm": 2.1418695856415404, "learning_rate": 1.3494483481883408e-05, "loss": 0.6952, "step": 5291 }, { "epoch": 0.8097934200459067, "grad_norm": 2.280869004587261, "learning_rate": 1.3492161427041385e-05, "loss": 0.6293, "step": 5292 }, { "epoch": 0.8099464422341239, "grad_norm": 2.1942676446078466, "learning_rate": 1.3489839157732067e-05, "loss": 0.6146, "step": 5293 }, { "epoch": 0.8100994644223413, "grad_norm": 1.964131045316613, "learning_rate": 1.3487516674098076e-05, "loss": 0.6257, "step": 5294 }, { "epoch": 0.8102524866105585, "grad_norm": 2.192523042159634, "learning_rate": 1.3485193976282045e-05, "loss": 0.7152, "step": 5295 }, { "epoch": 0.8104055087987758, "grad_norm": 2.2224555260794325, "learning_rate": 1.3482871064426612e-05, "loss": 0.7834, "step": 5296 }, { "epoch": 0.8105585309869932, "grad_norm": 2.3486851929486092, "learning_rate": 1.3480547938674446e-05, "loss": 0.7778, "step": 5297 }, { "epoch": 0.8107115531752104, "grad_norm": 2.066083356496167, "learning_rate": 1.3478224599168215e-05, "loss": 0.6438, "step": 5298 }, { "epoch": 0.8108645753634277, "grad_norm": 2.172729196692573, "learning_rate": 1.3475901046050603e-05, "loss": 0.7957, "step": 5299 }, { "epoch": 0.811017597551645, "grad_norm": 2.0121964473406675, "learning_rate": 1.3473577279464309e-05, "loss": 0.7376, "step": 5300 }, { "epoch": 0.8111706197398623, "grad_norm": 2.5865207720030368, "learning_rate": 1.3471253299552049e-05, "loss": 0.7715, "step": 5301 }, { "epoch": 0.8113236419280796, "grad_norm": 2.2695859310630047, "learning_rate": 1.3468929106456543e-05, "loss": 0.7316, "step": 5302 }, { "epoch": 0.8114766641162968, "grad_norm": 2.212371993004032, "learning_rate": 1.3466604700320529e-05, "loss": 0.8111, "step": 5303 }, { "epoch": 0.8116296863045142, "grad_norm": 2.7667086132112297, "learning_rate": 1.346428008128676e-05, "loss": 0.7738, "step": 5304 }, { "epoch": 0.8117827084927315, "grad_norm": 2.242386698700782, "learning_rate": 1.3461955249497998e-05, "loss": 0.6502, "step": 5305 }, { "epoch": 0.8119357306809487, "grad_norm": 2.3940374260343606, "learning_rate": 1.3459630205097018e-05, "loss": 0.6801, "step": 5306 }, { "epoch": 0.812088752869166, "grad_norm": 2.4788937521650114, "learning_rate": 1.345730494822662e-05, "loss": 0.6994, "step": 5307 }, { "epoch": 0.8122417750573834, "grad_norm": 2.135678351797468, "learning_rate": 1.3454979479029595e-05, "loss": 0.7326, "step": 5308 }, { "epoch": 0.8123947972456006, "grad_norm": 2.380508428475471, "learning_rate": 1.3452653797648765e-05, "loss": 0.7547, "step": 5309 }, { "epoch": 0.8125478194338179, "grad_norm": 2.292228512939073, "learning_rate": 1.3450327904226956e-05, "loss": 0.7265, "step": 5310 }, { "epoch": 0.8127008416220352, "grad_norm": 2.6999034724742144, "learning_rate": 1.3448001798907017e-05, "loss": 0.6915, "step": 5311 }, { "epoch": 0.8128538638102525, "grad_norm": 2.277333877537579, "learning_rate": 1.3445675481831798e-05, "loss": 0.61, "step": 5312 }, { "epoch": 0.8130068859984698, "grad_norm": 2.179731789967011, "learning_rate": 1.3443348953144166e-05, "loss": 0.6815, "step": 5313 }, { "epoch": 0.813159908186687, "grad_norm": 2.1453547823586185, "learning_rate": 1.3441022212987008e-05, "loss": 0.6212, "step": 5314 }, { "epoch": 0.8133129303749044, "grad_norm": 2.0990096504871953, "learning_rate": 1.343869526150321e-05, "loss": 0.621, "step": 5315 }, { "epoch": 0.8134659525631217, "grad_norm": 2.181671391425356, "learning_rate": 1.3436368098835689e-05, "loss": 0.612, "step": 5316 }, { "epoch": 0.8136189747513389, "grad_norm": 2.16318928001326, "learning_rate": 1.3434040725127355e-05, "loss": 0.7547, "step": 5317 }, { "epoch": 0.8137719969395563, "grad_norm": 2.5676933895963674, "learning_rate": 1.3431713140521146e-05, "loss": 0.9013, "step": 5318 }, { "epoch": 0.8139250191277735, "grad_norm": 2.342905564848808, "learning_rate": 1.3429385345160012e-05, "loss": 0.7104, "step": 5319 }, { "epoch": 0.8140780413159908, "grad_norm": 2.142110911327244, "learning_rate": 1.3427057339186906e-05, "loss": 0.7011, "step": 5320 }, { "epoch": 0.8142310635042082, "grad_norm": 2.2067029947839423, "learning_rate": 1.34247291227448e-05, "loss": 0.6377, "step": 5321 }, { "epoch": 0.8143840856924254, "grad_norm": 2.2040511888427496, "learning_rate": 1.3422400695976685e-05, "loss": 0.5819, "step": 5322 }, { "epoch": 0.8145371078806427, "grad_norm": 2.1632943457971745, "learning_rate": 1.3420072059025552e-05, "loss": 0.6302, "step": 5323 }, { "epoch": 0.8146901300688599, "grad_norm": 2.2258275335537303, "learning_rate": 1.3417743212034415e-05, "loss": 0.8637, "step": 5324 }, { "epoch": 0.8148431522570773, "grad_norm": 2.3153241322316, "learning_rate": 1.3415414155146299e-05, "loss": 0.8324, "step": 5325 }, { "epoch": 0.8149961744452946, "grad_norm": 2.3974769360130863, "learning_rate": 1.3413084888504236e-05, "loss": 0.755, "step": 5326 }, { "epoch": 0.8151491966335118, "grad_norm": 2.2510661996362904, "learning_rate": 1.341075541225128e-05, "loss": 0.6549, "step": 5327 }, { "epoch": 0.8153022188217292, "grad_norm": 2.279833694007899, "learning_rate": 1.340842572653049e-05, "loss": 0.7515, "step": 5328 }, { "epoch": 0.8154552410099465, "grad_norm": 2.2168201833394465, "learning_rate": 1.3406095831484943e-05, "loss": 0.748, "step": 5329 }, { "epoch": 0.8156082631981637, "grad_norm": 2.302319672441321, "learning_rate": 1.3403765727257724e-05, "loss": 0.7128, "step": 5330 }, { "epoch": 0.815761285386381, "grad_norm": 2.3865943289140295, "learning_rate": 1.3401435413991936e-05, "loss": 0.7773, "step": 5331 }, { "epoch": 0.8159143075745983, "grad_norm": 2.2644122607612918, "learning_rate": 1.3399104891830698e-05, "loss": 0.6749, "step": 5332 }, { "epoch": 0.8160673297628156, "grad_norm": 2.220283646827807, "learning_rate": 1.3396774160917128e-05, "loss": 0.6206, "step": 5333 }, { "epoch": 0.8162203519510329, "grad_norm": 2.2493947518081145, "learning_rate": 1.3394443221394366e-05, "loss": 0.743, "step": 5334 }, { "epoch": 0.8163733741392502, "grad_norm": 2.4348049499278828, "learning_rate": 1.339211207340557e-05, "loss": 0.8518, "step": 5335 }, { "epoch": 0.8165263963274675, "grad_norm": 2.2335021156921036, "learning_rate": 1.33897807170939e-05, "loss": 0.8099, "step": 5336 }, { "epoch": 0.8166794185156848, "grad_norm": 2.2602615444390515, "learning_rate": 1.3387449152602536e-05, "loss": 0.744, "step": 5337 }, { "epoch": 0.816832440703902, "grad_norm": 2.483487434991995, "learning_rate": 1.3385117380074668e-05, "loss": 0.6676, "step": 5338 }, { "epoch": 0.8169854628921194, "grad_norm": 2.252465139032877, "learning_rate": 1.3382785399653498e-05, "loss": 0.6504, "step": 5339 }, { "epoch": 0.8171384850803366, "grad_norm": 2.1653703088602487, "learning_rate": 1.3380453211482243e-05, "loss": 0.6743, "step": 5340 }, { "epoch": 0.8172915072685539, "grad_norm": 2.36517463966179, "learning_rate": 1.3378120815704132e-05, "loss": 0.7109, "step": 5341 }, { "epoch": 0.8174445294567713, "grad_norm": 2.15790281403125, "learning_rate": 1.3375788212462408e-05, "loss": 0.6949, "step": 5342 }, { "epoch": 0.8175975516449885, "grad_norm": 2.0850769500930286, "learning_rate": 1.3373455401900323e-05, "loss": 0.6332, "step": 5343 }, { "epoch": 0.8177505738332058, "grad_norm": 2.1616905772965547, "learning_rate": 1.3371122384161144e-05, "loss": 0.7172, "step": 5344 }, { "epoch": 0.8179035960214232, "grad_norm": 2.4995256490816633, "learning_rate": 1.3368789159388151e-05, "loss": 0.8572, "step": 5345 }, { "epoch": 0.8180566182096404, "grad_norm": 2.178755972070649, "learning_rate": 1.3366455727724638e-05, "loss": 0.6471, "step": 5346 }, { "epoch": 0.8182096403978577, "grad_norm": 2.061965745724041, "learning_rate": 1.3364122089313907e-05, "loss": 0.6209, "step": 5347 }, { "epoch": 0.8183626625860749, "grad_norm": 2.0978327032670046, "learning_rate": 1.3361788244299281e-05, "loss": 0.6965, "step": 5348 }, { "epoch": 0.8185156847742923, "grad_norm": 2.1655873920099435, "learning_rate": 1.3359454192824088e-05, "loss": 0.7474, "step": 5349 }, { "epoch": 0.8186687069625096, "grad_norm": 2.1707296467790003, "learning_rate": 1.3357119935031668e-05, "loss": 0.6475, "step": 5350 }, { "epoch": 0.8188217291507268, "grad_norm": 2.4511219553404624, "learning_rate": 1.3354785471065382e-05, "loss": 0.7043, "step": 5351 }, { "epoch": 0.8189747513389442, "grad_norm": 2.5976694677220986, "learning_rate": 1.3352450801068595e-05, "loss": 0.86, "step": 5352 }, { "epoch": 0.8191277735271615, "grad_norm": 2.395094174010329, "learning_rate": 1.3350115925184688e-05, "loss": 0.7383, "step": 5353 }, { "epoch": 0.8192807957153787, "grad_norm": 2.3064301975807098, "learning_rate": 1.334778084355706e-05, "loss": 0.7484, "step": 5354 }, { "epoch": 0.819433817903596, "grad_norm": 2.2933955948779174, "learning_rate": 1.3345445556329111e-05, "loss": 0.6784, "step": 5355 }, { "epoch": 0.8195868400918133, "grad_norm": 2.2922480591575276, "learning_rate": 1.334311006364426e-05, "loss": 0.793, "step": 5356 }, { "epoch": 0.8197398622800306, "grad_norm": 2.157361043385925, "learning_rate": 1.3340774365645948e-05, "loss": 0.649, "step": 5357 }, { "epoch": 0.8198928844682479, "grad_norm": 2.4028534857255877, "learning_rate": 1.333843846247761e-05, "loss": 0.7454, "step": 5358 }, { "epoch": 0.8200459066564652, "grad_norm": 2.2779711254618897, "learning_rate": 1.3336102354282706e-05, "loss": 0.6664, "step": 5359 }, { "epoch": 0.8201989288446825, "grad_norm": 2.1582367759018704, "learning_rate": 1.3333766041204705e-05, "loss": 0.6144, "step": 5360 }, { "epoch": 0.8203519510328998, "grad_norm": 2.302800374343068, "learning_rate": 1.3331429523387091e-05, "loss": 0.5859, "step": 5361 }, { "epoch": 0.820504973221117, "grad_norm": 2.1501206310043766, "learning_rate": 1.3329092800973356e-05, "loss": 0.6121, "step": 5362 }, { "epoch": 0.8206579954093344, "grad_norm": 2.3631056301619995, "learning_rate": 1.332675587410701e-05, "loss": 0.7051, "step": 5363 }, { "epoch": 0.8208110175975516, "grad_norm": 2.3421798133262715, "learning_rate": 1.3324418742931568e-05, "loss": 0.6834, "step": 5364 }, { "epoch": 0.8209640397857689, "grad_norm": 2.438015095671974, "learning_rate": 1.332208140759057e-05, "loss": 0.686, "step": 5365 }, { "epoch": 0.8211170619739863, "grad_norm": 2.3411328591375584, "learning_rate": 1.3319743868227555e-05, "loss": 0.7772, "step": 5366 }, { "epoch": 0.8212700841622035, "grad_norm": 2.4243773349249, "learning_rate": 1.3317406124986083e-05, "loss": 0.7733, "step": 5367 }, { "epoch": 0.8214231063504208, "grad_norm": 2.6127991053589907, "learning_rate": 1.3315068178009722e-05, "loss": 0.611, "step": 5368 }, { "epoch": 0.8215761285386382, "grad_norm": 2.3139864560744186, "learning_rate": 1.3312730027442057e-05, "loss": 0.7795, "step": 5369 }, { "epoch": 0.8217291507268554, "grad_norm": 2.3885664294242295, "learning_rate": 1.3310391673426681e-05, "loss": 0.7107, "step": 5370 }, { "epoch": 0.8218821729150727, "grad_norm": 2.4219894732303304, "learning_rate": 1.3308053116107202e-05, "loss": 0.6879, "step": 5371 }, { "epoch": 0.8220351951032899, "grad_norm": 2.5090665576091546, "learning_rate": 1.3305714355627237e-05, "loss": 0.7902, "step": 5372 }, { "epoch": 0.8221882172915073, "grad_norm": 2.470907543441836, "learning_rate": 1.3303375392130427e-05, "loss": 0.6743, "step": 5373 }, { "epoch": 0.8223412394797246, "grad_norm": 1.9891713032992344, "learning_rate": 1.3301036225760408e-05, "loss": 0.5842, "step": 5374 }, { "epoch": 0.8224942616679418, "grad_norm": 2.1719841885826554, "learning_rate": 1.329869685666084e-05, "loss": 0.7156, "step": 5375 }, { "epoch": 0.8226472838561592, "grad_norm": 2.6093916724723134, "learning_rate": 1.3296357284975395e-05, "loss": 0.8111, "step": 5376 }, { "epoch": 0.8228003060443765, "grad_norm": 2.348866234796436, "learning_rate": 1.3294017510847754e-05, "loss": 0.7498, "step": 5377 }, { "epoch": 0.8229533282325937, "grad_norm": 2.4779202227153503, "learning_rate": 1.3291677534421608e-05, "loss": 0.7308, "step": 5378 }, { "epoch": 0.823106350420811, "grad_norm": 2.439470281437543, "learning_rate": 1.328933735584067e-05, "loss": 0.8282, "step": 5379 }, { "epoch": 0.8232593726090283, "grad_norm": 1.9490028637280303, "learning_rate": 1.3286996975248659e-05, "loss": 0.5773, "step": 5380 }, { "epoch": 0.8234123947972456, "grad_norm": 2.17675951263559, "learning_rate": 1.3284656392789301e-05, "loss": 0.6001, "step": 5381 }, { "epoch": 0.8235654169854629, "grad_norm": 2.5997970873418765, "learning_rate": 1.3282315608606347e-05, "loss": 0.6896, "step": 5382 }, { "epoch": 0.8237184391736802, "grad_norm": 2.1650822698710694, "learning_rate": 1.327997462284355e-05, "loss": 0.624, "step": 5383 }, { "epoch": 0.8238714613618975, "grad_norm": 2.1112801781705475, "learning_rate": 1.3277633435644681e-05, "loss": 0.6039, "step": 5384 }, { "epoch": 0.8240244835501148, "grad_norm": 2.3871543648388194, "learning_rate": 1.3275292047153516e-05, "loss": 0.7194, "step": 5385 }, { "epoch": 0.824177505738332, "grad_norm": 2.2010526920746276, "learning_rate": 1.327295045751386e-05, "loss": 0.7322, "step": 5386 }, { "epoch": 0.8243305279265494, "grad_norm": 2.775859697782073, "learning_rate": 1.3270608666869512e-05, "loss": 0.7952, "step": 5387 }, { "epoch": 0.8244835501147666, "grad_norm": 2.276716590063767, "learning_rate": 1.3268266675364285e-05, "loss": 0.6301, "step": 5388 }, { "epoch": 0.8246365723029839, "grad_norm": 2.2539606958094787, "learning_rate": 1.3265924483142021e-05, "loss": 0.726, "step": 5389 }, { "epoch": 0.8247895944912013, "grad_norm": 2.127389941524374, "learning_rate": 1.3263582090346559e-05, "loss": 0.607, "step": 5390 }, { "epoch": 0.8249426166794185, "grad_norm": 2.2618517630927593, "learning_rate": 1.3261239497121747e-05, "loss": 0.7103, "step": 5391 }, { "epoch": 0.8250956388676358, "grad_norm": 2.301198924576191, "learning_rate": 1.3258896703611465e-05, "loss": 0.674, "step": 5392 }, { "epoch": 0.8252486610558531, "grad_norm": 2.2519800323299823, "learning_rate": 1.3256553709959588e-05, "loss": 0.7118, "step": 5393 }, { "epoch": 0.8254016832440704, "grad_norm": 2.1158962008831104, "learning_rate": 1.3254210516310004e-05, "loss": 0.6324, "step": 5394 }, { "epoch": 0.8255547054322877, "grad_norm": 2.1458773057151816, "learning_rate": 1.3251867122806625e-05, "loss": 0.6327, "step": 5395 }, { "epoch": 0.8257077276205049, "grad_norm": 2.6875604486528704, "learning_rate": 1.3249523529593364e-05, "loss": 0.7069, "step": 5396 }, { "epoch": 0.8258607498087223, "grad_norm": 2.4063745136874592, "learning_rate": 1.3247179736814149e-05, "loss": 0.6958, "step": 5397 }, { "epoch": 0.8260137719969396, "grad_norm": 2.2108085408792446, "learning_rate": 1.3244835744612926e-05, "loss": 0.7354, "step": 5398 }, { "epoch": 0.8261667941851568, "grad_norm": 2.1647786138967207, "learning_rate": 1.3242491553133646e-05, "loss": 0.7654, "step": 5399 }, { "epoch": 0.8263198163733741, "grad_norm": 2.2781294249844355, "learning_rate": 1.3240147162520272e-05, "loss": 0.69, "step": 5400 }, { "epoch": 0.8264728385615915, "grad_norm": 2.205081890912215, "learning_rate": 1.3237802572916786e-05, "loss": 0.7011, "step": 5401 }, { "epoch": 0.8266258607498087, "grad_norm": 2.156765525326034, "learning_rate": 1.3235457784467183e-05, "loss": 0.7145, "step": 5402 }, { "epoch": 0.826778882938026, "grad_norm": 2.0555102520992077, "learning_rate": 1.3233112797315456e-05, "loss": 0.5552, "step": 5403 }, { "epoch": 0.8269319051262433, "grad_norm": 2.178522116336929, "learning_rate": 1.3230767611605622e-05, "loss": 0.7269, "step": 5404 }, { "epoch": 0.8270849273144606, "grad_norm": 2.0747364623531306, "learning_rate": 1.3228422227481712e-05, "loss": 0.6244, "step": 5405 }, { "epoch": 0.8272379495026779, "grad_norm": 2.2732975783479596, "learning_rate": 1.3226076645087765e-05, "loss": 0.6528, "step": 5406 }, { "epoch": 0.8273909716908951, "grad_norm": 2.032788279412093, "learning_rate": 1.3223730864567829e-05, "loss": 0.6503, "step": 5407 }, { "epoch": 0.8275439938791125, "grad_norm": 2.186247863682011, "learning_rate": 1.3221384886065968e-05, "loss": 0.7322, "step": 5408 }, { "epoch": 0.8276970160673298, "grad_norm": 2.048505319281304, "learning_rate": 1.3219038709726261e-05, "loss": 0.6557, "step": 5409 }, { "epoch": 0.827850038255547, "grad_norm": 2.1651304187113487, "learning_rate": 1.3216692335692796e-05, "loss": 0.6042, "step": 5410 }, { "epoch": 0.8280030604437644, "grad_norm": 2.3706714888571576, "learning_rate": 1.3214345764109668e-05, "loss": 0.7171, "step": 5411 }, { "epoch": 0.8281560826319816, "grad_norm": 2.144590330064656, "learning_rate": 1.321199899512099e-05, "loss": 0.5572, "step": 5412 }, { "epoch": 0.8283091048201989, "grad_norm": 2.3176192449312727, "learning_rate": 1.3209652028870891e-05, "loss": 0.6542, "step": 5413 }, { "epoch": 0.8284621270084163, "grad_norm": 2.4363610612490225, "learning_rate": 1.3207304865503508e-05, "loss": 0.6725, "step": 5414 }, { "epoch": 0.8286151491966335, "grad_norm": 2.239550593859622, "learning_rate": 1.3204957505162983e-05, "loss": 0.6656, "step": 5415 }, { "epoch": 0.8287681713848508, "grad_norm": 2.2066515473547743, "learning_rate": 1.3202609947993477e-05, "loss": 0.7237, "step": 5416 }, { "epoch": 0.8289211935730681, "grad_norm": 2.4805510088889613, "learning_rate": 1.3200262194139168e-05, "loss": 0.7478, "step": 5417 }, { "epoch": 0.8290742157612854, "grad_norm": 2.1960028509697733, "learning_rate": 1.3197914243744237e-05, "loss": 0.6824, "step": 5418 }, { "epoch": 0.8292272379495027, "grad_norm": 2.3511494818838456, "learning_rate": 1.3195566096952883e-05, "loss": 0.7456, "step": 5419 }, { "epoch": 0.8293802601377199, "grad_norm": 2.249239523564754, "learning_rate": 1.3193217753909316e-05, "loss": 0.7507, "step": 5420 }, { "epoch": 0.8295332823259373, "grad_norm": 1.9415800244217254, "learning_rate": 1.3190869214757751e-05, "loss": 0.684, "step": 5421 }, { "epoch": 0.8296863045141546, "grad_norm": 2.3205062630225353, "learning_rate": 1.3188520479642426e-05, "loss": 0.7321, "step": 5422 }, { "epoch": 0.8298393267023718, "grad_norm": 2.400336439716699, "learning_rate": 1.3186171548707587e-05, "loss": 0.6574, "step": 5423 }, { "epoch": 0.8299923488905891, "grad_norm": 2.0677198849039273, "learning_rate": 1.3183822422097486e-05, "loss": 0.634, "step": 5424 }, { "epoch": 0.8301453710788065, "grad_norm": 2.2584330809992315, "learning_rate": 1.3181473099956397e-05, "loss": 0.6519, "step": 5425 }, { "epoch": 0.8302983932670237, "grad_norm": 2.124240068152974, "learning_rate": 1.3179123582428594e-05, "loss": 0.6199, "step": 5426 }, { "epoch": 0.830451415455241, "grad_norm": 2.3273164439976473, "learning_rate": 1.3176773869658383e-05, "loss": 0.6734, "step": 5427 }, { "epoch": 0.8306044376434583, "grad_norm": 2.149535999528806, "learning_rate": 1.3174423961790057e-05, "loss": 0.6557, "step": 5428 }, { "epoch": 0.8307574598316756, "grad_norm": 2.481319214602762, "learning_rate": 1.3172073858967935e-05, "loss": 0.7535, "step": 5429 }, { "epoch": 0.8309104820198929, "grad_norm": 1.9994495591491446, "learning_rate": 1.3169723561336349e-05, "loss": 0.6636, "step": 5430 }, { "epoch": 0.8310635042081101, "grad_norm": 2.023320842609098, "learning_rate": 1.3167373069039644e-05, "loss": 0.6146, "step": 5431 }, { "epoch": 0.8312165263963275, "grad_norm": 2.3556441047611125, "learning_rate": 1.3165022382222161e-05, "loss": 0.6654, "step": 5432 }, { "epoch": 0.8313695485845447, "grad_norm": 2.325532575404804, "learning_rate": 1.3162671501028275e-05, "loss": 0.7858, "step": 5433 }, { "epoch": 0.831522570772762, "grad_norm": 2.230886816421429, "learning_rate": 1.316032042560236e-05, "loss": 0.7242, "step": 5434 }, { "epoch": 0.8316755929609794, "grad_norm": 2.1374285377576334, "learning_rate": 1.3157969156088806e-05, "loss": 0.6857, "step": 5435 }, { "epoch": 0.8318286151491966, "grad_norm": 2.2024531130944087, "learning_rate": 1.315561769263201e-05, "loss": 0.7603, "step": 5436 }, { "epoch": 0.8319816373374139, "grad_norm": 2.4725523502828186, "learning_rate": 1.3153266035376387e-05, "loss": 0.7892, "step": 5437 }, { "epoch": 0.8321346595256313, "grad_norm": 1.942259678346756, "learning_rate": 1.3150914184466359e-05, "loss": 0.6643, "step": 5438 }, { "epoch": 0.8322876817138485, "grad_norm": 1.9805552537307942, "learning_rate": 1.3148562140046368e-05, "loss": 0.6216, "step": 5439 }, { "epoch": 0.8324407039020658, "grad_norm": 2.0651368907886094, "learning_rate": 1.3146209902260858e-05, "loss": 0.6408, "step": 5440 }, { "epoch": 0.832593726090283, "grad_norm": 1.8716904461752568, "learning_rate": 1.3143857471254284e-05, "loss": 0.6518, "step": 5441 }, { "epoch": 0.8327467482785004, "grad_norm": 1.9940867590996474, "learning_rate": 1.3141504847171131e-05, "loss": 0.6708, "step": 5442 }, { "epoch": 0.8328997704667177, "grad_norm": 2.321143832807135, "learning_rate": 1.3139152030155875e-05, "loss": 0.6505, "step": 5443 }, { "epoch": 0.8330527926549349, "grad_norm": 2.3643297951532474, "learning_rate": 1.3136799020353009e-05, "loss": 0.7394, "step": 5444 }, { "epoch": 0.8332058148431523, "grad_norm": 2.4184612753665466, "learning_rate": 1.3134445817907047e-05, "loss": 0.6883, "step": 5445 }, { "epoch": 0.8333588370313696, "grad_norm": 2.3561896414391144, "learning_rate": 1.3132092422962503e-05, "loss": 0.7043, "step": 5446 }, { "epoch": 0.8335118592195868, "grad_norm": 2.362497581248584, "learning_rate": 1.3129738835663911e-05, "loss": 0.7542, "step": 5447 }, { "epoch": 0.8336648814078041, "grad_norm": 2.4107911094529677, "learning_rate": 1.3127385056155818e-05, "loss": 0.7097, "step": 5448 }, { "epoch": 0.8338179035960214, "grad_norm": 2.151079347885521, "learning_rate": 1.3125031084582769e-05, "loss": 0.677, "step": 5449 }, { "epoch": 0.8339709257842387, "grad_norm": 2.113665557525543, "learning_rate": 1.3122676921089338e-05, "loss": 0.6911, "step": 5450 }, { "epoch": 0.834123947972456, "grad_norm": 2.089972690894945, "learning_rate": 1.3120322565820098e-05, "loss": 0.6565, "step": 5451 }, { "epoch": 0.8342769701606733, "grad_norm": 2.3308970486598404, "learning_rate": 1.3117968018919646e-05, "loss": 0.8009, "step": 5452 }, { "epoch": 0.8344299923488906, "grad_norm": 2.233752104710811, "learning_rate": 1.3115613280532581e-05, "loss": 0.6564, "step": 5453 }, { "epoch": 0.8345830145371079, "grad_norm": 2.341965814021548, "learning_rate": 1.3113258350803513e-05, "loss": 0.7691, "step": 5454 }, { "epoch": 0.8347360367253251, "grad_norm": 2.495347947770681, "learning_rate": 1.3110903229877073e-05, "loss": 0.7702, "step": 5455 }, { "epoch": 0.8348890589135425, "grad_norm": 2.229124461211568, "learning_rate": 1.3108547917897896e-05, "loss": 0.779, "step": 5456 }, { "epoch": 0.8350420811017597, "grad_norm": 2.404664452229642, "learning_rate": 1.3106192415010628e-05, "loss": 0.7439, "step": 5457 }, { "epoch": 0.835195103289977, "grad_norm": 2.144379029046011, "learning_rate": 1.3103836721359934e-05, "loss": 0.7058, "step": 5458 }, { "epoch": 0.8353481254781944, "grad_norm": 2.2023641817367934, "learning_rate": 1.3101480837090485e-05, "loss": 0.6623, "step": 5459 }, { "epoch": 0.8355011476664116, "grad_norm": 2.0170490475868936, "learning_rate": 1.3099124762346965e-05, "loss": 0.6674, "step": 5460 }, { "epoch": 0.8356541698546289, "grad_norm": 1.9195685937200395, "learning_rate": 1.3096768497274069e-05, "loss": 0.6357, "step": 5461 }, { "epoch": 0.8358071920428463, "grad_norm": 2.024436830598696, "learning_rate": 1.3094412042016504e-05, "loss": 0.5846, "step": 5462 }, { "epoch": 0.8359602142310635, "grad_norm": 2.198755705976839, "learning_rate": 1.3092055396718992e-05, "loss": 0.7809, "step": 5463 }, { "epoch": 0.8361132364192808, "grad_norm": 2.2113261586270485, "learning_rate": 1.3089698561526261e-05, "loss": 0.7037, "step": 5464 }, { "epoch": 0.836266258607498, "grad_norm": 2.354039463335677, "learning_rate": 1.3087341536583054e-05, "loss": 0.8019, "step": 5465 }, { "epoch": 0.8364192807957154, "grad_norm": 2.2564806325771194, "learning_rate": 1.3084984322034124e-05, "loss": 0.9169, "step": 5466 }, { "epoch": 0.8365723029839327, "grad_norm": 3.4017957008245157, "learning_rate": 1.3082626918024239e-05, "loss": 0.6874, "step": 5467 }, { "epoch": 0.8367253251721499, "grad_norm": 2.2833142256607086, "learning_rate": 1.3080269324698179e-05, "loss": 0.6265, "step": 5468 }, { "epoch": 0.8368783473603673, "grad_norm": 2.057805293848059, "learning_rate": 1.3077911542200727e-05, "loss": 0.6179, "step": 5469 }, { "epoch": 0.8370313695485846, "grad_norm": 2.3137603844967924, "learning_rate": 1.3075553570676685e-05, "loss": 0.7012, "step": 5470 }, { "epoch": 0.8371843917368018, "grad_norm": 2.348687565853293, "learning_rate": 1.3073195410270869e-05, "loss": 0.8127, "step": 5471 }, { "epoch": 0.8373374139250191, "grad_norm": 2.1805314170172223, "learning_rate": 1.3070837061128101e-05, "loss": 0.7328, "step": 5472 }, { "epoch": 0.8374904361132364, "grad_norm": 2.298133329589823, "learning_rate": 1.3068478523393213e-05, "loss": 0.7008, "step": 5473 }, { "epoch": 0.8376434583014537, "grad_norm": 2.130736044585862, "learning_rate": 1.3066119797211056e-05, "loss": 0.5503, "step": 5474 }, { "epoch": 0.837796480489671, "grad_norm": 2.348461660163417, "learning_rate": 1.3063760882726488e-05, "loss": 0.6586, "step": 5475 }, { "epoch": 0.8379495026778883, "grad_norm": 2.457109691397983, "learning_rate": 1.3061401780084379e-05, "loss": 0.8545, "step": 5476 }, { "epoch": 0.8381025248661056, "grad_norm": 2.081413285430723, "learning_rate": 1.305904248942961e-05, "loss": 0.7044, "step": 5477 }, { "epoch": 0.8382555470543229, "grad_norm": 2.2761005336360443, "learning_rate": 1.3056683010907074e-05, "loss": 0.7146, "step": 5478 }, { "epoch": 0.8384085692425401, "grad_norm": 2.4427202264353096, "learning_rate": 1.3054323344661675e-05, "loss": 0.7789, "step": 5479 }, { "epoch": 0.8385615914307575, "grad_norm": 2.5737808915465648, "learning_rate": 1.3051963490838336e-05, "loss": 0.747, "step": 5480 }, { "epoch": 0.8387146136189747, "grad_norm": 2.3321630387102474, "learning_rate": 1.3049603449581976e-05, "loss": 0.7329, "step": 5481 }, { "epoch": 0.838867635807192, "grad_norm": 2.2436821204503996, "learning_rate": 1.3047243221037537e-05, "loss": 0.8297, "step": 5482 }, { "epoch": 0.8390206579954094, "grad_norm": 2.1236816351700916, "learning_rate": 1.3044882805349972e-05, "loss": 0.6615, "step": 5483 }, { "epoch": 0.8391736801836266, "grad_norm": 2.1357048998496286, "learning_rate": 1.3042522202664247e-05, "loss": 0.7149, "step": 5484 }, { "epoch": 0.8393267023718439, "grad_norm": 2.276524858878027, "learning_rate": 1.3040161413125325e-05, "loss": 0.7097, "step": 5485 }, { "epoch": 0.8394797245600613, "grad_norm": 2.20618942502793, "learning_rate": 1.3037800436878199e-05, "loss": 0.6871, "step": 5486 }, { "epoch": 0.8396327467482785, "grad_norm": 2.2991077527046513, "learning_rate": 1.3035439274067865e-05, "loss": 0.6966, "step": 5487 }, { "epoch": 0.8397857689364958, "grad_norm": 2.3943691068471344, "learning_rate": 1.303307792483933e-05, "loss": 0.774, "step": 5488 }, { "epoch": 0.839938791124713, "grad_norm": 2.4172819833948886, "learning_rate": 1.3030716389337614e-05, "loss": 0.7621, "step": 5489 }, { "epoch": 0.8400918133129304, "grad_norm": 2.0557590442430276, "learning_rate": 1.302835466770775e-05, "loss": 0.6875, "step": 5490 }, { "epoch": 0.8402448355011477, "grad_norm": 2.1172952300702517, "learning_rate": 1.3025992760094778e-05, "loss": 0.7656, "step": 5491 }, { "epoch": 0.8403978576893649, "grad_norm": 2.1069628676461667, "learning_rate": 1.302363066664375e-05, "loss": 0.6424, "step": 5492 }, { "epoch": 0.8405508798775823, "grad_norm": 2.0851667419856033, "learning_rate": 1.302126838749974e-05, "loss": 0.6485, "step": 5493 }, { "epoch": 0.8407039020657996, "grad_norm": 2.345547762432767, "learning_rate": 1.3018905922807814e-05, "loss": 0.7328, "step": 5494 }, { "epoch": 0.8408569242540168, "grad_norm": 2.418693674830027, "learning_rate": 1.3016543272713066e-05, "loss": 0.7418, "step": 5495 }, { "epoch": 0.8410099464422341, "grad_norm": 2.371023973660028, "learning_rate": 1.3014180437360595e-05, "loss": 0.5673, "step": 5496 }, { "epoch": 0.8411629686304514, "grad_norm": 2.265839060637262, "learning_rate": 1.3011817416895516e-05, "loss": 0.6141, "step": 5497 }, { "epoch": 0.8413159908186687, "grad_norm": 2.561937095874154, "learning_rate": 1.3009454211462941e-05, "loss": 0.7998, "step": 5498 }, { "epoch": 0.841469013006886, "grad_norm": 2.264911983777347, "learning_rate": 1.3007090821208013e-05, "loss": 0.6413, "step": 5499 }, { "epoch": 0.8416220351951033, "grad_norm": 2.38515727129013, "learning_rate": 1.3004727246275875e-05, "loss": 0.6904, "step": 5500 }, { "epoch": 0.8417750573833206, "grad_norm": 2.2272191122576475, "learning_rate": 1.300236348681168e-05, "loss": 0.67, "step": 5501 }, { "epoch": 0.8419280795715379, "grad_norm": 2.328983070075472, "learning_rate": 1.2999999542960597e-05, "loss": 0.7334, "step": 5502 }, { "epoch": 0.8420811017597551, "grad_norm": 2.2285007466959206, "learning_rate": 1.2997635414867808e-05, "loss": 0.7039, "step": 5503 }, { "epoch": 0.8422341239479725, "grad_norm": 2.475196568822425, "learning_rate": 1.2995271102678503e-05, "loss": 0.6824, "step": 5504 }, { "epoch": 0.8423871461361897, "grad_norm": 2.145354123730406, "learning_rate": 1.2992906606537878e-05, "loss": 0.6814, "step": 5505 }, { "epoch": 0.842540168324407, "grad_norm": 2.648650178635418, "learning_rate": 1.2990541926591153e-05, "loss": 0.7099, "step": 5506 }, { "epoch": 0.8426931905126244, "grad_norm": 2.2924356285560554, "learning_rate": 1.298817706298355e-05, "loss": 0.7504, "step": 5507 }, { "epoch": 0.8428462127008416, "grad_norm": 2.260371927743499, "learning_rate": 1.2985812015860297e-05, "loss": 0.8017, "step": 5508 }, { "epoch": 0.8429992348890589, "grad_norm": 2.1438114603897898, "learning_rate": 1.2983446785366656e-05, "loss": 0.6886, "step": 5509 }, { "epoch": 0.8431522570772763, "grad_norm": 2.328522698350745, "learning_rate": 1.2981081371647872e-05, "loss": 0.6653, "step": 5510 }, { "epoch": 0.8433052792654935, "grad_norm": 2.2418473468868627, "learning_rate": 1.297871577484922e-05, "loss": 0.7969, "step": 5511 }, { "epoch": 0.8434583014537108, "grad_norm": 2.079868183628365, "learning_rate": 1.2976349995115979e-05, "loss": 0.6576, "step": 5512 }, { "epoch": 0.843611323641928, "grad_norm": 2.2551328175381657, "learning_rate": 1.2973984032593446e-05, "loss": 0.7583, "step": 5513 }, { "epoch": 0.8437643458301454, "grad_norm": 1.9291727183519736, "learning_rate": 1.2971617887426911e-05, "loss": 0.6354, "step": 5514 }, { "epoch": 0.8439173680183627, "grad_norm": 2.0798484589827226, "learning_rate": 1.2969251559761702e-05, "loss": 0.653, "step": 5515 }, { "epoch": 0.8440703902065799, "grad_norm": 2.418587228020786, "learning_rate": 1.2966885049743136e-05, "loss": 0.6181, "step": 5516 }, { "epoch": 0.8442234123947973, "grad_norm": 2.19342314918509, "learning_rate": 1.2964518357516553e-05, "loss": 0.6955, "step": 5517 }, { "epoch": 0.8443764345830146, "grad_norm": 2.338148065171502, "learning_rate": 1.2962151483227303e-05, "loss": 0.7878, "step": 5518 }, { "epoch": 0.8445294567712318, "grad_norm": 1.9721749570907623, "learning_rate": 1.295978442702074e-05, "loss": 0.6078, "step": 5519 }, { "epoch": 0.8446824789594491, "grad_norm": 2.0817212942936845, "learning_rate": 1.2957417189042237e-05, "loss": 0.6564, "step": 5520 }, { "epoch": 0.8448355011476664, "grad_norm": 2.289786601333639, "learning_rate": 1.2955049769437173e-05, "loss": 0.6076, "step": 5521 }, { "epoch": 0.8449885233358837, "grad_norm": 2.400216562893098, "learning_rate": 1.2952682168350949e-05, "loss": 0.8167, "step": 5522 }, { "epoch": 0.845141545524101, "grad_norm": 2.423170432612747, "learning_rate": 1.2950314385928957e-05, "loss": 0.7366, "step": 5523 }, { "epoch": 0.8452945677123183, "grad_norm": 2.079069856562734, "learning_rate": 1.2947946422316617e-05, "loss": 0.6885, "step": 5524 }, { "epoch": 0.8454475899005356, "grad_norm": 2.314286472410869, "learning_rate": 1.2945578277659357e-05, "loss": 0.7336, "step": 5525 }, { "epoch": 0.8456006120887529, "grad_norm": 2.261984315977617, "learning_rate": 1.2943209952102608e-05, "loss": 0.6953, "step": 5526 }, { "epoch": 0.8457536342769701, "grad_norm": 2.0233508566664793, "learning_rate": 1.2940841445791826e-05, "loss": 0.6739, "step": 5527 }, { "epoch": 0.8459066564651875, "grad_norm": 2.288650722875104, "learning_rate": 1.2938472758872464e-05, "loss": 0.7251, "step": 5528 }, { "epoch": 0.8460596786534047, "grad_norm": 2.5941529869810522, "learning_rate": 1.2936103891489995e-05, "loss": 0.6892, "step": 5529 }, { "epoch": 0.846212700841622, "grad_norm": 2.488575302432984, "learning_rate": 1.29337348437899e-05, "loss": 0.6759, "step": 5530 }, { "epoch": 0.8463657230298394, "grad_norm": 2.1952089497466396, "learning_rate": 1.293136561591767e-05, "loss": 0.748, "step": 5531 }, { "epoch": 0.8465187452180566, "grad_norm": 2.2910733629818756, "learning_rate": 1.2928996208018813e-05, "loss": 0.6614, "step": 5532 }, { "epoch": 0.8466717674062739, "grad_norm": 2.2937953646424787, "learning_rate": 1.2926626620238836e-05, "loss": 0.7134, "step": 5533 }, { "epoch": 0.8468247895944913, "grad_norm": 2.260542552534389, "learning_rate": 1.2924256852723276e-05, "loss": 0.6862, "step": 5534 }, { "epoch": 0.8469778117827085, "grad_norm": 2.2815687375045615, "learning_rate": 1.2921886905617658e-05, "loss": 0.6566, "step": 5535 }, { "epoch": 0.8471308339709258, "grad_norm": 2.355462711241084, "learning_rate": 1.2919516779067533e-05, "loss": 0.7204, "step": 5536 }, { "epoch": 0.847283856159143, "grad_norm": 2.3383308189087186, "learning_rate": 1.2917146473218464e-05, "loss": 0.76, "step": 5537 }, { "epoch": 0.8474368783473604, "grad_norm": 2.294371471880201, "learning_rate": 1.2914775988216021e-05, "loss": 0.6685, "step": 5538 }, { "epoch": 0.8475899005355777, "grad_norm": 2.237221973534265, "learning_rate": 1.2912405324205776e-05, "loss": 0.7479, "step": 5539 }, { "epoch": 0.8477429227237949, "grad_norm": 2.4085148292399468, "learning_rate": 1.2910034481333331e-05, "loss": 0.6868, "step": 5540 }, { "epoch": 0.8478959449120123, "grad_norm": 2.1331468654542154, "learning_rate": 1.2907663459744283e-05, "loss": 0.6263, "step": 5541 }, { "epoch": 0.8480489671002295, "grad_norm": 2.416430462046604, "learning_rate": 1.2905292259584248e-05, "loss": 0.5775, "step": 5542 }, { "epoch": 0.8482019892884468, "grad_norm": 2.154148517983704, "learning_rate": 1.2902920880998848e-05, "loss": 0.704, "step": 5543 }, { "epoch": 0.8483550114766641, "grad_norm": 2.221789554064452, "learning_rate": 1.2900549324133722e-05, "loss": 0.6048, "step": 5544 }, { "epoch": 0.8485080336648814, "grad_norm": 2.48005927838095, "learning_rate": 1.2898177589134515e-05, "loss": 0.715, "step": 5545 }, { "epoch": 0.8486610558530987, "grad_norm": 2.144770996988103, "learning_rate": 1.2895805676146882e-05, "loss": 0.6509, "step": 5546 }, { "epoch": 0.848814078041316, "grad_norm": 1.9436185628607763, "learning_rate": 1.2893433585316497e-05, "loss": 0.5823, "step": 5547 }, { "epoch": 0.8489671002295333, "grad_norm": 2.3484164597566304, "learning_rate": 1.2891061316789036e-05, "loss": 0.6889, "step": 5548 }, { "epoch": 0.8491201224177506, "grad_norm": 2.4994443211846296, "learning_rate": 1.2888688870710186e-05, "loss": 0.7833, "step": 5549 }, { "epoch": 0.8492731446059678, "grad_norm": 2.156946521054402, "learning_rate": 1.2886316247225657e-05, "loss": 0.76, "step": 5550 }, { "epoch": 0.8494261667941851, "grad_norm": 2.1799637821710482, "learning_rate": 1.2883943446481153e-05, "loss": 0.6765, "step": 5551 }, { "epoch": 0.8495791889824025, "grad_norm": 2.0568551184076886, "learning_rate": 1.28815704686224e-05, "loss": 0.6281, "step": 5552 }, { "epoch": 0.8497322111706197, "grad_norm": 2.2126970022565726, "learning_rate": 1.2879197313795131e-05, "loss": 0.6563, "step": 5553 }, { "epoch": 0.849885233358837, "grad_norm": 2.522319909037303, "learning_rate": 1.2876823982145095e-05, "loss": 0.8035, "step": 5554 }, { "epoch": 0.8500382555470544, "grad_norm": 2.498837348760292, "learning_rate": 1.287445047381804e-05, "loss": 0.7085, "step": 5555 }, { "epoch": 0.8501912777352716, "grad_norm": 2.363108474252094, "learning_rate": 1.2872076788959738e-05, "loss": 0.706, "step": 5556 }, { "epoch": 0.8503442999234889, "grad_norm": 2.1018992973643207, "learning_rate": 1.2869702927715966e-05, "loss": 0.6308, "step": 5557 }, { "epoch": 0.8504973221117061, "grad_norm": 2.5754528738479117, "learning_rate": 1.2867328890232506e-05, "loss": 0.7338, "step": 5558 }, { "epoch": 0.8506503442999235, "grad_norm": 2.2465891179344237, "learning_rate": 1.286495467665517e-05, "loss": 0.7204, "step": 5559 }, { "epoch": 0.8508033664881408, "grad_norm": 2.282158755636477, "learning_rate": 1.2862580287129754e-05, "loss": 0.709, "step": 5560 }, { "epoch": 0.850956388676358, "grad_norm": 2.253648961989343, "learning_rate": 1.2860205721802083e-05, "loss": 0.6625, "step": 5561 }, { "epoch": 0.8511094108645754, "grad_norm": 2.224500447042734, "learning_rate": 1.2857830980817994e-05, "loss": 0.6685, "step": 5562 }, { "epoch": 0.8512624330527927, "grad_norm": 2.25169449960483, "learning_rate": 1.2855456064323323e-05, "loss": 0.6976, "step": 5563 }, { "epoch": 0.8514154552410099, "grad_norm": 2.254082007689388, "learning_rate": 1.2853080972463923e-05, "loss": 0.7009, "step": 5564 }, { "epoch": 0.8515684774292273, "grad_norm": 2.111325957451995, "learning_rate": 1.285070570538566e-05, "loss": 0.5968, "step": 5565 }, { "epoch": 0.8517214996174445, "grad_norm": 2.212773489566472, "learning_rate": 1.2848330263234408e-05, "loss": 0.6656, "step": 5566 }, { "epoch": 0.8518745218056618, "grad_norm": 2.354877615111615, "learning_rate": 1.2845954646156051e-05, "loss": 0.7605, "step": 5567 }, { "epoch": 0.8520275439938791, "grad_norm": 2.166128846465111, "learning_rate": 1.2843578854296485e-05, "loss": 0.7061, "step": 5568 }, { "epoch": 0.8521805661820964, "grad_norm": 2.3172994145565164, "learning_rate": 1.2841202887801618e-05, "loss": 0.7018, "step": 5569 }, { "epoch": 0.8523335883703137, "grad_norm": 2.4927482090533193, "learning_rate": 1.283882674681737e-05, "loss": 0.7816, "step": 5570 }, { "epoch": 0.852486610558531, "grad_norm": 2.327043385481717, "learning_rate": 1.2836450431489662e-05, "loss": 0.7731, "step": 5571 }, { "epoch": 0.8526396327467483, "grad_norm": 2.309999254781278, "learning_rate": 1.2834073941964438e-05, "loss": 0.756, "step": 5572 }, { "epoch": 0.8527926549349656, "grad_norm": 2.4280514724840363, "learning_rate": 1.2831697278387647e-05, "loss": 0.7067, "step": 5573 }, { "epoch": 0.8529456771231828, "grad_norm": 2.0447441695223914, "learning_rate": 1.2829320440905244e-05, "loss": 0.6752, "step": 5574 }, { "epoch": 0.8530986993114001, "grad_norm": 2.0921672371070223, "learning_rate": 1.2826943429663212e-05, "loss": 0.5819, "step": 5575 }, { "epoch": 0.8532517214996175, "grad_norm": 2.0878343760248512, "learning_rate": 1.2824566244807522e-05, "loss": 0.7016, "step": 5576 }, { "epoch": 0.8534047436878347, "grad_norm": 2.306674208187633, "learning_rate": 1.2822188886484165e-05, "loss": 0.754, "step": 5577 }, { "epoch": 0.853557765876052, "grad_norm": 2.4509282856897, "learning_rate": 1.2819811354839151e-05, "loss": 0.7932, "step": 5578 }, { "epoch": 0.8537107880642694, "grad_norm": 2.20326195730903, "learning_rate": 1.2817433650018493e-05, "loss": 0.6907, "step": 5579 }, { "epoch": 0.8538638102524866, "grad_norm": 2.267707095823843, "learning_rate": 1.2815055772168208e-05, "loss": 0.7413, "step": 5580 }, { "epoch": 0.8540168324407039, "grad_norm": 2.246442637791065, "learning_rate": 1.2812677721434338e-05, "loss": 0.6501, "step": 5581 }, { "epoch": 0.8541698546289211, "grad_norm": 2.344967709598487, "learning_rate": 1.2810299497962925e-05, "loss": 0.7843, "step": 5582 }, { "epoch": 0.8543228768171385, "grad_norm": 2.4466452469658604, "learning_rate": 1.280792110190003e-05, "loss": 0.6965, "step": 5583 }, { "epoch": 0.8544758990053558, "grad_norm": 2.405752794561989, "learning_rate": 1.280554253339171e-05, "loss": 0.7889, "step": 5584 }, { "epoch": 0.854628921193573, "grad_norm": 2.349194728354978, "learning_rate": 1.280316379258405e-05, "loss": 0.6264, "step": 5585 }, { "epoch": 0.8547819433817904, "grad_norm": 2.3163214380738117, "learning_rate": 1.2800784879623135e-05, "loss": 0.7097, "step": 5586 }, { "epoch": 0.8549349655700077, "grad_norm": 2.205955621353382, "learning_rate": 1.2798405794655064e-05, "loss": 0.7304, "step": 5587 }, { "epoch": 0.8550879877582249, "grad_norm": 2.2160837810993526, "learning_rate": 1.2796026537825947e-05, "loss": 0.5901, "step": 5588 }, { "epoch": 0.8552410099464423, "grad_norm": 2.076799915175384, "learning_rate": 1.2793647109281901e-05, "loss": 0.6659, "step": 5589 }, { "epoch": 0.8553940321346595, "grad_norm": 2.169267298600903, "learning_rate": 1.2791267509169054e-05, "loss": 0.707, "step": 5590 }, { "epoch": 0.8555470543228768, "grad_norm": 2.0806303549381364, "learning_rate": 1.2788887737633555e-05, "loss": 0.648, "step": 5591 }, { "epoch": 0.8557000765110941, "grad_norm": 2.1414594681949635, "learning_rate": 1.278650779482155e-05, "loss": 0.6572, "step": 5592 }, { "epoch": 0.8558530986993114, "grad_norm": 2.3353430053689435, "learning_rate": 1.2784127680879195e-05, "loss": 0.6605, "step": 5593 }, { "epoch": 0.8560061208875287, "grad_norm": 2.4855997648787653, "learning_rate": 1.278174739595267e-05, "loss": 0.6597, "step": 5594 }, { "epoch": 0.856159143075746, "grad_norm": 2.139860057474892, "learning_rate": 1.2779366940188159e-05, "loss": 0.6893, "step": 5595 }, { "epoch": 0.8563121652639633, "grad_norm": 2.229662131253198, "learning_rate": 1.2776986313731847e-05, "loss": 0.6621, "step": 5596 }, { "epoch": 0.8564651874521806, "grad_norm": 2.482093964955999, "learning_rate": 1.2774605516729944e-05, "loss": 0.8149, "step": 5597 }, { "epoch": 0.8566182096403978, "grad_norm": 2.3260474144446115, "learning_rate": 1.277222454932866e-05, "loss": 0.6988, "step": 5598 }, { "epoch": 0.8567712318286151, "grad_norm": 2.238022446025169, "learning_rate": 1.2769843411674221e-05, "loss": 0.7234, "step": 5599 }, { "epoch": 0.8569242540168325, "grad_norm": 1.9938349722793565, "learning_rate": 1.2767462103912864e-05, "loss": 0.597, "step": 5600 }, { "epoch": 0.8570772762050497, "grad_norm": 2.4546219970379726, "learning_rate": 1.2765080626190834e-05, "loss": 0.7452, "step": 5601 }, { "epoch": 0.857230298393267, "grad_norm": 2.3423061899227244, "learning_rate": 1.2762698978654381e-05, "loss": 0.7442, "step": 5602 }, { "epoch": 0.8573833205814844, "grad_norm": 2.5320781222404176, "learning_rate": 1.276031716144978e-05, "loss": 0.772, "step": 5603 }, { "epoch": 0.8575363427697016, "grad_norm": 2.3118002124605623, "learning_rate": 1.2757935174723306e-05, "loss": 0.6711, "step": 5604 }, { "epoch": 0.8576893649579189, "grad_norm": 2.400968927044133, "learning_rate": 1.2755553018621238e-05, "loss": 0.6895, "step": 5605 }, { "epoch": 0.8578423871461361, "grad_norm": 1.9936304706669457, "learning_rate": 1.2753170693289882e-05, "loss": 0.5834, "step": 5606 }, { "epoch": 0.8579954093343535, "grad_norm": 2.5147260168293384, "learning_rate": 1.2750788198875545e-05, "loss": 0.6648, "step": 5607 }, { "epoch": 0.8581484315225708, "grad_norm": 2.202869381008538, "learning_rate": 1.2748405535524541e-05, "loss": 0.6608, "step": 5608 }, { "epoch": 0.858301453710788, "grad_norm": 2.2267686720968394, "learning_rate": 1.2746022703383203e-05, "loss": 0.7458, "step": 5609 }, { "epoch": 0.8584544758990054, "grad_norm": 2.1783234217642153, "learning_rate": 1.2743639702597868e-05, "loss": 0.7113, "step": 5610 }, { "epoch": 0.8586074980872227, "grad_norm": 2.2367728821011785, "learning_rate": 1.2741256533314885e-05, "loss": 0.6847, "step": 5611 }, { "epoch": 0.8587605202754399, "grad_norm": 2.4930263847632457, "learning_rate": 1.2738873195680615e-05, "loss": 0.7769, "step": 5612 }, { "epoch": 0.8589135424636573, "grad_norm": 2.3443625908959858, "learning_rate": 1.2736489689841427e-05, "loss": 0.7359, "step": 5613 }, { "epoch": 0.8590665646518745, "grad_norm": 2.305054821326658, "learning_rate": 1.2734106015943702e-05, "loss": 0.6783, "step": 5614 }, { "epoch": 0.8592195868400918, "grad_norm": 1.9663379003968546, "learning_rate": 1.2731722174133827e-05, "loss": 0.6796, "step": 5615 }, { "epoch": 0.8593726090283091, "grad_norm": 2.092756435546732, "learning_rate": 1.2729338164558214e-05, "loss": 0.713, "step": 5616 }, { "epoch": 0.8595256312165264, "grad_norm": 2.24491708283434, "learning_rate": 1.2726953987363264e-05, "loss": 0.6345, "step": 5617 }, { "epoch": 0.8596786534047437, "grad_norm": 2.1594981539794276, "learning_rate": 1.2724569642695399e-05, "loss": 0.6694, "step": 5618 }, { "epoch": 0.859831675592961, "grad_norm": 2.2943585124865384, "learning_rate": 1.2722185130701057e-05, "loss": 0.7367, "step": 5619 }, { "epoch": 0.8599846977811783, "grad_norm": 2.3976683676703767, "learning_rate": 1.2719800451526676e-05, "loss": 0.7081, "step": 5620 }, { "epoch": 0.8601377199693956, "grad_norm": 2.2946408098661064, "learning_rate": 1.2717415605318708e-05, "loss": 0.7871, "step": 5621 }, { "epoch": 0.8602907421576128, "grad_norm": 2.3980471919531876, "learning_rate": 1.2715030592223619e-05, "loss": 0.758, "step": 5622 }, { "epoch": 0.8604437643458301, "grad_norm": 2.2339319776161037, "learning_rate": 1.2712645412387878e-05, "loss": 0.6481, "step": 5623 }, { "epoch": 0.8605967865340475, "grad_norm": 2.370078313360965, "learning_rate": 1.2710260065957973e-05, "loss": 0.8031, "step": 5624 }, { "epoch": 0.8607498087222647, "grad_norm": 2.361702795265424, "learning_rate": 1.2707874553080393e-05, "loss": 0.6998, "step": 5625 }, { "epoch": 0.860902830910482, "grad_norm": 2.185328558542397, "learning_rate": 1.2705488873901643e-05, "loss": 0.6882, "step": 5626 }, { "epoch": 0.8610558530986994, "grad_norm": 2.432654884382053, "learning_rate": 1.270310302856824e-05, "loss": 0.662, "step": 5627 }, { "epoch": 0.8612088752869166, "grad_norm": 2.4952824090910624, "learning_rate": 1.27007170172267e-05, "loss": 0.8529, "step": 5628 }, { "epoch": 0.8613618974751339, "grad_norm": 2.236267328529113, "learning_rate": 1.2698330840023569e-05, "loss": 0.7145, "step": 5629 }, { "epoch": 0.8615149196633511, "grad_norm": 2.2009751715707746, "learning_rate": 1.2695944497105383e-05, "loss": 0.7445, "step": 5630 }, { "epoch": 0.8616679418515685, "grad_norm": 2.671644999870953, "learning_rate": 1.2693557988618696e-05, "loss": 0.6987, "step": 5631 }, { "epoch": 0.8618209640397858, "grad_norm": 2.1200922727768194, "learning_rate": 1.2691171314710078e-05, "loss": 0.6588, "step": 5632 }, { "epoch": 0.861973986228003, "grad_norm": 2.20322726646221, "learning_rate": 1.2688784475526103e-05, "loss": 0.5955, "step": 5633 }, { "epoch": 0.8621270084162204, "grad_norm": 2.0969035364933095, "learning_rate": 1.2686397471213352e-05, "loss": 0.6224, "step": 5634 }, { "epoch": 0.8622800306044377, "grad_norm": 2.128200156708807, "learning_rate": 1.2684010301918424e-05, "loss": 0.6266, "step": 5635 }, { "epoch": 0.8624330527926549, "grad_norm": 2.1941487004202114, "learning_rate": 1.2681622967787925e-05, "loss": 0.6647, "step": 5636 }, { "epoch": 0.8625860749808723, "grad_norm": 1.9520577900142773, "learning_rate": 1.2679235468968468e-05, "loss": 0.6282, "step": 5637 }, { "epoch": 0.8627390971690895, "grad_norm": 2.36577545218808, "learning_rate": 1.267684780560668e-05, "loss": 0.6948, "step": 5638 }, { "epoch": 0.8628921193573068, "grad_norm": 2.030714318416365, "learning_rate": 1.2674459977849197e-05, "loss": 0.6446, "step": 5639 }, { "epoch": 0.8630451415455241, "grad_norm": 2.3029069648527463, "learning_rate": 1.267207198584266e-05, "loss": 0.7449, "step": 5640 }, { "epoch": 0.8631981637337414, "grad_norm": 2.240610425205807, "learning_rate": 1.2669683829733734e-05, "loss": 0.6766, "step": 5641 }, { "epoch": 0.8633511859219587, "grad_norm": 2.3727769689851743, "learning_rate": 1.2667295509669077e-05, "loss": 0.681, "step": 5642 }, { "epoch": 0.863504208110176, "grad_norm": 2.2735964610799932, "learning_rate": 1.2664907025795369e-05, "loss": 0.689, "step": 5643 }, { "epoch": 0.8636572302983933, "grad_norm": 2.3451657930943632, "learning_rate": 1.2662518378259297e-05, "loss": 0.7561, "step": 5644 }, { "epoch": 0.8638102524866106, "grad_norm": 2.3808378476510583, "learning_rate": 1.2660129567207555e-05, "loss": 0.7952, "step": 5645 }, { "epoch": 0.8639632746748278, "grad_norm": 2.264345473222062, "learning_rate": 1.2657740592786847e-05, "loss": 0.7002, "step": 5646 }, { "epoch": 0.8641162968630451, "grad_norm": 2.079793665356641, "learning_rate": 1.2655351455143896e-05, "loss": 0.6575, "step": 5647 }, { "epoch": 0.8642693190512625, "grad_norm": 2.4275830013363175, "learning_rate": 1.2652962154425423e-05, "loss": 0.7579, "step": 5648 }, { "epoch": 0.8644223412394797, "grad_norm": 2.2748583552247994, "learning_rate": 1.2650572690778164e-05, "loss": 0.6597, "step": 5649 }, { "epoch": 0.864575363427697, "grad_norm": 2.1601232807271673, "learning_rate": 1.2648183064348868e-05, "loss": 0.5999, "step": 5650 }, { "epoch": 0.8647283856159143, "grad_norm": 2.2760902133080467, "learning_rate": 1.2645793275284293e-05, "loss": 0.6744, "step": 5651 }, { "epoch": 0.8648814078041316, "grad_norm": 2.5328811384211405, "learning_rate": 1.2643403323731198e-05, "loss": 0.7486, "step": 5652 }, { "epoch": 0.8650344299923489, "grad_norm": 1.9191237464359976, "learning_rate": 1.2641013209836369e-05, "loss": 0.5882, "step": 5653 }, { "epoch": 0.8651874521805661, "grad_norm": 2.1442818040053564, "learning_rate": 1.2638622933746583e-05, "loss": 0.6574, "step": 5654 }, { "epoch": 0.8653404743687835, "grad_norm": 2.313223313915067, "learning_rate": 1.263623249560864e-05, "loss": 0.705, "step": 5655 }, { "epoch": 0.8654934965570008, "grad_norm": 2.1582983437738363, "learning_rate": 1.2633841895569346e-05, "loss": 0.6208, "step": 5656 }, { "epoch": 0.865646518745218, "grad_norm": 2.140699620669154, "learning_rate": 1.263145113377552e-05, "loss": 0.6216, "step": 5657 }, { "epoch": 0.8657995409334354, "grad_norm": 2.51001916251359, "learning_rate": 1.2629060210373986e-05, "loss": 0.8365, "step": 5658 }, { "epoch": 0.8659525631216526, "grad_norm": 2.1467206121687714, "learning_rate": 1.2626669125511578e-05, "loss": 0.6975, "step": 5659 }, { "epoch": 0.8661055853098699, "grad_norm": 2.1179526840995955, "learning_rate": 1.2624277879335142e-05, "loss": 0.6771, "step": 5660 }, { "epoch": 0.8662586074980873, "grad_norm": 2.242352909508502, "learning_rate": 1.2621886471991538e-05, "loss": 0.7168, "step": 5661 }, { "epoch": 0.8664116296863045, "grad_norm": 2.4601217594406193, "learning_rate": 1.261949490362763e-05, "loss": 0.6644, "step": 5662 }, { "epoch": 0.8665646518745218, "grad_norm": 2.1506121299772327, "learning_rate": 1.2617103174390289e-05, "loss": 0.6518, "step": 5663 }, { "epoch": 0.8667176740627391, "grad_norm": 2.535854595188026, "learning_rate": 1.2614711284426406e-05, "loss": 0.7968, "step": 5664 }, { "epoch": 0.8668706962509564, "grad_norm": 2.3806302754076936, "learning_rate": 1.2612319233882873e-05, "loss": 0.6961, "step": 5665 }, { "epoch": 0.8670237184391737, "grad_norm": 2.0825689145311705, "learning_rate": 1.2609927022906598e-05, "loss": 0.4917, "step": 5666 }, { "epoch": 0.8671767406273909, "grad_norm": 2.296561591485694, "learning_rate": 1.2607534651644493e-05, "loss": 0.6687, "step": 5667 }, { "epoch": 0.8673297628156083, "grad_norm": 2.0016062923376703, "learning_rate": 1.2605142120243485e-05, "loss": 0.5966, "step": 5668 }, { "epoch": 0.8674827850038256, "grad_norm": 2.2328943779206676, "learning_rate": 1.2602749428850506e-05, "loss": 0.6952, "step": 5669 }, { "epoch": 0.8676358071920428, "grad_norm": 2.1730370472915164, "learning_rate": 1.2600356577612507e-05, "loss": 0.6091, "step": 5670 }, { "epoch": 0.8677888293802601, "grad_norm": 2.30791377621154, "learning_rate": 1.2597963566676435e-05, "loss": 0.7486, "step": 5671 }, { "epoch": 0.8679418515684775, "grad_norm": 2.276237653997365, "learning_rate": 1.2595570396189254e-05, "loss": 0.7926, "step": 5672 }, { "epoch": 0.8680948737566947, "grad_norm": 2.3754804501301514, "learning_rate": 1.2593177066297945e-05, "loss": 0.6826, "step": 5673 }, { "epoch": 0.868247895944912, "grad_norm": 2.2435259533338314, "learning_rate": 1.2590783577149488e-05, "loss": 0.6384, "step": 5674 }, { "epoch": 0.8684009181331293, "grad_norm": 2.087817049910248, "learning_rate": 1.2588389928890873e-05, "loss": 0.6228, "step": 5675 }, { "epoch": 0.8685539403213466, "grad_norm": 2.3075610370217516, "learning_rate": 1.258599612166911e-05, "loss": 0.6741, "step": 5676 }, { "epoch": 0.8687069625095639, "grad_norm": 2.105883253531245, "learning_rate": 1.258360215563121e-05, "loss": 0.6242, "step": 5677 }, { "epoch": 0.8688599846977811, "grad_norm": 2.2765438530308564, "learning_rate": 1.258120803092419e-05, "loss": 0.7679, "step": 5678 }, { "epoch": 0.8690130068859985, "grad_norm": 2.4505312365492844, "learning_rate": 1.2578813747695091e-05, "loss": 0.6735, "step": 5679 }, { "epoch": 0.8691660290742158, "grad_norm": 2.3676073507608604, "learning_rate": 1.2576419306090948e-05, "loss": 0.6729, "step": 5680 }, { "epoch": 0.869319051262433, "grad_norm": 2.44965356160525, "learning_rate": 1.2574024706258817e-05, "loss": 0.7476, "step": 5681 }, { "epoch": 0.8694720734506504, "grad_norm": 2.1538693697394486, "learning_rate": 1.2571629948345763e-05, "loss": 0.6866, "step": 5682 }, { "epoch": 0.8696250956388676, "grad_norm": 2.0656645571127887, "learning_rate": 1.256923503249885e-05, "loss": 0.7174, "step": 5683 }, { "epoch": 0.8697781178270849, "grad_norm": 2.21545987856496, "learning_rate": 1.2566839958865162e-05, "loss": 0.6976, "step": 5684 }, { "epoch": 0.8699311400153023, "grad_norm": 2.2435429627080268, "learning_rate": 1.2564444727591791e-05, "loss": 0.7416, "step": 5685 }, { "epoch": 0.8700841622035195, "grad_norm": 2.288122679843375, "learning_rate": 1.2562049338825842e-05, "loss": 0.6695, "step": 5686 }, { "epoch": 0.8702371843917368, "grad_norm": 2.4078595060538452, "learning_rate": 1.2559653792714414e-05, "loss": 0.7819, "step": 5687 }, { "epoch": 0.8703902065799541, "grad_norm": 2.4138389650474097, "learning_rate": 1.2557258089404635e-05, "loss": 0.6232, "step": 5688 }, { "epoch": 0.8705432287681714, "grad_norm": 2.1081792762846945, "learning_rate": 1.2554862229043633e-05, "loss": 0.6831, "step": 5689 }, { "epoch": 0.8706962509563887, "grad_norm": 2.0366851001301596, "learning_rate": 1.255246621177855e-05, "loss": 0.7232, "step": 5690 }, { "epoch": 0.8708492731446059, "grad_norm": 2.0327500614427536, "learning_rate": 1.2550070037756527e-05, "loss": 0.6628, "step": 5691 }, { "epoch": 0.8710022953328233, "grad_norm": 2.361742724788852, "learning_rate": 1.254767370712473e-05, "loss": 0.8272, "step": 5692 }, { "epoch": 0.8711553175210406, "grad_norm": 2.18870554680478, "learning_rate": 1.2545277220030324e-05, "loss": 0.6787, "step": 5693 }, { "epoch": 0.8713083397092578, "grad_norm": 2.3066595929772613, "learning_rate": 1.2542880576620484e-05, "loss": 0.7596, "step": 5694 }, { "epoch": 0.8714613618974751, "grad_norm": 2.3582637357450023, "learning_rate": 1.2540483777042403e-05, "loss": 0.6326, "step": 5695 }, { "epoch": 0.8716143840856925, "grad_norm": 2.1035213548968312, "learning_rate": 1.2538086821443273e-05, "loss": 0.6634, "step": 5696 }, { "epoch": 0.8717674062739097, "grad_norm": 2.2947630934368837, "learning_rate": 1.25356897099703e-05, "loss": 0.7888, "step": 5697 }, { "epoch": 0.871920428462127, "grad_norm": 2.3707709917608204, "learning_rate": 1.2533292442770705e-05, "loss": 0.78, "step": 5698 }, { "epoch": 0.8720734506503443, "grad_norm": 2.1437011373550003, "learning_rate": 1.2530895019991714e-05, "loss": 0.7513, "step": 5699 }, { "epoch": 0.8722264728385616, "grad_norm": 2.132187427093857, "learning_rate": 1.2528497441780554e-05, "loss": 0.6085, "step": 5700 }, { "epoch": 0.8723794950267789, "grad_norm": 2.25455470161129, "learning_rate": 1.2526099708284476e-05, "loss": 0.7394, "step": 5701 }, { "epoch": 0.8725325172149961, "grad_norm": 2.5745994986985106, "learning_rate": 1.2523701819650733e-05, "loss": 0.7067, "step": 5702 }, { "epoch": 0.8726855394032135, "grad_norm": 2.139572994127622, "learning_rate": 1.252130377602659e-05, "loss": 0.6948, "step": 5703 }, { "epoch": 0.8728385615914308, "grad_norm": 2.336370835834088, "learning_rate": 1.2518905577559317e-05, "loss": 0.6016, "step": 5704 }, { "epoch": 0.872991583779648, "grad_norm": 2.212567829784752, "learning_rate": 1.2516507224396199e-05, "loss": 0.7168, "step": 5705 }, { "epoch": 0.8731446059678654, "grad_norm": 2.1173283124728144, "learning_rate": 1.2514108716684527e-05, "loss": 0.7108, "step": 5706 }, { "epoch": 0.8732976281560826, "grad_norm": 2.286564364188799, "learning_rate": 1.2511710054571603e-05, "loss": 0.6567, "step": 5707 }, { "epoch": 0.8734506503442999, "grad_norm": 2.4319669016285155, "learning_rate": 1.2509311238204742e-05, "loss": 0.7369, "step": 5708 }, { "epoch": 0.8736036725325173, "grad_norm": 2.3105859541550804, "learning_rate": 1.250691226773126e-05, "loss": 0.6155, "step": 5709 }, { "epoch": 0.8737566947207345, "grad_norm": 2.299770180808811, "learning_rate": 1.2504513143298485e-05, "loss": 0.7094, "step": 5710 }, { "epoch": 0.8739097169089518, "grad_norm": 2.0337893937536595, "learning_rate": 1.2502113865053764e-05, "loss": 0.6748, "step": 5711 }, { "epoch": 0.8740627390971691, "grad_norm": 2.385295588774363, "learning_rate": 1.2499714433144441e-05, "loss": 0.7138, "step": 5712 }, { "epoch": 0.8742157612853864, "grad_norm": 2.284489191336518, "learning_rate": 1.2497314847717875e-05, "loss": 0.743, "step": 5713 }, { "epoch": 0.8743687834736037, "grad_norm": 2.3990280322523367, "learning_rate": 1.2494915108921434e-05, "loss": 0.6998, "step": 5714 }, { "epoch": 0.8745218056618209, "grad_norm": 2.2891843264651905, "learning_rate": 1.2492515216902499e-05, "loss": 0.6992, "step": 5715 }, { "epoch": 0.8746748278500383, "grad_norm": 2.3026551519865683, "learning_rate": 1.249011517180845e-05, "loss": 0.7515, "step": 5716 }, { "epoch": 0.8748278500382556, "grad_norm": 2.337520385134906, "learning_rate": 1.248771497378669e-05, "loss": 0.7088, "step": 5717 }, { "epoch": 0.8749808722264728, "grad_norm": 2.1926541321569006, "learning_rate": 1.248531462298462e-05, "loss": 0.7873, "step": 5718 }, { "epoch": 0.8751338944146901, "grad_norm": 2.4455148838618244, "learning_rate": 1.2482914119549658e-05, "loss": 0.6451, "step": 5719 }, { "epoch": 0.8752869166029075, "grad_norm": 2.5339680205683743, "learning_rate": 1.2480513463629224e-05, "loss": 0.7793, "step": 5720 }, { "epoch": 0.8754399387911247, "grad_norm": 2.3813626401774775, "learning_rate": 1.2478112655370758e-05, "loss": 0.5941, "step": 5721 }, { "epoch": 0.875592960979342, "grad_norm": 2.4186647306912046, "learning_rate": 1.2475711694921695e-05, "loss": 0.723, "step": 5722 }, { "epoch": 0.8757459831675592, "grad_norm": 2.033642018028739, "learning_rate": 1.2473310582429496e-05, "loss": 0.6569, "step": 5723 }, { "epoch": 0.8758990053557766, "grad_norm": 2.050104551897003, "learning_rate": 1.2470909318041618e-05, "loss": 0.7118, "step": 5724 }, { "epoch": 0.8760520275439939, "grad_norm": 2.098302900404374, "learning_rate": 1.2468507901905527e-05, "loss": 0.5665, "step": 5725 }, { "epoch": 0.8762050497322111, "grad_norm": 2.238378059736463, "learning_rate": 1.2466106334168713e-05, "loss": 0.6099, "step": 5726 }, { "epoch": 0.8763580719204285, "grad_norm": 2.264652562759667, "learning_rate": 1.2463704614978664e-05, "loss": 0.7528, "step": 5727 }, { "epoch": 0.8765110941086458, "grad_norm": 2.315603460621223, "learning_rate": 1.2461302744482873e-05, "loss": 0.7416, "step": 5728 }, { "epoch": 0.876664116296863, "grad_norm": 1.9928379922302875, "learning_rate": 1.2458900722828852e-05, "loss": 0.6208, "step": 5729 }, { "epoch": 0.8768171384850804, "grad_norm": 2.196223804754041, "learning_rate": 1.245649855016412e-05, "loss": 0.789, "step": 5730 }, { "epoch": 0.8769701606732976, "grad_norm": 1.9968093015394444, "learning_rate": 1.24540962266362e-05, "loss": 0.6108, "step": 5731 }, { "epoch": 0.8771231828615149, "grad_norm": 2.1538731630302754, "learning_rate": 1.2451693752392634e-05, "loss": 0.6491, "step": 5732 }, { "epoch": 0.8772762050497322, "grad_norm": 2.336782860051827, "learning_rate": 1.244929112758096e-05, "loss": 0.6887, "step": 5733 }, { "epoch": 0.8774292272379495, "grad_norm": 2.0968379693636265, "learning_rate": 1.2446888352348738e-05, "loss": 0.6052, "step": 5734 }, { "epoch": 0.8775822494261668, "grad_norm": 2.8413364803514463, "learning_rate": 1.2444485426843527e-05, "loss": 0.7297, "step": 5735 }, { "epoch": 0.8777352716143841, "grad_norm": 2.414859416385035, "learning_rate": 1.2442082351212908e-05, "loss": 0.6594, "step": 5736 }, { "epoch": 0.8778882938026014, "grad_norm": 2.1397332273037857, "learning_rate": 1.2439679125604455e-05, "loss": 0.6262, "step": 5737 }, { "epoch": 0.8780413159908187, "grad_norm": 2.302960630598352, "learning_rate": 1.2437275750165763e-05, "loss": 0.6868, "step": 5738 }, { "epoch": 0.8781943381790359, "grad_norm": 2.159627088120593, "learning_rate": 1.2434872225044432e-05, "loss": 0.6593, "step": 5739 }, { "epoch": 0.8783473603672532, "grad_norm": 2.303777890381595, "learning_rate": 1.2432468550388078e-05, "loss": 0.7509, "step": 5740 }, { "epoch": 0.8785003825554706, "grad_norm": 2.082765533333303, "learning_rate": 1.2430064726344306e-05, "loss": 0.6463, "step": 5741 }, { "epoch": 0.8786534047436878, "grad_norm": 2.300303736175458, "learning_rate": 1.2427660753060758e-05, "loss": 0.7133, "step": 5742 }, { "epoch": 0.8788064269319051, "grad_norm": 2.2364378062050254, "learning_rate": 1.2425256630685063e-05, "loss": 0.6881, "step": 5743 }, { "epoch": 0.8789594491201225, "grad_norm": 2.0318322741252146, "learning_rate": 1.2422852359364873e-05, "loss": 0.6728, "step": 5744 }, { "epoch": 0.8791124713083397, "grad_norm": 2.2990306189375223, "learning_rate": 1.2420447939247841e-05, "loss": 0.623, "step": 5745 }, { "epoch": 0.879265493496557, "grad_norm": 2.4502465737162504, "learning_rate": 1.2418043370481631e-05, "loss": 0.6824, "step": 5746 }, { "epoch": 0.8794185156847742, "grad_norm": 2.2065393096891803, "learning_rate": 1.2415638653213919e-05, "loss": 0.631, "step": 5747 }, { "epoch": 0.8795715378729916, "grad_norm": 2.3645950652061565, "learning_rate": 1.2413233787592388e-05, "loss": 0.7174, "step": 5748 }, { "epoch": 0.8797245600612089, "grad_norm": 2.184415402062973, "learning_rate": 1.241082877376473e-05, "loss": 0.6933, "step": 5749 }, { "epoch": 0.8798775822494261, "grad_norm": 2.2344101746435574, "learning_rate": 1.2408423611878643e-05, "loss": 0.7379, "step": 5750 }, { "epoch": 0.8800306044376435, "grad_norm": 2.2362564806831107, "learning_rate": 1.2406018302081841e-05, "loss": 0.6887, "step": 5751 }, { "epoch": 0.8801836266258608, "grad_norm": 2.1149926363107814, "learning_rate": 1.2403612844522046e-05, "loss": 0.6424, "step": 5752 }, { "epoch": 0.880336648814078, "grad_norm": 2.310775641769557, "learning_rate": 1.2401207239346982e-05, "loss": 0.731, "step": 5753 }, { "epoch": 0.8804896710022954, "grad_norm": 2.254344287576404, "learning_rate": 1.2398801486704384e-05, "loss": 0.7344, "step": 5754 }, { "epoch": 0.8806426931905126, "grad_norm": 2.159264355383588, "learning_rate": 1.2396395586742005e-05, "loss": 0.7299, "step": 5755 }, { "epoch": 0.8807957153787299, "grad_norm": 2.4997945440793528, "learning_rate": 1.2393989539607601e-05, "loss": 0.6929, "step": 5756 }, { "epoch": 0.8809487375669472, "grad_norm": 2.019321161103983, "learning_rate": 1.239158334544893e-05, "loss": 0.5885, "step": 5757 }, { "epoch": 0.8811017597551645, "grad_norm": 2.4218191192077176, "learning_rate": 1.238917700441377e-05, "loss": 0.7094, "step": 5758 }, { "epoch": 0.8812547819433818, "grad_norm": 2.0924771769119808, "learning_rate": 1.2386770516649904e-05, "loss": 0.6709, "step": 5759 }, { "epoch": 0.881407804131599, "grad_norm": 2.48339718936483, "learning_rate": 1.2384363882305125e-05, "loss": 0.7068, "step": 5760 }, { "epoch": 0.8815608263198164, "grad_norm": 2.565722271968007, "learning_rate": 1.2381957101527233e-05, "loss": 0.6858, "step": 5761 }, { "epoch": 0.8817138485080337, "grad_norm": 2.2301004887844513, "learning_rate": 1.2379550174464034e-05, "loss": 0.5637, "step": 5762 }, { "epoch": 0.8818668706962509, "grad_norm": 2.1331296366939356, "learning_rate": 1.2377143101263351e-05, "loss": 0.6533, "step": 5763 }, { "epoch": 0.8820198928844682, "grad_norm": 2.0132594420000984, "learning_rate": 1.2374735882073014e-05, "loss": 0.6407, "step": 5764 }, { "epoch": 0.8821729150726856, "grad_norm": 2.11184387213977, "learning_rate": 1.2372328517040854e-05, "loss": 0.7467, "step": 5765 }, { "epoch": 0.8823259372609028, "grad_norm": 2.1063721445520156, "learning_rate": 1.2369921006314716e-05, "loss": 0.6223, "step": 5766 }, { "epoch": 0.8824789594491201, "grad_norm": 2.135649260419935, "learning_rate": 1.2367513350042461e-05, "loss": 0.7001, "step": 5767 }, { "epoch": 0.8826319816373374, "grad_norm": 2.4573013608444167, "learning_rate": 1.2365105548371949e-05, "loss": 0.7085, "step": 5768 }, { "epoch": 0.8827850038255547, "grad_norm": 2.4521500514536734, "learning_rate": 1.2362697601451055e-05, "loss": 0.6949, "step": 5769 }, { "epoch": 0.882938026013772, "grad_norm": 2.3315675840027112, "learning_rate": 1.2360289509427657e-05, "loss": 0.6517, "step": 5770 }, { "epoch": 0.8830910482019892, "grad_norm": 2.4804572529648365, "learning_rate": 1.2357881272449645e-05, "loss": 0.7527, "step": 5771 }, { "epoch": 0.8832440703902066, "grad_norm": 2.3055905996779162, "learning_rate": 1.2355472890664921e-05, "loss": 0.6958, "step": 5772 }, { "epoch": 0.8833970925784239, "grad_norm": 2.2077589919529896, "learning_rate": 1.2353064364221394e-05, "loss": 0.7385, "step": 5773 }, { "epoch": 0.8835501147666411, "grad_norm": 2.5673498996652393, "learning_rate": 1.2350655693266977e-05, "loss": 0.7746, "step": 5774 }, { "epoch": 0.8837031369548585, "grad_norm": 2.327867947254067, "learning_rate": 1.23482468779496e-05, "loss": 0.7871, "step": 5775 }, { "epoch": 0.8838561591430757, "grad_norm": 1.994862966333433, "learning_rate": 1.2345837918417192e-05, "loss": 0.5689, "step": 5776 }, { "epoch": 0.884009181331293, "grad_norm": 2.1628077023188577, "learning_rate": 1.2343428814817704e-05, "loss": 0.6696, "step": 5777 }, { "epoch": 0.8841622035195104, "grad_norm": 2.4281221978724177, "learning_rate": 1.2341019567299084e-05, "loss": 0.7309, "step": 5778 }, { "epoch": 0.8843152257077276, "grad_norm": 2.415771519746245, "learning_rate": 1.2338610176009294e-05, "loss": 0.7888, "step": 5779 }, { "epoch": 0.8844682478959449, "grad_norm": 2.0146648291440132, "learning_rate": 1.2336200641096302e-05, "loss": 0.7183, "step": 5780 }, { "epoch": 0.8846212700841622, "grad_norm": 2.101105174830713, "learning_rate": 1.2333790962708096e-05, "loss": 0.6244, "step": 5781 }, { "epoch": 0.8847742922723795, "grad_norm": 2.747095291584874, "learning_rate": 1.233138114099265e-05, "loss": 0.7718, "step": 5782 }, { "epoch": 0.8849273144605968, "grad_norm": 2.1914791612843842, "learning_rate": 1.2328971176097973e-05, "loss": 0.7515, "step": 5783 }, { "epoch": 0.885080336648814, "grad_norm": 2.2870028208167317, "learning_rate": 1.2326561068172063e-05, "loss": 0.6468, "step": 5784 }, { "epoch": 0.8852333588370314, "grad_norm": 2.27622510477052, "learning_rate": 1.2324150817362934e-05, "loss": 0.6705, "step": 5785 }, { "epoch": 0.8853863810252487, "grad_norm": 2.3932459194155125, "learning_rate": 1.2321740423818614e-05, "loss": 0.72, "step": 5786 }, { "epoch": 0.8855394032134659, "grad_norm": 2.0056690093491496, "learning_rate": 1.2319329887687132e-05, "loss": 0.6506, "step": 5787 }, { "epoch": 0.8856924254016832, "grad_norm": 2.3515431421733064, "learning_rate": 1.2316919209116527e-05, "loss": 0.718, "step": 5788 }, { "epoch": 0.8858454475899006, "grad_norm": 2.564976261897152, "learning_rate": 1.2314508388254848e-05, "loss": 0.7486, "step": 5789 }, { "epoch": 0.8859984697781178, "grad_norm": 2.286392560336342, "learning_rate": 1.2312097425250157e-05, "loss": 0.5844, "step": 5790 }, { "epoch": 0.8861514919663351, "grad_norm": 2.40196016256603, "learning_rate": 1.230968632025052e-05, "loss": 0.6324, "step": 5791 }, { "epoch": 0.8863045141545524, "grad_norm": 2.1219767397301377, "learning_rate": 1.2307275073404005e-05, "loss": 0.6549, "step": 5792 }, { "epoch": 0.8864575363427697, "grad_norm": 2.1545723623205353, "learning_rate": 1.2304863684858708e-05, "loss": 0.6666, "step": 5793 }, { "epoch": 0.886610558530987, "grad_norm": 2.1982785238169624, "learning_rate": 1.2302452154762711e-05, "loss": 0.7322, "step": 5794 }, { "epoch": 0.8867635807192042, "grad_norm": 1.9445798805354053, "learning_rate": 1.230004048326412e-05, "loss": 0.5734, "step": 5795 }, { "epoch": 0.8869166029074216, "grad_norm": 2.097172733466339, "learning_rate": 1.2297628670511046e-05, "loss": 0.5911, "step": 5796 }, { "epoch": 0.8870696250956389, "grad_norm": 2.2876608565036736, "learning_rate": 1.229521671665161e-05, "loss": 0.823, "step": 5797 }, { "epoch": 0.8872226472838561, "grad_norm": 2.130982677926774, "learning_rate": 1.2292804621833932e-05, "loss": 0.6003, "step": 5798 }, { "epoch": 0.8873756694720735, "grad_norm": 2.3899589697261012, "learning_rate": 1.2290392386206153e-05, "loss": 0.7014, "step": 5799 }, { "epoch": 0.8875286916602907, "grad_norm": 2.463939146997431, "learning_rate": 1.228798000991642e-05, "loss": 0.6992, "step": 5800 }, { "epoch": 0.887681713848508, "grad_norm": 2.405366156872562, "learning_rate": 1.228556749311288e-05, "loss": 0.7654, "step": 5801 }, { "epoch": 0.8878347360367254, "grad_norm": 1.8746932186360155, "learning_rate": 1.2283154835943704e-05, "loss": 0.5216, "step": 5802 }, { "epoch": 0.8879877582249426, "grad_norm": 2.1661546901574704, "learning_rate": 1.2280742038557056e-05, "loss": 0.7372, "step": 5803 }, { "epoch": 0.8881407804131599, "grad_norm": 2.493655678620059, "learning_rate": 1.2278329101101116e-05, "loss": 0.7774, "step": 5804 }, { "epoch": 0.8882938026013772, "grad_norm": 2.0972516435366106, "learning_rate": 1.2275916023724072e-05, "loss": 0.6212, "step": 5805 }, { "epoch": 0.8884468247895945, "grad_norm": 2.050606086919499, "learning_rate": 1.2273502806574126e-05, "loss": 0.7213, "step": 5806 }, { "epoch": 0.8885998469778118, "grad_norm": 2.2270183694432175, "learning_rate": 1.2271089449799476e-05, "loss": 0.7031, "step": 5807 }, { "epoch": 0.888752869166029, "grad_norm": 2.2753592897527097, "learning_rate": 1.2268675953548336e-05, "loss": 0.6464, "step": 5808 }, { "epoch": 0.8889058913542464, "grad_norm": 2.2711193654457777, "learning_rate": 1.2266262317968934e-05, "loss": 0.7809, "step": 5809 }, { "epoch": 0.8890589135424637, "grad_norm": 2.1135815709154793, "learning_rate": 1.2263848543209496e-05, "loss": 0.6955, "step": 5810 }, { "epoch": 0.8892119357306809, "grad_norm": 2.4245861015340573, "learning_rate": 1.2261434629418263e-05, "loss": 0.7699, "step": 5811 }, { "epoch": 0.8893649579188982, "grad_norm": 2.102041027083013, "learning_rate": 1.2259020576743484e-05, "loss": 0.6489, "step": 5812 }, { "epoch": 0.8895179801071156, "grad_norm": 2.0571284723371948, "learning_rate": 1.2256606385333411e-05, "loss": 0.7392, "step": 5813 }, { "epoch": 0.8896710022953328, "grad_norm": 2.232074088650882, "learning_rate": 1.2254192055336315e-05, "loss": 0.7079, "step": 5814 }, { "epoch": 0.8898240244835501, "grad_norm": 2.3912417483625164, "learning_rate": 1.2251777586900466e-05, "loss": 0.7092, "step": 5815 }, { "epoch": 0.8899770466717674, "grad_norm": 2.055983923638346, "learning_rate": 1.2249362980174144e-05, "loss": 0.6946, "step": 5816 }, { "epoch": 0.8901300688599847, "grad_norm": 2.4091786231449146, "learning_rate": 1.2246948235305642e-05, "loss": 0.7588, "step": 5817 }, { "epoch": 0.890283091048202, "grad_norm": 2.4550247748254446, "learning_rate": 1.2244533352443262e-05, "loss": 0.8143, "step": 5818 }, { "epoch": 0.8904361132364192, "grad_norm": 2.44563168398848, "learning_rate": 1.2242118331735306e-05, "loss": 0.7611, "step": 5819 }, { "epoch": 0.8905891354246366, "grad_norm": 2.0083209726168167, "learning_rate": 1.223970317333009e-05, "loss": 0.6456, "step": 5820 }, { "epoch": 0.8907421576128539, "grad_norm": 2.261824846327099, "learning_rate": 1.2237287877375943e-05, "loss": 0.659, "step": 5821 }, { "epoch": 0.8908951798010711, "grad_norm": 2.415318778732395, "learning_rate": 1.2234872444021197e-05, "loss": 0.6324, "step": 5822 }, { "epoch": 0.8910482019892885, "grad_norm": 2.491753442524876, "learning_rate": 1.223245687341419e-05, "loss": 0.8115, "step": 5823 }, { "epoch": 0.8912012241775057, "grad_norm": 2.4285888845880415, "learning_rate": 1.223004116570327e-05, "loss": 0.7954, "step": 5824 }, { "epoch": 0.891354246365723, "grad_norm": 2.238293963593015, "learning_rate": 1.22276253210368e-05, "loss": 0.7294, "step": 5825 }, { "epoch": 0.8915072685539404, "grad_norm": 2.326987572185359, "learning_rate": 1.2225209339563144e-05, "loss": 0.5903, "step": 5826 }, { "epoch": 0.8916602907421576, "grad_norm": 2.1780223917114836, "learning_rate": 1.222279322143068e-05, "loss": 0.6964, "step": 5827 }, { "epoch": 0.8918133129303749, "grad_norm": 2.048347926322804, "learning_rate": 1.2220376966787785e-05, "loss": 0.6111, "step": 5828 }, { "epoch": 0.8919663351185922, "grad_norm": 2.003970443987533, "learning_rate": 1.2217960575782856e-05, "loss": 0.6416, "step": 5829 }, { "epoch": 0.8921193573068095, "grad_norm": 2.55460997045148, "learning_rate": 1.2215544048564294e-05, "loss": 0.7348, "step": 5830 }, { "epoch": 0.8922723794950268, "grad_norm": 2.3815038136371496, "learning_rate": 1.22131273852805e-05, "loss": 0.7828, "step": 5831 }, { "epoch": 0.892425401683244, "grad_norm": 2.185261352231627, "learning_rate": 1.22107105860799e-05, "loss": 0.6699, "step": 5832 }, { "epoch": 0.8925784238714614, "grad_norm": 1.9530762615679256, "learning_rate": 1.220829365111091e-05, "loss": 0.5621, "step": 5833 }, { "epoch": 0.8927314460596787, "grad_norm": 2.5679065405768693, "learning_rate": 1.2205876580521971e-05, "loss": 0.7618, "step": 5834 }, { "epoch": 0.8928844682478959, "grad_norm": 1.9323644230313388, "learning_rate": 1.2203459374461522e-05, "loss": 0.7202, "step": 5835 }, { "epoch": 0.8930374904361132, "grad_norm": 2.1426859186740352, "learning_rate": 1.220104203307801e-05, "loss": 0.672, "step": 5836 }, { "epoch": 0.8931905126243306, "grad_norm": 2.4556090557348873, "learning_rate": 1.2198624556519899e-05, "loss": 0.6719, "step": 5837 }, { "epoch": 0.8933435348125478, "grad_norm": 2.411343782082261, "learning_rate": 1.219620694493565e-05, "loss": 0.8695, "step": 5838 }, { "epoch": 0.8934965570007651, "grad_norm": 1.9229137910731127, "learning_rate": 1.2193789198473743e-05, "loss": 0.5691, "step": 5839 }, { "epoch": 0.8936495791889824, "grad_norm": 2.7917967312533083, "learning_rate": 1.2191371317282659e-05, "loss": 0.9142, "step": 5840 }, { "epoch": 0.8938026013771997, "grad_norm": 2.7345209669423927, "learning_rate": 1.2188953301510891e-05, "loss": 0.7448, "step": 5841 }, { "epoch": 0.893955623565417, "grad_norm": 2.2727678554130857, "learning_rate": 1.2186535151306934e-05, "loss": 0.6421, "step": 5842 }, { "epoch": 0.8941086457536342, "grad_norm": 2.258694735574518, "learning_rate": 1.2184116866819303e-05, "loss": 0.7776, "step": 5843 }, { "epoch": 0.8942616679418516, "grad_norm": 2.098915575747473, "learning_rate": 1.2181698448196508e-05, "loss": 0.6931, "step": 5844 }, { "epoch": 0.8944146901300689, "grad_norm": 2.3098715398217706, "learning_rate": 1.2179279895587078e-05, "loss": 0.7966, "step": 5845 }, { "epoch": 0.8945677123182861, "grad_norm": 2.1839510482142255, "learning_rate": 1.2176861209139543e-05, "loss": 0.601, "step": 5846 }, { "epoch": 0.8947207345065035, "grad_norm": 2.186745048637725, "learning_rate": 1.217444238900245e-05, "loss": 0.6878, "step": 5847 }, { "epoch": 0.8948737566947207, "grad_norm": 2.3464306103822428, "learning_rate": 1.2172023435324336e-05, "loss": 0.6573, "step": 5848 }, { "epoch": 0.895026778882938, "grad_norm": 2.1034600448438043, "learning_rate": 1.2169604348253772e-05, "loss": 0.7129, "step": 5849 }, { "epoch": 0.8951798010711554, "grad_norm": 2.4272691681079164, "learning_rate": 1.2167185127939314e-05, "loss": 0.8234, "step": 5850 }, { "epoch": 0.8953328232593726, "grad_norm": 2.3142269494809873, "learning_rate": 1.2164765774529541e-05, "loss": 0.8082, "step": 5851 }, { "epoch": 0.8954858454475899, "grad_norm": 2.2118916809347557, "learning_rate": 1.2162346288173033e-05, "loss": 0.7194, "step": 5852 }, { "epoch": 0.8956388676358072, "grad_norm": 2.654478274331671, "learning_rate": 1.215992666901838e-05, "loss": 0.7258, "step": 5853 }, { "epoch": 0.8957918898240245, "grad_norm": 2.1917989705330174, "learning_rate": 1.2157506917214179e-05, "loss": 0.7053, "step": 5854 }, { "epoch": 0.8959449120122418, "grad_norm": 2.295237595115881, "learning_rate": 1.215508703290904e-05, "loss": 0.8176, "step": 5855 }, { "epoch": 0.896097934200459, "grad_norm": 2.0125203316125724, "learning_rate": 1.2152667016251575e-05, "loss": 0.6165, "step": 5856 }, { "epoch": 0.8962509563886764, "grad_norm": 2.137048919743919, "learning_rate": 1.2150246867390407e-05, "loss": 0.6266, "step": 5857 }, { "epoch": 0.8964039785768937, "grad_norm": 2.3382790666248825, "learning_rate": 1.2147826586474168e-05, "loss": 0.7635, "step": 5858 }, { "epoch": 0.8965570007651109, "grad_norm": 2.1318790415160183, "learning_rate": 1.2145406173651497e-05, "loss": 0.6476, "step": 5859 }, { "epoch": 0.8967100229533282, "grad_norm": 2.169522268623894, "learning_rate": 1.2142985629071037e-05, "loss": 0.6822, "step": 5860 }, { "epoch": 0.8968630451415456, "grad_norm": 2.0656815045352417, "learning_rate": 1.2140564952881446e-05, "loss": 0.5719, "step": 5861 }, { "epoch": 0.8970160673297628, "grad_norm": 2.0940298380060023, "learning_rate": 1.2138144145231387e-05, "loss": 0.6189, "step": 5862 }, { "epoch": 0.8971690895179801, "grad_norm": 2.1912680322446243, "learning_rate": 1.2135723206269535e-05, "loss": 0.7313, "step": 5863 }, { "epoch": 0.8973221117061974, "grad_norm": 2.042693256307619, "learning_rate": 1.2133302136144564e-05, "loss": 0.5883, "step": 5864 }, { "epoch": 0.8974751338944147, "grad_norm": 2.162572034162192, "learning_rate": 1.2130880935005165e-05, "loss": 0.781, "step": 5865 }, { "epoch": 0.897628156082632, "grad_norm": 2.0853415311565113, "learning_rate": 1.212845960300003e-05, "loss": 0.6414, "step": 5866 }, { "epoch": 0.8977811782708492, "grad_norm": 2.306821724711259, "learning_rate": 1.2126038140277863e-05, "loss": 0.6421, "step": 5867 }, { "epoch": 0.8979342004590666, "grad_norm": 2.3155477340665698, "learning_rate": 1.212361654698738e-05, "loss": 0.6216, "step": 5868 }, { "epoch": 0.8980872226472839, "grad_norm": 2.2402002639166936, "learning_rate": 1.2121194823277294e-05, "loss": 0.6306, "step": 5869 }, { "epoch": 0.8982402448355011, "grad_norm": 2.2497974374833793, "learning_rate": 1.2118772969296337e-05, "loss": 0.6934, "step": 5870 }, { "epoch": 0.8983932670237185, "grad_norm": 2.2281448098479264, "learning_rate": 1.2116350985193243e-05, "loss": 0.6245, "step": 5871 }, { "epoch": 0.8985462892119357, "grad_norm": 2.199538303015666, "learning_rate": 1.2113928871116758e-05, "loss": 0.7062, "step": 5872 }, { "epoch": 0.898699311400153, "grad_norm": 2.0709420388921207, "learning_rate": 1.211150662721563e-05, "loss": 0.6909, "step": 5873 }, { "epoch": 0.8988523335883704, "grad_norm": 2.5142622500922154, "learning_rate": 1.2109084253638617e-05, "loss": 0.8169, "step": 5874 }, { "epoch": 0.8990053557765876, "grad_norm": 2.140104777112769, "learning_rate": 1.210666175053449e-05, "loss": 0.6359, "step": 5875 }, { "epoch": 0.8991583779648049, "grad_norm": 2.164375390414777, "learning_rate": 1.2104239118052027e-05, "loss": 0.6977, "step": 5876 }, { "epoch": 0.8993114001530221, "grad_norm": 2.1929526697694, "learning_rate": 1.2101816356340004e-05, "loss": 0.6797, "step": 5877 }, { "epoch": 0.8994644223412395, "grad_norm": 2.3775107486140894, "learning_rate": 1.2099393465547218e-05, "loss": 0.8075, "step": 5878 }, { "epoch": 0.8996174445294568, "grad_norm": 2.19374146725586, "learning_rate": 1.2096970445822467e-05, "loss": 0.6913, "step": 5879 }, { "epoch": 0.899770466717674, "grad_norm": 2.356353536060419, "learning_rate": 1.209454729731456e-05, "loss": 0.6564, "step": 5880 }, { "epoch": 0.8999234889058914, "grad_norm": 2.2970947492536826, "learning_rate": 1.2092124020172304e-05, "loss": 0.7165, "step": 5881 }, { "epoch": 0.9000765110941087, "grad_norm": 2.2264397828108335, "learning_rate": 1.208970061454453e-05, "loss": 0.7145, "step": 5882 }, { "epoch": 0.9002295332823259, "grad_norm": 2.2895364047526754, "learning_rate": 1.2087277080580064e-05, "loss": 0.7901, "step": 5883 }, { "epoch": 0.9003825554705432, "grad_norm": 2.6582101773237494, "learning_rate": 1.2084853418427754e-05, "loss": 0.729, "step": 5884 }, { "epoch": 0.9005355776587605, "grad_norm": 2.4029001974784734, "learning_rate": 1.2082429628236433e-05, "loss": 0.663, "step": 5885 }, { "epoch": 0.9006885998469778, "grad_norm": 2.2788746677911753, "learning_rate": 1.2080005710154962e-05, "loss": 0.6018, "step": 5886 }, { "epoch": 0.9008416220351951, "grad_norm": 2.2037783198820864, "learning_rate": 1.2077581664332205e-05, "loss": 0.6541, "step": 5887 }, { "epoch": 0.9009946442234124, "grad_norm": 2.5472288046633587, "learning_rate": 1.2075157490917033e-05, "loss": 0.7121, "step": 5888 }, { "epoch": 0.9011476664116297, "grad_norm": 2.263169854265418, "learning_rate": 1.2072733190058319e-05, "loss": 0.723, "step": 5889 }, { "epoch": 0.901300688599847, "grad_norm": 2.565421276878311, "learning_rate": 1.2070308761904949e-05, "loss": 0.7627, "step": 5890 }, { "epoch": 0.9014537107880642, "grad_norm": 2.33516891773541, "learning_rate": 1.2067884206605821e-05, "loss": 0.6602, "step": 5891 }, { "epoch": 0.9016067329762816, "grad_norm": 2.369632550813917, "learning_rate": 1.2065459524309838e-05, "loss": 0.7074, "step": 5892 }, { "epoch": 0.9017597551644988, "grad_norm": 2.245894801979679, "learning_rate": 1.20630347151659e-05, "loss": 0.7945, "step": 5893 }, { "epoch": 0.9019127773527161, "grad_norm": 2.133776733386824, "learning_rate": 1.2060609779322932e-05, "loss": 0.6325, "step": 5894 }, { "epoch": 0.9020657995409335, "grad_norm": 2.42818076004384, "learning_rate": 1.2058184716929858e-05, "loss": 0.8039, "step": 5895 }, { "epoch": 0.9022188217291507, "grad_norm": 2.1864609949528937, "learning_rate": 1.2055759528135604e-05, "loss": 0.6626, "step": 5896 }, { "epoch": 0.902371843917368, "grad_norm": 2.3056942365670166, "learning_rate": 1.2053334213089119e-05, "loss": 0.5935, "step": 5897 }, { "epoch": 0.9025248661055854, "grad_norm": 2.4474426076241613, "learning_rate": 1.2050908771939347e-05, "loss": 0.6779, "step": 5898 }, { "epoch": 0.9026778882938026, "grad_norm": 2.245242913146272, "learning_rate": 1.204848320483524e-05, "loss": 0.672, "step": 5899 }, { "epoch": 0.9028309104820199, "grad_norm": 2.4002007892851376, "learning_rate": 1.2046057511925773e-05, "loss": 0.6885, "step": 5900 }, { "epoch": 0.9029839326702371, "grad_norm": 2.0513556880468804, "learning_rate": 1.2043631693359906e-05, "loss": 0.6375, "step": 5901 }, { "epoch": 0.9031369548584545, "grad_norm": 2.185620019488159, "learning_rate": 1.2041205749286617e-05, "loss": 0.6056, "step": 5902 }, { "epoch": 0.9032899770466718, "grad_norm": 2.5696711510264727, "learning_rate": 1.2038779679854905e-05, "loss": 0.8615, "step": 5903 }, { "epoch": 0.903442999234889, "grad_norm": 2.427270547334622, "learning_rate": 1.2036353485213756e-05, "loss": 0.7441, "step": 5904 }, { "epoch": 0.9035960214231064, "grad_norm": 2.6216911775274254, "learning_rate": 1.2033927165512171e-05, "loss": 0.6633, "step": 5905 }, { "epoch": 0.9037490436113237, "grad_norm": 2.3969872880103655, "learning_rate": 1.2031500720899164e-05, "loss": 0.7292, "step": 5906 }, { "epoch": 0.9039020657995409, "grad_norm": 2.3206810022930435, "learning_rate": 1.2029074151523747e-05, "loss": 0.6184, "step": 5907 }, { "epoch": 0.9040550879877582, "grad_norm": 2.1248876858842998, "learning_rate": 1.2026647457534953e-05, "loss": 0.6063, "step": 5908 }, { "epoch": 0.9042081101759755, "grad_norm": 2.105010540329758, "learning_rate": 1.2024220639081807e-05, "loss": 0.6701, "step": 5909 }, { "epoch": 0.9043611323641928, "grad_norm": 2.390961362332351, "learning_rate": 1.2021793696313355e-05, "loss": 0.8372, "step": 5910 }, { "epoch": 0.9045141545524101, "grad_norm": 2.3906835802026634, "learning_rate": 1.2019366629378642e-05, "loss": 0.6914, "step": 5911 }, { "epoch": 0.9046671767406274, "grad_norm": 2.331666879806483, "learning_rate": 1.2016939438426722e-05, "loss": 0.7861, "step": 5912 }, { "epoch": 0.9048201989288447, "grad_norm": 2.2358574061590173, "learning_rate": 1.2014512123606665e-05, "loss": 0.6219, "step": 5913 }, { "epoch": 0.904973221117062, "grad_norm": 2.106600490640814, "learning_rate": 1.2012084685067535e-05, "loss": 0.7043, "step": 5914 }, { "epoch": 0.9051262433052792, "grad_norm": 2.2539604314687325, "learning_rate": 1.2009657122958413e-05, "loss": 0.6648, "step": 5915 }, { "epoch": 0.9052792654934966, "grad_norm": 2.0671088263249464, "learning_rate": 1.2007229437428387e-05, "loss": 0.5475, "step": 5916 }, { "epoch": 0.9054322876817138, "grad_norm": 2.1790247947962507, "learning_rate": 1.200480162862655e-05, "loss": 0.735, "step": 5917 }, { "epoch": 0.9055853098699311, "grad_norm": 2.160285319705886, "learning_rate": 1.2002373696701999e-05, "loss": 0.6605, "step": 5918 }, { "epoch": 0.9057383320581485, "grad_norm": 2.669653172417057, "learning_rate": 1.1999945641803845e-05, "loss": 0.7651, "step": 5919 }, { "epoch": 0.9058913542463657, "grad_norm": 2.332008350846348, "learning_rate": 1.199751746408121e-05, "loss": 0.7304, "step": 5920 }, { "epoch": 0.906044376434583, "grad_norm": 2.3630854316639627, "learning_rate": 1.199508916368321e-05, "loss": 0.7975, "step": 5921 }, { "epoch": 0.9061973986228004, "grad_norm": 2.1925117093837962, "learning_rate": 1.1992660740758981e-05, "loss": 0.6704, "step": 5922 }, { "epoch": 0.9063504208110176, "grad_norm": 2.2723588771999528, "learning_rate": 1.1990232195457659e-05, "loss": 0.7037, "step": 5923 }, { "epoch": 0.9065034429992349, "grad_norm": 2.3270779882957937, "learning_rate": 1.1987803527928393e-05, "loss": 0.6958, "step": 5924 }, { "epoch": 0.9066564651874521, "grad_norm": 2.2060500688080227, "learning_rate": 1.1985374738320337e-05, "loss": 0.5766, "step": 5925 }, { "epoch": 0.9068094873756695, "grad_norm": 2.2317586164719, "learning_rate": 1.1982945826782651e-05, "loss": 0.6561, "step": 5926 }, { "epoch": 0.9069625095638868, "grad_norm": 2.322289561129461, "learning_rate": 1.19805167934645e-05, "loss": 0.6501, "step": 5927 }, { "epoch": 0.907115531752104, "grad_norm": 2.515443301034443, "learning_rate": 1.197808763851507e-05, "loss": 0.7345, "step": 5928 }, { "epoch": 0.9072685539403214, "grad_norm": 2.2527717426764338, "learning_rate": 1.1975658362083542e-05, "loss": 0.7293, "step": 5929 }, { "epoch": 0.9074215761285387, "grad_norm": 2.444160848812312, "learning_rate": 1.19732289643191e-05, "loss": 0.7744, "step": 5930 }, { "epoch": 0.9075745983167559, "grad_norm": 2.0008299026167546, "learning_rate": 1.1970799445370948e-05, "loss": 0.5623, "step": 5931 }, { "epoch": 0.9077276205049732, "grad_norm": 2.2752478064076023, "learning_rate": 1.1968369805388295e-05, "loss": 0.6978, "step": 5932 }, { "epoch": 0.9078806426931905, "grad_norm": 2.583491640352064, "learning_rate": 1.196594004452035e-05, "loss": 0.7084, "step": 5933 }, { "epoch": 0.9080336648814078, "grad_norm": 2.6861883035754706, "learning_rate": 1.1963510162916339e-05, "loss": 0.754, "step": 5934 }, { "epoch": 0.9081866870696251, "grad_norm": 2.094923336464147, "learning_rate": 1.1961080160725484e-05, "loss": 0.6658, "step": 5935 }, { "epoch": 0.9083397092578424, "grad_norm": 2.3754655574983934, "learning_rate": 1.1958650038097029e-05, "loss": 0.7497, "step": 5936 }, { "epoch": 0.9084927314460597, "grad_norm": 2.247236567509479, "learning_rate": 1.1956219795180209e-05, "loss": 0.6799, "step": 5937 }, { "epoch": 0.908645753634277, "grad_norm": 2.176455805806765, "learning_rate": 1.1953789432124279e-05, "loss": 0.7143, "step": 5938 }, { "epoch": 0.9087987758224942, "grad_norm": 2.1382985224906443, "learning_rate": 1.1951358949078497e-05, "loss": 0.6692, "step": 5939 }, { "epoch": 0.9089517980107116, "grad_norm": 2.2784821848914487, "learning_rate": 1.1948928346192128e-05, "loss": 0.7507, "step": 5940 }, { "epoch": 0.9091048201989288, "grad_norm": 2.094814546778022, "learning_rate": 1.1946497623614448e-05, "loss": 0.5985, "step": 5941 }, { "epoch": 0.9092578423871461, "grad_norm": 2.0659105778735283, "learning_rate": 1.194406678149473e-05, "loss": 0.5888, "step": 5942 }, { "epoch": 0.9094108645753635, "grad_norm": 2.529484983416761, "learning_rate": 1.1941635819982267e-05, "loss": 0.7306, "step": 5943 }, { "epoch": 0.9095638867635807, "grad_norm": 2.316500778173214, "learning_rate": 1.1939204739226353e-05, "loss": 0.6232, "step": 5944 }, { "epoch": 0.909716908951798, "grad_norm": 2.2416933756176545, "learning_rate": 1.193677353937629e-05, "loss": 0.6686, "step": 5945 }, { "epoch": 0.9098699311400154, "grad_norm": 2.296889261532724, "learning_rate": 1.1934342220581384e-05, "loss": 0.5793, "step": 5946 }, { "epoch": 0.9100229533282326, "grad_norm": 2.4069543424400677, "learning_rate": 1.193191078299096e-05, "loss": 0.8436, "step": 5947 }, { "epoch": 0.9101759755164499, "grad_norm": 2.162792615318407, "learning_rate": 1.1929479226754338e-05, "loss": 0.7032, "step": 5948 }, { "epoch": 0.9103289977046671, "grad_norm": 2.2406697992043254, "learning_rate": 1.1927047552020845e-05, "loss": 0.6408, "step": 5949 }, { "epoch": 0.9104820198928845, "grad_norm": 2.4229962303297103, "learning_rate": 1.1924615758939824e-05, "loss": 0.6942, "step": 5950 }, { "epoch": 0.9106350420811018, "grad_norm": 2.467996134227574, "learning_rate": 1.192218384766062e-05, "loss": 0.782, "step": 5951 }, { "epoch": 0.910788064269319, "grad_norm": 2.4342339180377603, "learning_rate": 1.1919751818332586e-05, "loss": 0.688, "step": 5952 }, { "epoch": 0.9109410864575364, "grad_norm": 2.1932437749285167, "learning_rate": 1.191731967110508e-05, "loss": 0.6378, "step": 5953 }, { "epoch": 0.9110941086457537, "grad_norm": 2.055801571139704, "learning_rate": 1.1914887406127478e-05, "loss": 0.6698, "step": 5954 }, { "epoch": 0.9112471308339709, "grad_norm": 2.2684464246899, "learning_rate": 1.1912455023549147e-05, "loss": 0.6851, "step": 5955 }, { "epoch": 0.9114001530221882, "grad_norm": 2.5037197092461922, "learning_rate": 1.1910022523519468e-05, "loss": 0.7673, "step": 5956 }, { "epoch": 0.9115531752104055, "grad_norm": 1.9131788391694138, "learning_rate": 1.1907589906187837e-05, "loss": 0.5243, "step": 5957 }, { "epoch": 0.9117061973986228, "grad_norm": 2.234134469025724, "learning_rate": 1.1905157171703651e-05, "loss": 0.6361, "step": 5958 }, { "epoch": 0.9118592195868401, "grad_norm": 2.210064411480152, "learning_rate": 1.1902724320216304e-05, "loss": 0.6753, "step": 5959 }, { "epoch": 0.9120122417750574, "grad_norm": 2.3378206504402086, "learning_rate": 1.1900291351875215e-05, "loss": 0.6271, "step": 5960 }, { "epoch": 0.9121652639632747, "grad_norm": 2.2103620527103556, "learning_rate": 1.18978582668298e-05, "loss": 0.6063, "step": 5961 }, { "epoch": 0.912318286151492, "grad_norm": 2.265094089348472, "learning_rate": 1.1895425065229487e-05, "loss": 0.6786, "step": 5962 }, { "epoch": 0.9124713083397092, "grad_norm": 2.1542940090981553, "learning_rate": 1.1892991747223704e-05, "loss": 0.6719, "step": 5963 }, { "epoch": 0.9126243305279266, "grad_norm": 2.1440181946848904, "learning_rate": 1.189055831296189e-05, "loss": 0.5932, "step": 5964 }, { "epoch": 0.9127773527161438, "grad_norm": 2.0963045032970906, "learning_rate": 1.1888124762593496e-05, "loss": 0.6104, "step": 5965 }, { "epoch": 0.9129303749043611, "grad_norm": 2.5262671669907633, "learning_rate": 1.1885691096267975e-05, "loss": 0.717, "step": 5966 }, { "epoch": 0.9130833970925785, "grad_norm": 2.2639544983140274, "learning_rate": 1.1883257314134787e-05, "loss": 0.6589, "step": 5967 }, { "epoch": 0.9132364192807957, "grad_norm": 2.020210103595384, "learning_rate": 1.1880823416343397e-05, "loss": 0.6291, "step": 5968 }, { "epoch": 0.913389441469013, "grad_norm": 2.1045346358957007, "learning_rate": 1.1878389403043284e-05, "loss": 0.6635, "step": 5969 }, { "epoch": 0.9135424636572304, "grad_norm": 2.0845494979682653, "learning_rate": 1.1875955274383934e-05, "loss": 0.6641, "step": 5970 }, { "epoch": 0.9136954858454476, "grad_norm": 2.449931136792063, "learning_rate": 1.1873521030514826e-05, "loss": 0.7608, "step": 5971 }, { "epoch": 0.9138485080336649, "grad_norm": 2.4427533981065226, "learning_rate": 1.1871086671585465e-05, "loss": 0.804, "step": 5972 }, { "epoch": 0.9140015302218821, "grad_norm": 2.196649531828342, "learning_rate": 1.1868652197745351e-05, "loss": 0.5977, "step": 5973 }, { "epoch": 0.9141545524100995, "grad_norm": 2.0401281352602343, "learning_rate": 1.1866217609143998e-05, "loss": 0.7304, "step": 5974 }, { "epoch": 0.9143075745983168, "grad_norm": 2.2225569238419274, "learning_rate": 1.1863782905930918e-05, "loss": 0.6879, "step": 5975 }, { "epoch": 0.914460596786534, "grad_norm": 2.415433043337009, "learning_rate": 1.186134808825564e-05, "loss": 0.7098, "step": 5976 }, { "epoch": 0.9146136189747514, "grad_norm": 2.1208798515694767, "learning_rate": 1.1858913156267694e-05, "loss": 0.638, "step": 5977 }, { "epoch": 0.9147666411629687, "grad_norm": 2.186872985982911, "learning_rate": 1.1856478110116613e-05, "loss": 0.6302, "step": 5978 }, { "epoch": 0.9149196633511859, "grad_norm": 2.153919248477717, "learning_rate": 1.1854042949951958e-05, "loss": 0.7054, "step": 5979 }, { "epoch": 0.9150726855394032, "grad_norm": 2.397388197874348, "learning_rate": 1.1851607675923269e-05, "loss": 0.7276, "step": 5980 }, { "epoch": 0.9152257077276205, "grad_norm": 2.0648750992816165, "learning_rate": 1.1849172288180106e-05, "loss": 0.6745, "step": 5981 }, { "epoch": 0.9153787299158378, "grad_norm": 2.067192519982237, "learning_rate": 1.1846736786872042e-05, "loss": 0.6398, "step": 5982 }, { "epoch": 0.9155317521040551, "grad_norm": 2.045318240934085, "learning_rate": 1.1844301172148649e-05, "loss": 0.6044, "step": 5983 }, { "epoch": 0.9156847742922724, "grad_norm": 2.1862827497217547, "learning_rate": 1.1841865444159502e-05, "loss": 0.69, "step": 5984 }, { "epoch": 0.9158377964804897, "grad_norm": 2.411569778709733, "learning_rate": 1.1839429603054195e-05, "loss": 0.6993, "step": 5985 }, { "epoch": 0.9159908186687069, "grad_norm": 2.163870964236157, "learning_rate": 1.183699364898232e-05, "loss": 0.7071, "step": 5986 }, { "epoch": 0.9161438408569242, "grad_norm": 2.2216540837646543, "learning_rate": 1.1834557582093478e-05, "loss": 0.7181, "step": 5987 }, { "epoch": 0.9162968630451416, "grad_norm": 2.4990352433843683, "learning_rate": 1.183212140253728e-05, "loss": 0.76, "step": 5988 }, { "epoch": 0.9164498852333588, "grad_norm": 2.4982223206565495, "learning_rate": 1.1829685110463339e-05, "loss": 0.7631, "step": 5989 }, { "epoch": 0.9166029074215761, "grad_norm": 2.5748308659023333, "learning_rate": 1.1827248706021274e-05, "loss": 0.6416, "step": 5990 }, { "epoch": 0.9167559296097935, "grad_norm": 2.3410439692766816, "learning_rate": 1.182481218936072e-05, "loss": 0.6934, "step": 5991 }, { "epoch": 0.9169089517980107, "grad_norm": 2.55217646983151, "learning_rate": 1.1822375560631311e-05, "loss": 0.6742, "step": 5992 }, { "epoch": 0.917061973986228, "grad_norm": 2.460463784312502, "learning_rate": 1.181993881998269e-05, "loss": 0.7407, "step": 5993 }, { "epoch": 0.9172149961744452, "grad_norm": 2.094408911904513, "learning_rate": 1.1817501967564503e-05, "loss": 0.5756, "step": 5994 }, { "epoch": 0.9173680183626626, "grad_norm": 2.3667932565329397, "learning_rate": 1.1815065003526417e-05, "loss": 0.6282, "step": 5995 }, { "epoch": 0.9175210405508799, "grad_norm": 2.259307626828805, "learning_rate": 1.1812627928018086e-05, "loss": 0.6107, "step": 5996 }, { "epoch": 0.9176740627390971, "grad_norm": 2.6719058170793133, "learning_rate": 1.1810190741189177e-05, "loss": 0.7363, "step": 5997 }, { "epoch": 0.9178270849273145, "grad_norm": 2.253953083863052, "learning_rate": 1.180775344318938e-05, "loss": 0.6705, "step": 5998 }, { "epoch": 0.9179801071155318, "grad_norm": 2.2485300966189326, "learning_rate": 1.1805316034168373e-05, "loss": 0.6667, "step": 5999 }, { "epoch": 0.918133129303749, "grad_norm": 2.1631800211031407, "learning_rate": 1.1802878514275839e-05, "loss": 0.6333, "step": 6000 }, { "epoch": 0.9182861514919664, "grad_norm": 2.3348928082724445, "learning_rate": 1.1800440883661485e-05, "loss": 0.7369, "step": 6001 }, { "epoch": 0.9184391736801836, "grad_norm": 2.592997532053126, "learning_rate": 1.1798003142475017e-05, "loss": 0.6496, "step": 6002 }, { "epoch": 0.9185921958684009, "grad_norm": 2.2719886310328548, "learning_rate": 1.1795565290866137e-05, "loss": 0.659, "step": 6003 }, { "epoch": 0.9187452180566182, "grad_norm": 2.1908026163259957, "learning_rate": 1.1793127328984572e-05, "loss": 0.6543, "step": 6004 }, { "epoch": 0.9188982402448355, "grad_norm": 2.2403925541148357, "learning_rate": 1.1790689256980042e-05, "loss": 0.7169, "step": 6005 }, { "epoch": 0.9190512624330528, "grad_norm": 2.1158419300219986, "learning_rate": 1.1788251075002277e-05, "loss": 0.6643, "step": 6006 }, { "epoch": 0.9192042846212701, "grad_norm": 2.256627339715977, "learning_rate": 1.178581278320102e-05, "loss": 0.6332, "step": 6007 }, { "epoch": 0.9193573068094874, "grad_norm": 2.2967103658681163, "learning_rate": 1.1783374381726016e-05, "loss": 0.6572, "step": 6008 }, { "epoch": 0.9195103289977047, "grad_norm": 2.3176786154668316, "learning_rate": 1.178093587072701e-05, "loss": 0.6278, "step": 6009 }, { "epoch": 0.9196633511859219, "grad_norm": 2.276335975164193, "learning_rate": 1.1778497250353767e-05, "loss": 0.7025, "step": 6010 }, { "epoch": 0.9198163733741392, "grad_norm": 2.198538269825206, "learning_rate": 1.1776058520756053e-05, "loss": 0.708, "step": 6011 }, { "epoch": 0.9199693955623566, "grad_norm": 2.0093992303204287, "learning_rate": 1.1773619682083632e-05, "loss": 0.5572, "step": 6012 }, { "epoch": 0.9201224177505738, "grad_norm": 2.147072844426607, "learning_rate": 1.1771180734486292e-05, "loss": 0.6686, "step": 6013 }, { "epoch": 0.9202754399387911, "grad_norm": 2.120096783175212, "learning_rate": 1.1768741678113814e-05, "loss": 0.692, "step": 6014 }, { "epoch": 0.9204284621270085, "grad_norm": 2.18494526069704, "learning_rate": 1.1766302513115988e-05, "loss": 0.6957, "step": 6015 }, { "epoch": 0.9205814843152257, "grad_norm": 2.183623529461029, "learning_rate": 1.1763863239642617e-05, "loss": 0.6199, "step": 6016 }, { "epoch": 0.920734506503443, "grad_norm": 2.2086171453443346, "learning_rate": 1.1761423857843504e-05, "loss": 0.6994, "step": 6017 }, { "epoch": 0.9208875286916602, "grad_norm": 2.175576432459934, "learning_rate": 1.175898436786846e-05, "loss": 0.6868, "step": 6018 }, { "epoch": 0.9210405508798776, "grad_norm": 2.18905913642669, "learning_rate": 1.1756544769867304e-05, "loss": 0.6596, "step": 6019 }, { "epoch": 0.9211935730680949, "grad_norm": 2.1552159728330467, "learning_rate": 1.1754105063989865e-05, "loss": 0.628, "step": 6020 }, { "epoch": 0.9213465952563121, "grad_norm": 2.352853585156651, "learning_rate": 1.175166525038597e-05, "loss": 0.7225, "step": 6021 }, { "epoch": 0.9214996174445295, "grad_norm": 2.2186163270481645, "learning_rate": 1.1749225329205458e-05, "loss": 0.5838, "step": 6022 }, { "epoch": 0.9216526396327468, "grad_norm": 2.316156419640685, "learning_rate": 1.1746785300598178e-05, "loss": 0.6525, "step": 6023 }, { "epoch": 0.921805661820964, "grad_norm": 2.5308854754596917, "learning_rate": 1.174434516471398e-05, "loss": 0.6507, "step": 6024 }, { "epoch": 0.9219586840091814, "grad_norm": 2.038668645225144, "learning_rate": 1.1741904921702718e-05, "loss": 0.7139, "step": 6025 }, { "epoch": 0.9221117061973986, "grad_norm": 2.118866474655756, "learning_rate": 1.1739464571714263e-05, "loss": 0.6408, "step": 6026 }, { "epoch": 0.9222647283856159, "grad_norm": 2.4219096622082366, "learning_rate": 1.1737024114898483e-05, "loss": 0.9817, "step": 6027 }, { "epoch": 0.9224177505738332, "grad_norm": 2.3920546815691632, "learning_rate": 1.1734583551405257e-05, "loss": 0.731, "step": 6028 }, { "epoch": 0.9225707727620505, "grad_norm": 2.2898608411847734, "learning_rate": 1.1732142881384469e-05, "loss": 0.7989, "step": 6029 }, { "epoch": 0.9227237949502678, "grad_norm": 2.275071139556073, "learning_rate": 1.1729702104986011e-05, "loss": 0.6128, "step": 6030 }, { "epoch": 0.9228768171384851, "grad_norm": 2.2660964406470048, "learning_rate": 1.1727261222359781e-05, "loss": 0.7475, "step": 6031 }, { "epoch": 0.9230298393267024, "grad_norm": 1.9638194324248897, "learning_rate": 1.172482023365568e-05, "loss": 0.6469, "step": 6032 }, { "epoch": 0.9231828615149197, "grad_norm": 2.061803171551883, "learning_rate": 1.1722379139023623e-05, "loss": 0.7059, "step": 6033 }, { "epoch": 0.9233358837031369, "grad_norm": 2.2447062635254897, "learning_rate": 1.1719937938613523e-05, "loss": 0.6911, "step": 6034 }, { "epoch": 0.9234889058913542, "grad_norm": 2.1651544199285295, "learning_rate": 1.1717496632575304e-05, "loss": 0.5678, "step": 6035 }, { "epoch": 0.9236419280795716, "grad_norm": 2.4604106984706773, "learning_rate": 1.1715055221058903e-05, "loss": 0.7333, "step": 6036 }, { "epoch": 0.9237949502677888, "grad_norm": 2.0659823412706104, "learning_rate": 1.171261370421425e-05, "loss": 0.5865, "step": 6037 }, { "epoch": 0.9239479724560061, "grad_norm": 2.2467918440005628, "learning_rate": 1.1710172082191288e-05, "loss": 0.6591, "step": 6038 }, { "epoch": 0.9241009946442235, "grad_norm": 2.1449493890857787, "learning_rate": 1.170773035513997e-05, "loss": 0.7061, "step": 6039 }, { "epoch": 0.9242540168324407, "grad_norm": 2.2560181924361755, "learning_rate": 1.1705288523210253e-05, "loss": 0.6814, "step": 6040 }, { "epoch": 0.924407039020658, "grad_norm": 2.141905549714062, "learning_rate": 1.1702846586552088e-05, "loss": 0.6385, "step": 6041 }, { "epoch": 0.9245600612088752, "grad_norm": 2.2548252680890135, "learning_rate": 1.1700404545315458e-05, "loss": 0.6793, "step": 6042 }, { "epoch": 0.9247130833970926, "grad_norm": 2.482889662318577, "learning_rate": 1.1697962399650333e-05, "loss": 0.7582, "step": 6043 }, { "epoch": 0.9248661055853099, "grad_norm": 2.5506460166887264, "learning_rate": 1.1695520149706693e-05, "loss": 0.7141, "step": 6044 }, { "epoch": 0.9250191277735271, "grad_norm": 2.145699244583287, "learning_rate": 1.1693077795634531e-05, "loss": 0.6328, "step": 6045 }, { "epoch": 0.9251721499617445, "grad_norm": 2.4752175224389577, "learning_rate": 1.1690635337583836e-05, "loss": 0.6299, "step": 6046 }, { "epoch": 0.9253251721499618, "grad_norm": 2.2683649200809675, "learning_rate": 1.168819277570461e-05, "loss": 0.6869, "step": 6047 }, { "epoch": 0.925478194338179, "grad_norm": 2.2004469435637457, "learning_rate": 1.168575011014686e-05, "loss": 0.6129, "step": 6048 }, { "epoch": 0.9256312165263963, "grad_norm": 2.4371132806076994, "learning_rate": 1.1683307341060608e-05, "loss": 0.7319, "step": 6049 }, { "epoch": 0.9257842387146136, "grad_norm": 2.2224151080045402, "learning_rate": 1.168086446859586e-05, "loss": 0.6627, "step": 6050 }, { "epoch": 0.9259372609028309, "grad_norm": 2.0430737085760287, "learning_rate": 1.1678421492902652e-05, "loss": 0.6371, "step": 6051 }, { "epoch": 0.9260902830910482, "grad_norm": 2.253282435206617, "learning_rate": 1.1675978414131013e-05, "loss": 0.6545, "step": 6052 }, { "epoch": 0.9262433052792655, "grad_norm": 2.2800025754564257, "learning_rate": 1.1673535232430986e-05, "loss": 0.6834, "step": 6053 }, { "epoch": 0.9263963274674828, "grad_norm": 2.107954815478626, "learning_rate": 1.1671091947952611e-05, "loss": 0.5648, "step": 6054 }, { "epoch": 0.9265493496557001, "grad_norm": 2.4385308339899248, "learning_rate": 1.1668648560845944e-05, "loss": 0.6888, "step": 6055 }, { "epoch": 0.9267023718439173, "grad_norm": 2.2467214330301633, "learning_rate": 1.1666205071261038e-05, "loss": 0.7371, "step": 6056 }, { "epoch": 0.9268553940321347, "grad_norm": 2.507942422969182, "learning_rate": 1.1663761479347964e-05, "loss": 0.7353, "step": 6057 }, { "epoch": 0.9270084162203519, "grad_norm": 2.196286306638163, "learning_rate": 1.1661317785256787e-05, "loss": 0.717, "step": 6058 }, { "epoch": 0.9271614384085692, "grad_norm": 2.2098159183285606, "learning_rate": 1.1658873989137585e-05, "loss": 0.7388, "step": 6059 }, { "epoch": 0.9273144605967866, "grad_norm": 2.539805991550103, "learning_rate": 1.165643009114044e-05, "loss": 0.7721, "step": 6060 }, { "epoch": 0.9274674827850038, "grad_norm": 2.6044090069624524, "learning_rate": 1.165398609141545e-05, "loss": 0.6935, "step": 6061 }, { "epoch": 0.9276205049732211, "grad_norm": 2.384150928114248, "learning_rate": 1.1651541990112698e-05, "loss": 0.7076, "step": 6062 }, { "epoch": 0.9277735271614385, "grad_norm": 2.4976738478057343, "learning_rate": 1.164909778738229e-05, "loss": 0.7126, "step": 6063 }, { "epoch": 0.9279265493496557, "grad_norm": 2.2947249137280923, "learning_rate": 1.164665348337434e-05, "loss": 0.6781, "step": 6064 }, { "epoch": 0.928079571537873, "grad_norm": 2.3117604725656293, "learning_rate": 1.164420907823896e-05, "loss": 0.6601, "step": 6065 }, { "epoch": 0.9282325937260902, "grad_norm": 2.1388291258950605, "learning_rate": 1.1641764572126262e-05, "loss": 0.66, "step": 6066 }, { "epoch": 0.9283856159143076, "grad_norm": 2.2545193918675506, "learning_rate": 1.1639319965186382e-05, "loss": 0.6792, "step": 6067 }, { "epoch": 0.9285386381025249, "grad_norm": 2.2752674938211506, "learning_rate": 1.1636875257569448e-05, "loss": 0.6095, "step": 6068 }, { "epoch": 0.9286916602907421, "grad_norm": 2.2575997598142723, "learning_rate": 1.1634430449425604e-05, "loss": 0.6722, "step": 6069 }, { "epoch": 0.9288446824789595, "grad_norm": 2.2098103933749904, "learning_rate": 1.1631985540904991e-05, "loss": 0.6803, "step": 6070 }, { "epoch": 0.9289977046671768, "grad_norm": 2.294225796155611, "learning_rate": 1.1629540532157758e-05, "loss": 0.7256, "step": 6071 }, { "epoch": 0.929150726855394, "grad_norm": 2.458554414220298, "learning_rate": 1.1627095423334072e-05, "loss": 0.802, "step": 6072 }, { "epoch": 0.9293037490436113, "grad_norm": 2.3884126063112157, "learning_rate": 1.1624650214584088e-05, "loss": 0.7631, "step": 6073 }, { "epoch": 0.9294567712318286, "grad_norm": 2.1963002613069618, "learning_rate": 1.1622204906057979e-05, "loss": 0.6559, "step": 6074 }, { "epoch": 0.9296097934200459, "grad_norm": 2.3884546744501933, "learning_rate": 1.161975949790592e-05, "loss": 0.6752, "step": 6075 }, { "epoch": 0.9297628156082632, "grad_norm": 2.2694496861180737, "learning_rate": 1.1617313990278093e-05, "loss": 0.6918, "step": 6076 }, { "epoch": 0.9299158377964805, "grad_norm": 2.3172171693412937, "learning_rate": 1.161486838332469e-05, "loss": 0.6784, "step": 6077 }, { "epoch": 0.9300688599846978, "grad_norm": 2.1329383279977665, "learning_rate": 1.16124226771959e-05, "loss": 0.5508, "step": 6078 }, { "epoch": 0.9302218821729151, "grad_norm": 2.293796679557291, "learning_rate": 1.1609976872041924e-05, "loss": 0.658, "step": 6079 }, { "epoch": 0.9303749043611323, "grad_norm": 2.210358770465247, "learning_rate": 1.1607530968012971e-05, "loss": 0.7028, "step": 6080 }, { "epoch": 0.9305279265493497, "grad_norm": 2.213691145452231, "learning_rate": 1.1605084965259256e-05, "loss": 0.615, "step": 6081 }, { "epoch": 0.9306809487375669, "grad_norm": 2.4649032540344344, "learning_rate": 1.160263886393099e-05, "loss": 0.7268, "step": 6082 }, { "epoch": 0.9308339709257842, "grad_norm": 2.017207465174122, "learning_rate": 1.1600192664178405e-05, "loss": 0.6671, "step": 6083 }, { "epoch": 0.9309869931140016, "grad_norm": 1.9259330149007499, "learning_rate": 1.1597746366151725e-05, "loss": 0.5948, "step": 6084 }, { "epoch": 0.9311400153022188, "grad_norm": 2.167872374879758, "learning_rate": 1.159529997000119e-05, "loss": 0.7672, "step": 6085 }, { "epoch": 0.9312930374904361, "grad_norm": 2.551354194085087, "learning_rate": 1.1592853475877049e-05, "loss": 0.7649, "step": 6086 }, { "epoch": 0.9314460596786535, "grad_norm": 2.3450320571655348, "learning_rate": 1.159040688392954e-05, "loss": 0.6592, "step": 6087 }, { "epoch": 0.9315990818668707, "grad_norm": 2.413844869725996, "learning_rate": 1.158796019430892e-05, "loss": 0.7055, "step": 6088 }, { "epoch": 0.931752104055088, "grad_norm": 2.0778539002805014, "learning_rate": 1.1585513407165456e-05, "loss": 0.578, "step": 6089 }, { "epoch": 0.9319051262433052, "grad_norm": 2.286133113069323, "learning_rate": 1.1583066522649413e-05, "loss": 0.7993, "step": 6090 }, { "epoch": 0.9320581484315226, "grad_norm": 2.1857984709529985, "learning_rate": 1.1580619540911058e-05, "loss": 0.7388, "step": 6091 }, { "epoch": 0.9322111706197399, "grad_norm": 2.4184829856903547, "learning_rate": 1.1578172462100676e-05, "loss": 0.6769, "step": 6092 }, { "epoch": 0.9323641928079571, "grad_norm": 2.2057962215002744, "learning_rate": 1.157572528636855e-05, "loss": 0.6771, "step": 6093 }, { "epoch": 0.9325172149961745, "grad_norm": 1.9624533908826989, "learning_rate": 1.1573278013864968e-05, "loss": 0.5544, "step": 6094 }, { "epoch": 0.9326702371843917, "grad_norm": 2.12121986199544, "learning_rate": 1.1570830644740227e-05, "loss": 0.6088, "step": 6095 }, { "epoch": 0.932823259372609, "grad_norm": 2.2944618881122474, "learning_rate": 1.1568383179144634e-05, "loss": 0.7788, "step": 6096 }, { "epoch": 0.9329762815608263, "grad_norm": 2.2593200428155256, "learning_rate": 1.1565935617228492e-05, "loss": 0.5957, "step": 6097 }, { "epoch": 0.9331293037490436, "grad_norm": 2.252447232896904, "learning_rate": 1.1563487959142121e-05, "loss": 0.6521, "step": 6098 }, { "epoch": 0.9332823259372609, "grad_norm": 2.5056513509402656, "learning_rate": 1.1561040205035835e-05, "loss": 0.8213, "step": 6099 }, { "epoch": 0.9334353481254782, "grad_norm": 2.247790538197072, "learning_rate": 1.1558592355059965e-05, "loss": 0.6064, "step": 6100 }, { "epoch": 0.9335883703136955, "grad_norm": 2.1827896418062407, "learning_rate": 1.1556144409364838e-05, "loss": 0.6262, "step": 6101 }, { "epoch": 0.9337413925019128, "grad_norm": 2.5213596631860202, "learning_rate": 1.15536963681008e-05, "loss": 0.714, "step": 6102 }, { "epoch": 0.93389441469013, "grad_norm": 2.139941754037212, "learning_rate": 1.1551248231418188e-05, "loss": 0.6674, "step": 6103 }, { "epoch": 0.9340474368783473, "grad_norm": 2.2803009133909513, "learning_rate": 1.154879999946735e-05, "loss": 0.6406, "step": 6104 }, { "epoch": 0.9342004590665647, "grad_norm": 2.317103896037347, "learning_rate": 1.154635167239865e-05, "loss": 0.7685, "step": 6105 }, { "epoch": 0.9343534812547819, "grad_norm": 2.1500102693630345, "learning_rate": 1.1543903250362446e-05, "loss": 0.6668, "step": 6106 }, { "epoch": 0.9345065034429992, "grad_norm": 2.132728448753486, "learning_rate": 1.1541454733509096e-05, "loss": 0.6863, "step": 6107 }, { "epoch": 0.9346595256312166, "grad_norm": 2.208420311449703, "learning_rate": 1.1539006121988984e-05, "loss": 0.751, "step": 6108 }, { "epoch": 0.9348125478194338, "grad_norm": 2.24257468362804, "learning_rate": 1.1536557415952488e-05, "loss": 0.6207, "step": 6109 }, { "epoch": 0.9349655700076511, "grad_norm": 2.504464488973117, "learning_rate": 1.1534108615549988e-05, "loss": 0.7274, "step": 6110 }, { "epoch": 0.9351185921958683, "grad_norm": 2.004035147085841, "learning_rate": 1.1531659720931877e-05, "loss": 0.6282, "step": 6111 }, { "epoch": 0.9352716143840857, "grad_norm": 2.095751079721034, "learning_rate": 1.152921073224855e-05, "loss": 0.587, "step": 6112 }, { "epoch": 0.935424636572303, "grad_norm": 2.0794742962572843, "learning_rate": 1.152676164965041e-05, "loss": 0.6929, "step": 6113 }, { "epoch": 0.9355776587605202, "grad_norm": 2.2462164854122606, "learning_rate": 1.1524312473287866e-05, "loss": 0.6546, "step": 6114 }, { "epoch": 0.9357306809487376, "grad_norm": 2.2331729878802467, "learning_rate": 1.1521863203311327e-05, "loss": 0.7246, "step": 6115 }, { "epoch": 0.9358837031369549, "grad_norm": 2.2690525367182928, "learning_rate": 1.1519413839871218e-05, "loss": 0.7247, "step": 6116 }, { "epoch": 0.9360367253251721, "grad_norm": 2.009458991477944, "learning_rate": 1.1516964383117957e-05, "loss": 0.6288, "step": 6117 }, { "epoch": 0.9361897475133895, "grad_norm": 2.456698580139736, "learning_rate": 1.1514514833201981e-05, "loss": 0.7207, "step": 6118 }, { "epoch": 0.9363427697016067, "grad_norm": 1.9635176193390491, "learning_rate": 1.1512065190273728e-05, "loss": 0.6222, "step": 6119 }, { "epoch": 0.936495791889824, "grad_norm": 2.2822880914918287, "learning_rate": 1.150961545448363e-05, "loss": 0.697, "step": 6120 }, { "epoch": 0.9366488140780413, "grad_norm": 2.1295174618645745, "learning_rate": 1.1507165625982144e-05, "loss": 0.6507, "step": 6121 }, { "epoch": 0.9368018362662586, "grad_norm": 2.1926894823041376, "learning_rate": 1.1504715704919722e-05, "loss": 0.6582, "step": 6122 }, { "epoch": 0.9369548584544759, "grad_norm": 2.5892530861553373, "learning_rate": 1.1502265691446821e-05, "loss": 0.6738, "step": 6123 }, { "epoch": 0.9371078806426932, "grad_norm": 2.068185855054658, "learning_rate": 1.1499815585713909e-05, "loss": 0.6418, "step": 6124 }, { "epoch": 0.9372609028309105, "grad_norm": 2.2803267411959394, "learning_rate": 1.1497365387871454e-05, "loss": 0.5819, "step": 6125 }, { "epoch": 0.9374139250191278, "grad_norm": 2.1349957197936713, "learning_rate": 1.1494915098069927e-05, "loss": 0.5879, "step": 6126 }, { "epoch": 0.937566947207345, "grad_norm": 2.5188656753058507, "learning_rate": 1.1492464716459824e-05, "loss": 0.6856, "step": 6127 }, { "epoch": 0.9377199693955623, "grad_norm": 2.0389553034383265, "learning_rate": 1.1490014243191621e-05, "loss": 0.5911, "step": 6128 }, { "epoch": 0.9378729915837797, "grad_norm": 2.356383652380313, "learning_rate": 1.1487563678415812e-05, "loss": 0.7431, "step": 6129 }, { "epoch": 0.9380260137719969, "grad_norm": 2.1593952450928033, "learning_rate": 1.14851130222829e-05, "loss": 0.6465, "step": 6130 }, { "epoch": 0.9381790359602142, "grad_norm": 2.4550090735416306, "learning_rate": 1.148266227494339e-05, "loss": 0.7766, "step": 6131 }, { "epoch": 0.9383320581484316, "grad_norm": 2.4549971534102175, "learning_rate": 1.1480211436547783e-05, "loss": 0.68, "step": 6132 }, { "epoch": 0.9384850803366488, "grad_norm": 2.3471098658149447, "learning_rate": 1.1477760507246606e-05, "loss": 0.6494, "step": 6133 }, { "epoch": 0.9386381025248661, "grad_norm": 2.173375519361347, "learning_rate": 1.1475309487190376e-05, "loss": 0.6001, "step": 6134 }, { "epoch": 0.9387911247130833, "grad_norm": 2.5263042796935973, "learning_rate": 1.1472858376529616e-05, "loss": 0.6864, "step": 6135 }, { "epoch": 0.9389441469013007, "grad_norm": 2.177731124432879, "learning_rate": 1.1470407175414864e-05, "loss": 0.6324, "step": 6136 }, { "epoch": 0.939097169089518, "grad_norm": 2.2483120867164, "learning_rate": 1.1467955883996653e-05, "loss": 0.7105, "step": 6137 }, { "epoch": 0.9392501912777352, "grad_norm": 2.3912382526507656, "learning_rate": 1.146550450242553e-05, "loss": 0.6568, "step": 6138 }, { "epoch": 0.9394032134659526, "grad_norm": 2.411089906961605, "learning_rate": 1.146305303085204e-05, "loss": 0.6886, "step": 6139 }, { "epoch": 0.9395562356541699, "grad_norm": 2.2962550676687505, "learning_rate": 1.1460601469426741e-05, "loss": 0.7108, "step": 6140 }, { "epoch": 0.9397092578423871, "grad_norm": 2.2516999284059, "learning_rate": 1.1458149818300192e-05, "loss": 0.6853, "step": 6141 }, { "epoch": 0.9398622800306045, "grad_norm": 2.3487910495210094, "learning_rate": 1.1455698077622959e-05, "loss": 0.6304, "step": 6142 }, { "epoch": 0.9400153022188217, "grad_norm": 2.3253614074896682, "learning_rate": 1.1453246247545612e-05, "loss": 0.7826, "step": 6143 }, { "epoch": 0.940168324407039, "grad_norm": 2.035455850686615, "learning_rate": 1.145079432821873e-05, "loss": 0.6026, "step": 6144 }, { "epoch": 0.9403213465952563, "grad_norm": 2.0392874454271714, "learning_rate": 1.1448342319792886e-05, "loss": 0.7886, "step": 6145 }, { "epoch": 0.9404743687834736, "grad_norm": 2.301481523837578, "learning_rate": 1.1445890222418681e-05, "loss": 0.6503, "step": 6146 }, { "epoch": 0.9406273909716909, "grad_norm": 2.307110238176336, "learning_rate": 1.1443438036246702e-05, "loss": 0.7039, "step": 6147 }, { "epoch": 0.9407804131599082, "grad_norm": 2.355842387023983, "learning_rate": 1.144098576142754e-05, "loss": 0.7409, "step": 6148 }, { "epoch": 0.9409334353481255, "grad_norm": 2.5326182737200673, "learning_rate": 1.1438533398111808e-05, "loss": 0.8251, "step": 6149 }, { "epoch": 0.9410864575363428, "grad_norm": 2.227199426196083, "learning_rate": 1.1436080946450115e-05, "loss": 0.6085, "step": 6150 }, { "epoch": 0.94123947972456, "grad_norm": 2.2345241158679165, "learning_rate": 1.1433628406593069e-05, "loss": 0.6675, "step": 6151 }, { "epoch": 0.9413925019127773, "grad_norm": 1.8715859371306767, "learning_rate": 1.1431175778691297e-05, "loss": 0.5941, "step": 6152 }, { "epoch": 0.9415455241009947, "grad_norm": 2.2216163917317675, "learning_rate": 1.1428723062895421e-05, "loss": 0.6047, "step": 6153 }, { "epoch": 0.9416985462892119, "grad_norm": 1.9294546956579186, "learning_rate": 1.1426270259356071e-05, "loss": 0.5409, "step": 6154 }, { "epoch": 0.9418515684774292, "grad_norm": 2.1209551146155405, "learning_rate": 1.1423817368223886e-05, "loss": 0.6277, "step": 6155 }, { "epoch": 0.9420045906656466, "grad_norm": 2.192111802183402, "learning_rate": 1.1421364389649508e-05, "loss": 0.5492, "step": 6156 }, { "epoch": 0.9421576128538638, "grad_norm": 2.19448860446009, "learning_rate": 1.141891132378358e-05, "loss": 0.6536, "step": 6157 }, { "epoch": 0.9423106350420811, "grad_norm": 2.071398629519424, "learning_rate": 1.1416458170776757e-05, "loss": 0.5741, "step": 6158 }, { "epoch": 0.9424636572302983, "grad_norm": 2.138162941124051, "learning_rate": 1.1414004930779697e-05, "loss": 0.642, "step": 6159 }, { "epoch": 0.9426166794185157, "grad_norm": 2.0968607838531805, "learning_rate": 1.1411551603943065e-05, "loss": 0.6482, "step": 6160 }, { "epoch": 0.942769701606733, "grad_norm": 2.2631243499184577, "learning_rate": 1.1409098190417523e-05, "loss": 0.5649, "step": 6161 }, { "epoch": 0.9429227237949502, "grad_norm": 2.2003646678049993, "learning_rate": 1.1406644690353752e-05, "loss": 0.6781, "step": 6162 }, { "epoch": 0.9430757459831676, "grad_norm": 2.2561519198823246, "learning_rate": 1.1404191103902425e-05, "loss": 0.57, "step": 6163 }, { "epoch": 0.9432287681713849, "grad_norm": 2.362404452628459, "learning_rate": 1.140173743121423e-05, "loss": 0.6441, "step": 6164 }, { "epoch": 0.9433817903596021, "grad_norm": 2.2230712076613153, "learning_rate": 1.1399283672439856e-05, "loss": 0.6328, "step": 6165 }, { "epoch": 0.9435348125478195, "grad_norm": 2.156629373945652, "learning_rate": 1.1396829827729998e-05, "loss": 0.5572, "step": 6166 }, { "epoch": 0.9436878347360367, "grad_norm": 2.184268144546899, "learning_rate": 1.1394375897235354e-05, "loss": 0.6756, "step": 6167 }, { "epoch": 0.943840856924254, "grad_norm": 2.2888378781567225, "learning_rate": 1.1391921881106636e-05, "loss": 0.7744, "step": 6168 }, { "epoch": 0.9439938791124713, "grad_norm": 2.2626375112314685, "learning_rate": 1.1389467779494545e-05, "loss": 0.672, "step": 6169 }, { "epoch": 0.9441469013006886, "grad_norm": 2.1640029277804635, "learning_rate": 1.1387013592549803e-05, "loss": 0.625, "step": 6170 }, { "epoch": 0.9442999234889059, "grad_norm": 2.37356580152094, "learning_rate": 1.1384559320423132e-05, "loss": 0.7834, "step": 6171 }, { "epoch": 0.9444529456771232, "grad_norm": 2.1480740870003228, "learning_rate": 1.1382104963265256e-05, "loss": 0.6343, "step": 6172 }, { "epoch": 0.9446059678653405, "grad_norm": 2.189878831615825, "learning_rate": 1.1379650521226907e-05, "loss": 0.7639, "step": 6173 }, { "epoch": 0.9447589900535578, "grad_norm": 2.2193507647046324, "learning_rate": 1.1377195994458823e-05, "loss": 0.6586, "step": 6174 }, { "epoch": 0.944912012241775, "grad_norm": 2.169829742772359, "learning_rate": 1.1374741383111745e-05, "loss": 0.6261, "step": 6175 }, { "epoch": 0.9450650344299923, "grad_norm": 2.5015537311138467, "learning_rate": 1.137228668733642e-05, "loss": 0.6167, "step": 6176 }, { "epoch": 0.9452180566182097, "grad_norm": 2.225124391565102, "learning_rate": 1.13698319072836e-05, "loss": 0.6276, "step": 6177 }, { "epoch": 0.9453710788064269, "grad_norm": 2.218346428912096, "learning_rate": 1.136737704310405e-05, "loss": 0.6192, "step": 6178 }, { "epoch": 0.9455241009946442, "grad_norm": 2.1116689466249743, "learning_rate": 1.1364922094948521e-05, "loss": 0.6749, "step": 6179 }, { "epoch": 0.9456771231828616, "grad_norm": 2.5467465934570375, "learning_rate": 1.1362467062967785e-05, "loss": 0.733, "step": 6180 }, { "epoch": 0.9458301453710788, "grad_norm": 2.159692074442581, "learning_rate": 1.1360011947312622e-05, "loss": 0.5801, "step": 6181 }, { "epoch": 0.9459831675592961, "grad_norm": 2.2593107197857982, "learning_rate": 1.13575567481338e-05, "loss": 0.7481, "step": 6182 }, { "epoch": 0.9461361897475133, "grad_norm": 2.7468321108243154, "learning_rate": 1.1355101465582109e-05, "loss": 0.6763, "step": 6183 }, { "epoch": 0.9462892119357307, "grad_norm": 2.4697235771987374, "learning_rate": 1.1352646099808338e-05, "loss": 0.7542, "step": 6184 }, { "epoch": 0.946442234123948, "grad_norm": 2.6351990269079355, "learning_rate": 1.1350190650963278e-05, "loss": 0.6509, "step": 6185 }, { "epoch": 0.9465952563121652, "grad_norm": 2.0847983344763823, "learning_rate": 1.1347735119197724e-05, "loss": 0.6195, "step": 6186 }, { "epoch": 0.9467482785003826, "grad_norm": 2.2628289082127613, "learning_rate": 1.1345279504662488e-05, "loss": 0.6789, "step": 6187 }, { "epoch": 0.9469013006885999, "grad_norm": 2.3024156670048446, "learning_rate": 1.1342823807508371e-05, "loss": 0.6697, "step": 6188 }, { "epoch": 0.9470543228768171, "grad_norm": 2.2869700353035065, "learning_rate": 1.1340368027886195e-05, "loss": 0.6546, "step": 6189 }, { "epoch": 0.9472073450650345, "grad_norm": 2.1321757720571153, "learning_rate": 1.1337912165946773e-05, "loss": 0.607, "step": 6190 }, { "epoch": 0.9473603672532517, "grad_norm": 2.0630142586951647, "learning_rate": 1.1335456221840932e-05, "loss": 0.6203, "step": 6191 }, { "epoch": 0.947513389441469, "grad_norm": 2.1348173216555995, "learning_rate": 1.1333000195719498e-05, "loss": 0.6734, "step": 6192 }, { "epoch": 0.9476664116296863, "grad_norm": 2.5299071976646723, "learning_rate": 1.1330544087733311e-05, "loss": 0.7018, "step": 6193 }, { "epoch": 0.9478194338179036, "grad_norm": 2.1658288076024137, "learning_rate": 1.1328087898033204e-05, "loss": 0.7224, "step": 6194 }, { "epoch": 0.9479724560061209, "grad_norm": 2.03617237782173, "learning_rate": 1.1325631626770024e-05, "loss": 0.6653, "step": 6195 }, { "epoch": 0.9481254781943382, "grad_norm": 2.28947007165544, "learning_rate": 1.1323175274094615e-05, "loss": 0.7274, "step": 6196 }, { "epoch": 0.9482785003825555, "grad_norm": 2.2920800270128603, "learning_rate": 1.1320718840157844e-05, "loss": 0.6127, "step": 6197 }, { "epoch": 0.9484315225707728, "grad_norm": 2.2807534856633374, "learning_rate": 1.131826232511056e-05, "loss": 0.7514, "step": 6198 }, { "epoch": 0.94858454475899, "grad_norm": 2.277274760847133, "learning_rate": 1.1315805729103626e-05, "loss": 0.7422, "step": 6199 }, { "epoch": 0.9487375669472073, "grad_norm": 2.3026912902212056, "learning_rate": 1.1313349052287918e-05, "loss": 0.7148, "step": 6200 }, { "epoch": 0.9488905891354247, "grad_norm": 2.27866912590338, "learning_rate": 1.1310892294814308e-05, "loss": 0.6766, "step": 6201 }, { "epoch": 0.9490436113236419, "grad_norm": 2.199628352082259, "learning_rate": 1.130843545683367e-05, "loss": 0.6551, "step": 6202 }, { "epoch": 0.9491966335118592, "grad_norm": 2.3123931122807866, "learning_rate": 1.1305978538496896e-05, "loss": 0.7475, "step": 6203 }, { "epoch": 0.9493496557000765, "grad_norm": 2.2427418927664564, "learning_rate": 1.130352153995487e-05, "loss": 0.7086, "step": 6204 }, { "epoch": 0.9495026778882938, "grad_norm": 2.131171988564861, "learning_rate": 1.1301064461358484e-05, "loss": 0.6465, "step": 6205 }, { "epoch": 0.9496557000765111, "grad_norm": 2.029476964393686, "learning_rate": 1.1298607302858643e-05, "loss": 0.5969, "step": 6206 }, { "epoch": 0.9498087222647283, "grad_norm": 2.352972110165139, "learning_rate": 1.1296150064606244e-05, "loss": 0.7222, "step": 6207 }, { "epoch": 0.9499617444529457, "grad_norm": 2.0387321968883874, "learning_rate": 1.1293692746752201e-05, "loss": 0.5474, "step": 6208 }, { "epoch": 0.950114766641163, "grad_norm": 2.1671336258881344, "learning_rate": 1.1291235349447427e-05, "loss": 0.6044, "step": 6209 }, { "epoch": 0.9502677888293802, "grad_norm": 2.11725262790491, "learning_rate": 1.1288777872842837e-05, "loss": 0.5895, "step": 6210 }, { "epoch": 0.9504208110175976, "grad_norm": 2.175716482784178, "learning_rate": 1.1286320317089354e-05, "loss": 0.673, "step": 6211 }, { "epoch": 0.9505738332058148, "grad_norm": 2.4334045623113507, "learning_rate": 1.1283862682337909e-05, "loss": 0.742, "step": 6212 }, { "epoch": 0.9507268553940321, "grad_norm": 2.2530770860240295, "learning_rate": 1.128140496873944e-05, "loss": 0.6588, "step": 6213 }, { "epoch": 0.9508798775822495, "grad_norm": 2.2343257508068772, "learning_rate": 1.1278947176444872e-05, "loss": 0.7206, "step": 6214 }, { "epoch": 0.9510328997704667, "grad_norm": 2.3492826056594915, "learning_rate": 1.1276489305605157e-05, "loss": 0.681, "step": 6215 }, { "epoch": 0.951185921958684, "grad_norm": 2.249954446637355, "learning_rate": 1.127403135637124e-05, "loss": 0.7103, "step": 6216 }, { "epoch": 0.9513389441469013, "grad_norm": 2.106521792271321, "learning_rate": 1.1271573328894074e-05, "loss": 0.64, "step": 6217 }, { "epoch": 0.9514919663351186, "grad_norm": 1.899570026742498, "learning_rate": 1.1269115223324615e-05, "loss": 0.6032, "step": 6218 }, { "epoch": 0.9516449885233359, "grad_norm": 2.279531297155367, "learning_rate": 1.1266657039813826e-05, "loss": 0.6868, "step": 6219 }, { "epoch": 0.9517980107115531, "grad_norm": 2.2086467285134965, "learning_rate": 1.1264198778512674e-05, "loss": 0.697, "step": 6220 }, { "epoch": 0.9519510328997705, "grad_norm": 2.0313857844134904, "learning_rate": 1.1261740439572126e-05, "loss": 0.6574, "step": 6221 }, { "epoch": 0.9521040550879878, "grad_norm": 1.930961524376918, "learning_rate": 1.1259282023143166e-05, "loss": 0.6359, "step": 6222 }, { "epoch": 0.952257077276205, "grad_norm": 2.2039196753961376, "learning_rate": 1.1256823529376768e-05, "loss": 0.6681, "step": 6223 }, { "epoch": 0.9524100994644223, "grad_norm": 2.455580268575347, "learning_rate": 1.1254364958423917e-05, "loss": 0.787, "step": 6224 }, { "epoch": 0.9525631216526397, "grad_norm": 2.2229067809145566, "learning_rate": 1.1251906310435611e-05, "loss": 0.6966, "step": 6225 }, { "epoch": 0.9527161438408569, "grad_norm": 2.233353532493785, "learning_rate": 1.1249447585562843e-05, "loss": 0.6934, "step": 6226 }, { "epoch": 0.9528691660290742, "grad_norm": 2.2308301822804255, "learning_rate": 1.1246988783956606e-05, "loss": 0.6645, "step": 6227 }, { "epoch": 0.9530221882172915, "grad_norm": 2.6020370006509546, "learning_rate": 1.124452990576791e-05, "loss": 0.7857, "step": 6228 }, { "epoch": 0.9531752104055088, "grad_norm": 2.408349890753668, "learning_rate": 1.1242070951147767e-05, "loss": 0.6858, "step": 6229 }, { "epoch": 0.9533282325937261, "grad_norm": 2.2188435241964544, "learning_rate": 1.1239611920247187e-05, "loss": 0.7378, "step": 6230 }, { "epoch": 0.9534812547819433, "grad_norm": 2.3730115015136746, "learning_rate": 1.123715281321719e-05, "loss": 0.6563, "step": 6231 }, { "epoch": 0.9536342769701607, "grad_norm": 2.2447044972291827, "learning_rate": 1.1234693630208798e-05, "loss": 0.7419, "step": 6232 }, { "epoch": 0.953787299158378, "grad_norm": 2.1414125465799687, "learning_rate": 1.1232234371373041e-05, "loss": 0.6364, "step": 6233 }, { "epoch": 0.9539403213465952, "grad_norm": 2.3749105034101388, "learning_rate": 1.1229775036860948e-05, "loss": 0.7654, "step": 6234 }, { "epoch": 0.9540933435348126, "grad_norm": 2.0829661403047433, "learning_rate": 1.1227315626823562e-05, "loss": 0.6018, "step": 6235 }, { "epoch": 0.9542463657230298, "grad_norm": 2.621822847733121, "learning_rate": 1.1224856141411921e-05, "loss": 0.652, "step": 6236 }, { "epoch": 0.9543993879112471, "grad_norm": 1.8273526527697626, "learning_rate": 1.122239658077707e-05, "loss": 0.6322, "step": 6237 }, { "epoch": 0.9545524100994645, "grad_norm": 2.0097539648147817, "learning_rate": 1.121993694507007e-05, "loss": 0.5612, "step": 6238 }, { "epoch": 0.9547054322876817, "grad_norm": 2.5389843690982983, "learning_rate": 1.1217477234441965e-05, "loss": 0.7735, "step": 6239 }, { "epoch": 0.954858454475899, "grad_norm": 2.1787684248203347, "learning_rate": 1.1215017449043817e-05, "loss": 0.7613, "step": 6240 }, { "epoch": 0.9550114766641163, "grad_norm": 2.2525442589068376, "learning_rate": 1.1212557589026699e-05, "loss": 0.6428, "step": 6241 }, { "epoch": 0.9551644988523336, "grad_norm": 2.3005374778193457, "learning_rate": 1.1210097654541676e-05, "loss": 0.7743, "step": 6242 }, { "epoch": 0.9553175210405509, "grad_norm": 2.2746574255072023, "learning_rate": 1.120763764573982e-05, "loss": 0.6967, "step": 6243 }, { "epoch": 0.9554705432287681, "grad_norm": 2.378397266502046, "learning_rate": 1.1205177562772212e-05, "loss": 0.7118, "step": 6244 }, { "epoch": 0.9556235654169855, "grad_norm": 1.9416013991409358, "learning_rate": 1.1202717405789936e-05, "loss": 0.4577, "step": 6245 }, { "epoch": 0.9557765876052028, "grad_norm": 2.3703928068791456, "learning_rate": 1.120025717494408e-05, "loss": 0.669, "step": 6246 }, { "epoch": 0.95592960979342, "grad_norm": 2.1911621815779734, "learning_rate": 1.1197796870385732e-05, "loss": 0.6874, "step": 6247 }, { "epoch": 0.9560826319816373, "grad_norm": 2.23570911339143, "learning_rate": 1.1195336492265997e-05, "loss": 0.5486, "step": 6248 }, { "epoch": 0.9562356541698547, "grad_norm": 2.1892871871968955, "learning_rate": 1.1192876040735968e-05, "loss": 0.6156, "step": 6249 }, { "epoch": 0.9563886763580719, "grad_norm": 2.5372633282546535, "learning_rate": 1.1190415515946757e-05, "loss": 0.7427, "step": 6250 }, { "epoch": 0.9565416985462892, "grad_norm": 2.1746459071347517, "learning_rate": 1.1187954918049473e-05, "loss": 0.6861, "step": 6251 }, { "epoch": 0.9566947207345065, "grad_norm": 2.3915477100098266, "learning_rate": 1.1185494247195227e-05, "loss": 0.6661, "step": 6252 }, { "epoch": 0.9568477429227238, "grad_norm": 2.301774197826807, "learning_rate": 1.1183033503535144e-05, "loss": 0.7023, "step": 6253 }, { "epoch": 0.9570007651109411, "grad_norm": 2.0690146112814616, "learning_rate": 1.1180572687220349e-05, "loss": 0.6382, "step": 6254 }, { "epoch": 0.9571537872991583, "grad_norm": 2.0792422582969805, "learning_rate": 1.1178111798401959e-05, "loss": 0.6192, "step": 6255 }, { "epoch": 0.9573068094873757, "grad_norm": 2.332152072423957, "learning_rate": 1.1175650837231118e-05, "loss": 0.7097, "step": 6256 }, { "epoch": 0.957459831675593, "grad_norm": 2.252289252328118, "learning_rate": 1.1173189803858961e-05, "loss": 0.69, "step": 6257 }, { "epoch": 0.9576128538638102, "grad_norm": 2.149486560835919, "learning_rate": 1.1170728698436629e-05, "loss": 0.6495, "step": 6258 }, { "epoch": 0.9577658760520276, "grad_norm": 2.4865796283824535, "learning_rate": 1.1168267521115266e-05, "loss": 0.6406, "step": 6259 }, { "epoch": 0.9579188982402448, "grad_norm": 2.562998136403146, "learning_rate": 1.1165806272046024e-05, "loss": 0.7459, "step": 6260 }, { "epoch": 0.9580719204284621, "grad_norm": 2.244474523121574, "learning_rate": 1.116334495138006e-05, "loss": 0.7253, "step": 6261 }, { "epoch": 0.9582249426166795, "grad_norm": 2.3237989302201343, "learning_rate": 1.116088355926853e-05, "loss": 0.7225, "step": 6262 }, { "epoch": 0.9583779648048967, "grad_norm": 2.296054403182734, "learning_rate": 1.11584220958626e-05, "loss": 0.6612, "step": 6263 }, { "epoch": 0.958530986993114, "grad_norm": 2.1853141461542034, "learning_rate": 1.1155960561313437e-05, "loss": 0.7537, "step": 6264 }, { "epoch": 0.9586840091813313, "grad_norm": 2.2041375129714615, "learning_rate": 1.1153498955772213e-05, "loss": 0.5933, "step": 6265 }, { "epoch": 0.9588370313695486, "grad_norm": 2.3673848542353024, "learning_rate": 1.1151037279390106e-05, "loss": 0.6677, "step": 6266 }, { "epoch": 0.9589900535577659, "grad_norm": 2.1283912180610756, "learning_rate": 1.11485755323183e-05, "loss": 0.6213, "step": 6267 }, { "epoch": 0.9591430757459831, "grad_norm": 2.104333165924923, "learning_rate": 1.1146113714707973e-05, "loss": 0.6682, "step": 6268 }, { "epoch": 0.9592960979342005, "grad_norm": 2.0811203535808054, "learning_rate": 1.114365182671032e-05, "loss": 0.6221, "step": 6269 }, { "epoch": 0.9594491201224178, "grad_norm": 2.1932532045760778, "learning_rate": 1.1141189868476536e-05, "loss": 0.7587, "step": 6270 }, { "epoch": 0.959602142310635, "grad_norm": 2.489560695641433, "learning_rate": 1.1138727840157817e-05, "loss": 0.7461, "step": 6271 }, { "epoch": 0.9597551644988523, "grad_norm": 2.2827480402995888, "learning_rate": 1.1136265741905367e-05, "loss": 0.6466, "step": 6272 }, { "epoch": 0.9599081866870697, "grad_norm": 2.5641799903735167, "learning_rate": 1.1133803573870392e-05, "loss": 0.7205, "step": 6273 }, { "epoch": 0.9600612088752869, "grad_norm": 2.242478983384006, "learning_rate": 1.1131341336204104e-05, "loss": 0.6067, "step": 6274 }, { "epoch": 0.9602142310635042, "grad_norm": 2.3288654475119297, "learning_rate": 1.112887902905772e-05, "loss": 0.6887, "step": 6275 }, { "epoch": 0.9603672532517215, "grad_norm": 2.304115755073521, "learning_rate": 1.1126416652582456e-05, "loss": 0.7624, "step": 6276 }, { "epoch": 0.9605202754399388, "grad_norm": 2.279433405641376, "learning_rate": 1.1123954206929542e-05, "loss": 0.6743, "step": 6277 }, { "epoch": 0.9606732976281561, "grad_norm": 2.331560845281564, "learning_rate": 1.11214916922502e-05, "loss": 0.6911, "step": 6278 }, { "epoch": 0.9608263198163733, "grad_norm": 2.273458434140446, "learning_rate": 1.111902910869567e-05, "loss": 0.6622, "step": 6279 }, { "epoch": 0.9609793420045907, "grad_norm": 2.154670876055646, "learning_rate": 1.1116566456417181e-05, "loss": 0.6577, "step": 6280 }, { "epoch": 0.961132364192808, "grad_norm": 2.0581058974508832, "learning_rate": 1.1114103735565977e-05, "loss": 0.5898, "step": 6281 }, { "epoch": 0.9612853863810252, "grad_norm": 2.2992654047403263, "learning_rate": 1.1111640946293308e-05, "loss": 0.7091, "step": 6282 }, { "epoch": 0.9614384085692426, "grad_norm": 2.315602918362659, "learning_rate": 1.1109178088750422e-05, "loss": 0.5043, "step": 6283 }, { "epoch": 0.9615914307574598, "grad_norm": 2.35266513608379, "learning_rate": 1.1106715163088561e-05, "loss": 0.6637, "step": 6284 }, { "epoch": 0.9617444529456771, "grad_norm": 2.1833480375393672, "learning_rate": 1.1104252169459e-05, "loss": 0.6923, "step": 6285 }, { "epoch": 0.9618974751338945, "grad_norm": 2.551652413206869, "learning_rate": 1.110178910801299e-05, "loss": 0.6136, "step": 6286 }, { "epoch": 0.9620504973221117, "grad_norm": 2.42416461169718, "learning_rate": 1.1099325978901802e-05, "loss": 0.6589, "step": 6287 }, { "epoch": 0.962203519510329, "grad_norm": 2.3115177880706854, "learning_rate": 1.1096862782276707e-05, "loss": 0.7251, "step": 6288 }, { "epoch": 0.9623565416985463, "grad_norm": 2.4133212993849997, "learning_rate": 1.1094399518288975e-05, "loss": 0.6165, "step": 6289 }, { "epoch": 0.9625095638867636, "grad_norm": 2.476384822568825, "learning_rate": 1.1091936187089886e-05, "loss": 0.7031, "step": 6290 }, { "epoch": 0.9626625860749809, "grad_norm": 2.497069760113835, "learning_rate": 1.1089472788830728e-05, "loss": 0.8166, "step": 6291 }, { "epoch": 0.9628156082631981, "grad_norm": 2.1440996425297296, "learning_rate": 1.1087009323662784e-05, "loss": 0.6813, "step": 6292 }, { "epoch": 0.9629686304514155, "grad_norm": 2.2455927624657135, "learning_rate": 1.108454579173734e-05, "loss": 0.6084, "step": 6293 }, { "epoch": 0.9631216526396328, "grad_norm": 2.240564503687054, "learning_rate": 1.1082082193205699e-05, "loss": 0.5922, "step": 6294 }, { "epoch": 0.96327467482785, "grad_norm": 2.232747072090611, "learning_rate": 1.1079618528219159e-05, "loss": 0.6345, "step": 6295 }, { "epoch": 0.9634276970160673, "grad_norm": 2.3703209346787446, "learning_rate": 1.107715479692902e-05, "loss": 0.6895, "step": 6296 }, { "epoch": 0.9635807192042847, "grad_norm": 2.241003272516828, "learning_rate": 1.1074690999486591e-05, "loss": 0.7439, "step": 6297 }, { "epoch": 0.9637337413925019, "grad_norm": 2.0491357691467074, "learning_rate": 1.1072227136043182e-05, "loss": 0.6752, "step": 6298 }, { "epoch": 0.9638867635807192, "grad_norm": 2.236282969158411, "learning_rate": 1.1069763206750113e-05, "loss": 0.616, "step": 6299 }, { "epoch": 0.9640397857689365, "grad_norm": 2.4976383991206865, "learning_rate": 1.10672992117587e-05, "loss": 0.6593, "step": 6300 }, { "epoch": 0.9641928079571538, "grad_norm": 2.2143008291617483, "learning_rate": 1.1064835151220265e-05, "loss": 0.6282, "step": 6301 }, { "epoch": 0.9643458301453711, "grad_norm": 2.4819206347775427, "learning_rate": 1.106237102528614e-05, "loss": 0.6849, "step": 6302 }, { "epoch": 0.9644988523335883, "grad_norm": 2.2845724829416985, "learning_rate": 1.1059906834107652e-05, "loss": 0.6554, "step": 6303 }, { "epoch": 0.9646518745218057, "grad_norm": 2.536708963196747, "learning_rate": 1.1057442577836141e-05, "loss": 0.7574, "step": 6304 }, { "epoch": 0.964804896710023, "grad_norm": 2.4234275083828565, "learning_rate": 1.1054978256622946e-05, "loss": 0.6173, "step": 6305 }, { "epoch": 0.9649579188982402, "grad_norm": 2.3943143908981805, "learning_rate": 1.1052513870619403e-05, "loss": 0.7037, "step": 6306 }, { "epoch": 0.9651109410864576, "grad_norm": 2.3606987033507507, "learning_rate": 1.1050049419976872e-05, "loss": 0.695, "step": 6307 }, { "epoch": 0.9652639632746748, "grad_norm": 2.5633579786892686, "learning_rate": 1.1047584904846697e-05, "loss": 0.6554, "step": 6308 }, { "epoch": 0.9654169854628921, "grad_norm": 2.2020850155001543, "learning_rate": 1.1045120325380233e-05, "loss": 0.6187, "step": 6309 }, { "epoch": 0.9655700076511095, "grad_norm": 2.1260293430856634, "learning_rate": 1.1042655681728842e-05, "loss": 0.6128, "step": 6310 }, { "epoch": 0.9657230298393267, "grad_norm": 2.3544765145165045, "learning_rate": 1.1040190974043887e-05, "loss": 0.6527, "step": 6311 }, { "epoch": 0.965876052027544, "grad_norm": 2.218314723977588, "learning_rate": 1.1037726202476735e-05, "loss": 0.7008, "step": 6312 }, { "epoch": 0.9660290742157613, "grad_norm": 2.403254705515555, "learning_rate": 1.1035261367178758e-05, "loss": 0.6956, "step": 6313 }, { "epoch": 0.9661820964039786, "grad_norm": 2.0814432448403006, "learning_rate": 1.103279646830133e-05, "loss": 0.6016, "step": 6314 }, { "epoch": 0.9663351185921959, "grad_norm": 2.379683650495674, "learning_rate": 1.103033150599583e-05, "loss": 0.6538, "step": 6315 }, { "epoch": 0.9664881407804131, "grad_norm": 2.286013236090055, "learning_rate": 1.1027866480413642e-05, "loss": 0.6498, "step": 6316 }, { "epoch": 0.9666411629686305, "grad_norm": 2.3171666595250646, "learning_rate": 1.1025401391706152e-05, "loss": 0.6939, "step": 6317 }, { "epoch": 0.9667941851568478, "grad_norm": 2.1385639093202724, "learning_rate": 1.1022936240024754e-05, "loss": 0.6075, "step": 6318 }, { "epoch": 0.966947207345065, "grad_norm": 2.038129807160247, "learning_rate": 1.1020471025520833e-05, "loss": 0.5521, "step": 6319 }, { "epoch": 0.9671002295332823, "grad_norm": 2.782764068823436, "learning_rate": 1.1018005748345802e-05, "loss": 0.7901, "step": 6320 }, { "epoch": 0.9672532517214996, "grad_norm": 1.9353389662910547, "learning_rate": 1.1015540408651051e-05, "loss": 0.5659, "step": 6321 }, { "epoch": 0.9674062739097169, "grad_norm": 2.364548658211014, "learning_rate": 1.101307500658799e-05, "loss": 0.6433, "step": 6322 }, { "epoch": 0.9675592960979342, "grad_norm": 2.14311294534737, "learning_rate": 1.101060954230803e-05, "loss": 0.6272, "step": 6323 }, { "epoch": 0.9677123182861515, "grad_norm": 2.3056064170443893, "learning_rate": 1.100814401596259e-05, "loss": 0.6459, "step": 6324 }, { "epoch": 0.9678653404743688, "grad_norm": 2.02182565029233, "learning_rate": 1.1005678427703075e-05, "loss": 0.6518, "step": 6325 }, { "epoch": 0.9680183626625861, "grad_norm": 2.3303131018908547, "learning_rate": 1.1003212777680916e-05, "loss": 0.6813, "step": 6326 }, { "epoch": 0.9681713848508033, "grad_norm": 2.114523748506382, "learning_rate": 1.1000747066047536e-05, "loss": 0.5194, "step": 6327 }, { "epoch": 0.9683244070390207, "grad_norm": 2.265418197230814, "learning_rate": 1.0998281292954362e-05, "loss": 0.6422, "step": 6328 }, { "epoch": 0.9684774292272379, "grad_norm": 2.338030038622006, "learning_rate": 1.0995815458552833e-05, "loss": 0.7219, "step": 6329 }, { "epoch": 0.9686304514154552, "grad_norm": 2.521459284136043, "learning_rate": 1.0993349562994378e-05, "loss": 0.6203, "step": 6330 }, { "epoch": 0.9687834736036726, "grad_norm": 2.0493537019726564, "learning_rate": 1.0990883606430439e-05, "loss": 0.593, "step": 6331 }, { "epoch": 0.9689364957918898, "grad_norm": 2.6405871157081777, "learning_rate": 1.0988417589012464e-05, "loss": 0.79, "step": 6332 }, { "epoch": 0.9690895179801071, "grad_norm": 2.096405636694104, "learning_rate": 1.0985951510891899e-05, "loss": 0.7241, "step": 6333 }, { "epoch": 0.9692425401683245, "grad_norm": 2.088604644544387, "learning_rate": 1.098348537222019e-05, "loss": 0.6886, "step": 6334 }, { "epoch": 0.9693955623565417, "grad_norm": 1.9904441066693555, "learning_rate": 1.0981019173148802e-05, "loss": 0.5716, "step": 6335 }, { "epoch": 0.969548584544759, "grad_norm": 2.062780568286232, "learning_rate": 1.0978552913829185e-05, "loss": 0.5858, "step": 6336 }, { "epoch": 0.9697016067329762, "grad_norm": 2.2337258488936538, "learning_rate": 1.0976086594412808e-05, "loss": 0.6003, "step": 6337 }, { "epoch": 0.9698546289211936, "grad_norm": 2.372510122203268, "learning_rate": 1.0973620215051132e-05, "loss": 0.7456, "step": 6338 }, { "epoch": 0.9700076511094109, "grad_norm": 2.2495942249697083, "learning_rate": 1.0971153775895633e-05, "loss": 0.6003, "step": 6339 }, { "epoch": 0.9701606732976281, "grad_norm": 2.239803426785077, "learning_rate": 1.096868727709778e-05, "loss": 0.5947, "step": 6340 }, { "epoch": 0.9703136954858455, "grad_norm": 2.0374315924280446, "learning_rate": 1.0966220718809054e-05, "loss": 0.6142, "step": 6341 }, { "epoch": 0.9704667176740628, "grad_norm": 2.1103707648171106, "learning_rate": 1.096375410118093e-05, "loss": 0.5306, "step": 6342 }, { "epoch": 0.97061973986228, "grad_norm": 2.07129788644162, "learning_rate": 1.0961287424364896e-05, "loss": 0.6902, "step": 6343 }, { "epoch": 0.9707727620504973, "grad_norm": 2.3083558399211164, "learning_rate": 1.0958820688512441e-05, "loss": 0.6831, "step": 6344 }, { "epoch": 0.9709257842387146, "grad_norm": 2.426716481951558, "learning_rate": 1.0956353893775062e-05, "loss": 0.7141, "step": 6345 }, { "epoch": 0.9710788064269319, "grad_norm": 2.162494654252418, "learning_rate": 1.0953887040304245e-05, "loss": 0.5416, "step": 6346 }, { "epoch": 0.9712318286151492, "grad_norm": 2.1205750952379447, "learning_rate": 1.0951420128251491e-05, "loss": 0.6947, "step": 6347 }, { "epoch": 0.9713848508033665, "grad_norm": 2.4172902478803358, "learning_rate": 1.0948953157768309e-05, "loss": 0.667, "step": 6348 }, { "epoch": 0.9715378729915838, "grad_norm": 2.238302293539362, "learning_rate": 1.0946486129006202e-05, "loss": 0.666, "step": 6349 }, { "epoch": 0.9716908951798011, "grad_norm": 2.2943963497156576, "learning_rate": 1.0944019042116673e-05, "loss": 0.6383, "step": 6350 }, { "epoch": 0.9718439173680183, "grad_norm": 2.0424734728227354, "learning_rate": 1.0941551897251248e-05, "loss": 0.6146, "step": 6351 }, { "epoch": 0.9719969395562357, "grad_norm": 2.348686525361142, "learning_rate": 1.0939084694561434e-05, "loss": 0.6622, "step": 6352 }, { "epoch": 0.9721499617444529, "grad_norm": 1.9904593663363055, "learning_rate": 1.0936617434198758e-05, "loss": 0.5622, "step": 6353 }, { "epoch": 0.9723029839326702, "grad_norm": 2.2619272391880254, "learning_rate": 1.093415011631474e-05, "loss": 0.7357, "step": 6354 }, { "epoch": 0.9724560061208876, "grad_norm": 2.142159801287082, "learning_rate": 1.0931682741060907e-05, "loss": 0.6831, "step": 6355 }, { "epoch": 0.9726090283091048, "grad_norm": 2.4643906335625196, "learning_rate": 1.0929215308588795e-05, "loss": 0.715, "step": 6356 }, { "epoch": 0.9727620504973221, "grad_norm": 2.0428082848603037, "learning_rate": 1.0926747819049935e-05, "loss": 0.6328, "step": 6357 }, { "epoch": 0.9729150726855395, "grad_norm": 2.253982118695505, "learning_rate": 1.0924280272595863e-05, "loss": 0.7491, "step": 6358 }, { "epoch": 0.9730680948737567, "grad_norm": 2.1914156685495443, "learning_rate": 1.0921812669378126e-05, "loss": 0.6725, "step": 6359 }, { "epoch": 0.973221117061974, "grad_norm": 2.26967414402112, "learning_rate": 1.0919345009548266e-05, "loss": 0.7652, "step": 6360 }, { "epoch": 0.9733741392501912, "grad_norm": 2.246918731454627, "learning_rate": 1.0916877293257837e-05, "loss": 0.635, "step": 6361 }, { "epoch": 0.9735271614384086, "grad_norm": 2.237823796273232, "learning_rate": 1.0914409520658382e-05, "loss": 0.6303, "step": 6362 }, { "epoch": 0.9736801836266259, "grad_norm": 2.3232073625174903, "learning_rate": 1.091194169190146e-05, "loss": 0.5327, "step": 6363 }, { "epoch": 0.9738332058148431, "grad_norm": 2.387396773346526, "learning_rate": 1.0909473807138633e-05, "loss": 0.6605, "step": 6364 }, { "epoch": 0.9739862280030605, "grad_norm": 2.3255204349093996, "learning_rate": 1.0907005866521462e-05, "loss": 0.6902, "step": 6365 }, { "epoch": 0.9741392501912778, "grad_norm": 2.350835243846951, "learning_rate": 1.0904537870201512e-05, "loss": 0.6583, "step": 6366 }, { "epoch": 0.974292272379495, "grad_norm": 2.0874447124765667, "learning_rate": 1.0902069818330354e-05, "loss": 0.6323, "step": 6367 }, { "epoch": 0.9744452945677123, "grad_norm": 2.4843168652978216, "learning_rate": 1.089960171105956e-05, "loss": 0.7759, "step": 6368 }, { "epoch": 0.9745983167559296, "grad_norm": 2.375106997044347, "learning_rate": 1.0897133548540701e-05, "loss": 0.6102, "step": 6369 }, { "epoch": 0.9747513389441469, "grad_norm": 2.343614659524406, "learning_rate": 1.0894665330925368e-05, "loss": 0.6207, "step": 6370 }, { "epoch": 0.9749043611323642, "grad_norm": 2.2805865460307237, "learning_rate": 1.0892197058365135e-05, "loss": 0.6677, "step": 6371 }, { "epoch": 0.9750573833205815, "grad_norm": 2.235287175901108, "learning_rate": 1.0889728731011587e-05, "loss": 0.7031, "step": 6372 }, { "epoch": 0.9752104055087988, "grad_norm": 2.104978197412232, "learning_rate": 1.0887260349016318e-05, "loss": 0.5624, "step": 6373 }, { "epoch": 0.9753634276970161, "grad_norm": 2.3101831680546323, "learning_rate": 1.0884791912530923e-05, "loss": 0.5689, "step": 6374 }, { "epoch": 0.9755164498852333, "grad_norm": 2.3371258325914255, "learning_rate": 1.0882323421706991e-05, "loss": 0.7777, "step": 6375 }, { "epoch": 0.9756694720734507, "grad_norm": 2.7997626109199745, "learning_rate": 1.087985487669613e-05, "loss": 0.8104, "step": 6376 }, { "epoch": 0.9758224942616679, "grad_norm": 2.412073839993447, "learning_rate": 1.0877386277649938e-05, "loss": 0.7437, "step": 6377 }, { "epoch": 0.9759755164498852, "grad_norm": 2.316799724033941, "learning_rate": 1.087491762472002e-05, "loss": 0.725, "step": 6378 }, { "epoch": 0.9761285386381026, "grad_norm": 2.2241974714922486, "learning_rate": 1.0872448918057989e-05, "loss": 0.768, "step": 6379 }, { "epoch": 0.9762815608263198, "grad_norm": 2.0678765281664386, "learning_rate": 1.0869980157815458e-05, "loss": 0.5852, "step": 6380 }, { "epoch": 0.9764345830145371, "grad_norm": 2.211865767184328, "learning_rate": 1.0867511344144042e-05, "loss": 0.6851, "step": 6381 }, { "epoch": 0.9765876052027544, "grad_norm": 2.113420351755564, "learning_rate": 1.0865042477195362e-05, "loss": 0.6036, "step": 6382 }, { "epoch": 0.9767406273909717, "grad_norm": 2.238136535568473, "learning_rate": 1.0862573557121036e-05, "loss": 0.6267, "step": 6383 }, { "epoch": 0.976893649579189, "grad_norm": 2.2475432663562107, "learning_rate": 1.0860104584072698e-05, "loss": 0.7343, "step": 6384 }, { "epoch": 0.9770466717674062, "grad_norm": 2.332824885622173, "learning_rate": 1.0857635558201966e-05, "loss": 0.6669, "step": 6385 }, { "epoch": 0.9771996939556236, "grad_norm": 2.1477623949536624, "learning_rate": 1.0855166479660487e-05, "loss": 0.5838, "step": 6386 }, { "epoch": 0.9773527161438409, "grad_norm": 2.2525788281704773, "learning_rate": 1.0852697348599883e-05, "loss": 0.7541, "step": 6387 }, { "epoch": 0.9775057383320581, "grad_norm": 2.2180595252269955, "learning_rate": 1.08502281651718e-05, "loss": 0.6829, "step": 6388 }, { "epoch": 0.9776587605202754, "grad_norm": 2.216679839529683, "learning_rate": 1.0847758929527881e-05, "loss": 0.6328, "step": 6389 }, { "epoch": 0.9778117827084928, "grad_norm": 2.094749687529566, "learning_rate": 1.0845289641819772e-05, "loss": 0.5704, "step": 6390 }, { "epoch": 0.97796480489671, "grad_norm": 2.158104895092101, "learning_rate": 1.0842820302199115e-05, "loss": 0.6678, "step": 6391 }, { "epoch": 0.9781178270849273, "grad_norm": 2.5011576786078495, "learning_rate": 1.0840350910817568e-05, "loss": 0.6058, "step": 6392 }, { "epoch": 0.9782708492731446, "grad_norm": 2.1857763444580067, "learning_rate": 1.0837881467826783e-05, "loss": 0.6703, "step": 6393 }, { "epoch": 0.9784238714613619, "grad_norm": 2.2003107696201027, "learning_rate": 1.0835411973378421e-05, "loss": 0.7169, "step": 6394 }, { "epoch": 0.9785768936495792, "grad_norm": 2.0337957252981096, "learning_rate": 1.083294242762414e-05, "loss": 0.5438, "step": 6395 }, { "epoch": 0.9787299158377964, "grad_norm": 2.150512225972815, "learning_rate": 1.0830472830715605e-05, "loss": 0.6302, "step": 6396 }, { "epoch": 0.9788829380260138, "grad_norm": 2.484740579652343, "learning_rate": 1.0828003182804488e-05, "loss": 0.6283, "step": 6397 }, { "epoch": 0.9790359602142311, "grad_norm": 2.5080302326286668, "learning_rate": 1.0825533484042451e-05, "loss": 0.7363, "step": 6398 }, { "epoch": 0.9791889824024483, "grad_norm": 2.4527364589897527, "learning_rate": 1.082306373458118e-05, "loss": 0.6692, "step": 6399 }, { "epoch": 0.9793420045906657, "grad_norm": 2.5425671821277587, "learning_rate": 1.0820593934572344e-05, "loss": 0.5979, "step": 6400 }, { "epoch": 0.9794950267788829, "grad_norm": 2.3776505208003607, "learning_rate": 1.0818124084167619e-05, "loss": 0.6738, "step": 6401 }, { "epoch": 0.9796480489671002, "grad_norm": 2.2778353809646887, "learning_rate": 1.0815654183518698e-05, "loss": 0.7114, "step": 6402 }, { "epoch": 0.9798010711553176, "grad_norm": 2.193755629471049, "learning_rate": 1.0813184232777262e-05, "loss": 0.6681, "step": 6403 }, { "epoch": 0.9799540933435348, "grad_norm": 1.9695219026147595, "learning_rate": 1.0810714232095001e-05, "loss": 0.6318, "step": 6404 }, { "epoch": 0.9801071155317521, "grad_norm": 2.279528997785273, "learning_rate": 1.0808244181623606e-05, "loss": 0.6502, "step": 6405 }, { "epoch": 0.9802601377199694, "grad_norm": 2.3554587060275716, "learning_rate": 1.0805774081514774e-05, "loss": 0.7645, "step": 6406 }, { "epoch": 0.9804131599081867, "grad_norm": 2.178746017369633, "learning_rate": 1.0803303931920206e-05, "loss": 0.5972, "step": 6407 }, { "epoch": 0.980566182096404, "grad_norm": 2.200882154882437, "learning_rate": 1.08008337329916e-05, "loss": 0.6289, "step": 6408 }, { "epoch": 0.9807192042846212, "grad_norm": 2.1322827299103064, "learning_rate": 1.079836348488066e-05, "loss": 0.5765, "step": 6409 }, { "epoch": 0.9808722264728386, "grad_norm": 2.264692330253768, "learning_rate": 1.0795893187739095e-05, "loss": 0.728, "step": 6410 }, { "epoch": 0.9810252486610559, "grad_norm": 2.2316755970527984, "learning_rate": 1.079342284171862e-05, "loss": 0.6295, "step": 6411 }, { "epoch": 0.9811782708492731, "grad_norm": 2.2053391147834756, "learning_rate": 1.0790952446970942e-05, "loss": 0.6607, "step": 6412 }, { "epoch": 0.9813312930374904, "grad_norm": 2.1109423143732897, "learning_rate": 1.0788482003647775e-05, "loss": 0.6516, "step": 6413 }, { "epoch": 0.9814843152257078, "grad_norm": 2.268297928669402, "learning_rate": 1.078601151190085e-05, "loss": 0.7403, "step": 6414 }, { "epoch": 0.981637337413925, "grad_norm": 2.2137529570759122, "learning_rate": 1.0783540971881882e-05, "loss": 0.6743, "step": 6415 }, { "epoch": 0.9817903596021423, "grad_norm": 2.0507644073612137, "learning_rate": 1.0781070383742595e-05, "loss": 0.4971, "step": 6416 }, { "epoch": 0.9819433817903596, "grad_norm": 2.088792875167074, "learning_rate": 1.0778599747634723e-05, "loss": 0.6713, "step": 6417 }, { "epoch": 0.9820964039785769, "grad_norm": 2.3516082787434063, "learning_rate": 1.0776129063709994e-05, "loss": 0.6598, "step": 6418 }, { "epoch": 0.9822494261667942, "grad_norm": 2.6734111206794915, "learning_rate": 1.0773658332120143e-05, "loss": 0.6645, "step": 6419 }, { "epoch": 0.9824024483550114, "grad_norm": 2.3219289364900053, "learning_rate": 1.0771187553016907e-05, "loss": 0.6809, "step": 6420 }, { "epoch": 0.9825554705432288, "grad_norm": 2.295162797643992, "learning_rate": 1.0768716726552027e-05, "loss": 0.6209, "step": 6421 }, { "epoch": 0.9827084927314461, "grad_norm": 2.1896216122076155, "learning_rate": 1.0766245852877244e-05, "loss": 0.633, "step": 6422 }, { "epoch": 0.9828615149196633, "grad_norm": 2.423235832841934, "learning_rate": 1.0763774932144306e-05, "loss": 0.634, "step": 6423 }, { "epoch": 0.9830145371078807, "grad_norm": 2.352333616434348, "learning_rate": 1.0761303964504963e-05, "loss": 0.6987, "step": 6424 }, { "epoch": 0.9831675592960979, "grad_norm": 2.223375288114711, "learning_rate": 1.0758832950110965e-05, "loss": 0.5861, "step": 6425 }, { "epoch": 0.9833205814843152, "grad_norm": 2.2183349950150646, "learning_rate": 1.0756361889114065e-05, "loss": 0.6365, "step": 6426 }, { "epoch": 0.9834736036725326, "grad_norm": 1.9636195416029705, "learning_rate": 1.0753890781666025e-05, "loss": 0.6346, "step": 6427 }, { "epoch": 0.9836266258607498, "grad_norm": 2.2297263596321404, "learning_rate": 1.0751419627918602e-05, "loss": 0.666, "step": 6428 }, { "epoch": 0.9837796480489671, "grad_norm": 2.4350159907453146, "learning_rate": 1.0748948428023557e-05, "loss": 0.6205, "step": 6429 }, { "epoch": 0.9839326702371843, "grad_norm": 2.181247738797785, "learning_rate": 1.0746477182132665e-05, "loss": 0.6318, "step": 6430 }, { "epoch": 0.9840856924254017, "grad_norm": 2.646466395978632, "learning_rate": 1.0744005890397686e-05, "loss": 0.6104, "step": 6431 }, { "epoch": 0.984238714613619, "grad_norm": 2.2751928031916804, "learning_rate": 1.0741534552970393e-05, "loss": 0.6352, "step": 6432 }, { "epoch": 0.9843917368018362, "grad_norm": 19.9658720243908, "learning_rate": 1.0739063170002564e-05, "loss": 0.6467, "step": 6433 }, { "epoch": 0.9845447589900536, "grad_norm": 2.084851787789992, "learning_rate": 1.0736591741645974e-05, "loss": 0.4974, "step": 6434 }, { "epoch": 0.9846977811782709, "grad_norm": 2.0840003737302095, "learning_rate": 1.0734120268052403e-05, "loss": 0.6906, "step": 6435 }, { "epoch": 0.9848508033664881, "grad_norm": 2.2573509449638096, "learning_rate": 1.0731648749373637e-05, "loss": 0.6702, "step": 6436 }, { "epoch": 0.9850038255547054, "grad_norm": 2.105354426708069, "learning_rate": 1.0729177185761457e-05, "loss": 0.571, "step": 6437 }, { "epoch": 0.9851568477429227, "grad_norm": 2.288305079827146, "learning_rate": 1.0726705577367652e-05, "loss": 0.7155, "step": 6438 }, { "epoch": 0.98530986993114, "grad_norm": 2.2850243056547734, "learning_rate": 1.0724233924344016e-05, "loss": 0.6159, "step": 6439 }, { "epoch": 0.9854628921193573, "grad_norm": 1.9649566640224054, "learning_rate": 1.0721762226842344e-05, "loss": 0.5237, "step": 6440 }, { "epoch": 0.9856159143075746, "grad_norm": 1.9893769537451282, "learning_rate": 1.0719290485014428e-05, "loss": 0.5814, "step": 6441 }, { "epoch": 0.9857689364957919, "grad_norm": 2.564034080142649, "learning_rate": 1.0716818699012067e-05, "loss": 0.7568, "step": 6442 }, { "epoch": 0.9859219586840092, "grad_norm": 2.295834576636577, "learning_rate": 1.0714346868987068e-05, "loss": 0.7564, "step": 6443 }, { "epoch": 0.9860749808722264, "grad_norm": 2.0971214868536894, "learning_rate": 1.0711874995091239e-05, "loss": 0.6822, "step": 6444 }, { "epoch": 0.9862280030604438, "grad_norm": 2.21508227273722, "learning_rate": 1.0709403077476372e-05, "loss": 0.6229, "step": 6445 }, { "epoch": 0.986381025248661, "grad_norm": 2.043342706115372, "learning_rate": 1.070693111629429e-05, "loss": 0.5323, "step": 6446 }, { "epoch": 0.9865340474368783, "grad_norm": 2.2277205626682277, "learning_rate": 1.0704459111696805e-05, "loss": 0.6203, "step": 6447 }, { "epoch": 0.9866870696250957, "grad_norm": 2.5719639860424133, "learning_rate": 1.0701987063835728e-05, "loss": 0.784, "step": 6448 }, { "epoch": 0.9868400918133129, "grad_norm": 2.4296363914023047, "learning_rate": 1.069951497286288e-05, "loss": 0.6575, "step": 6449 }, { "epoch": 0.9869931140015302, "grad_norm": 2.7202807521864147, "learning_rate": 1.069704283893008e-05, "loss": 0.6461, "step": 6450 }, { "epoch": 0.9871461361897476, "grad_norm": 2.143158373415149, "learning_rate": 1.0694570662189152e-05, "loss": 0.6412, "step": 6451 }, { "epoch": 0.9872991583779648, "grad_norm": 2.3625004784021257, "learning_rate": 1.0692098442791925e-05, "loss": 0.6861, "step": 6452 }, { "epoch": 0.9874521805661821, "grad_norm": 2.5257222065048013, "learning_rate": 1.0689626180890225e-05, "loss": 0.7539, "step": 6453 }, { "epoch": 0.9876052027543993, "grad_norm": 2.1250124084674864, "learning_rate": 1.0687153876635878e-05, "loss": 0.7075, "step": 6454 }, { "epoch": 0.9877582249426167, "grad_norm": 2.409259571306803, "learning_rate": 1.068468153018073e-05, "loss": 0.6375, "step": 6455 }, { "epoch": 0.987911247130834, "grad_norm": 2.32874679222682, "learning_rate": 1.068220914167661e-05, "loss": 0.6387, "step": 6456 }, { "epoch": 0.9880642693190512, "grad_norm": 2.2644969331295517, "learning_rate": 1.0679736711275352e-05, "loss": 0.6875, "step": 6457 }, { "epoch": 0.9882172915072686, "grad_norm": 1.9025109306992631, "learning_rate": 1.0677264239128809e-05, "loss": 0.5653, "step": 6458 }, { "epoch": 0.9883703136954859, "grad_norm": 2.1561627079866503, "learning_rate": 1.0674791725388818e-05, "loss": 0.6281, "step": 6459 }, { "epoch": 0.9885233358837031, "grad_norm": 2.061516117776862, "learning_rate": 1.0672319170207227e-05, "loss": 0.6177, "step": 6460 }, { "epoch": 0.9886763580719204, "grad_norm": 2.2521575998416066, "learning_rate": 1.0669846573735885e-05, "loss": 0.6979, "step": 6461 }, { "epoch": 0.9888293802601377, "grad_norm": 2.1209188840044573, "learning_rate": 1.0667373936126646e-05, "loss": 0.7254, "step": 6462 }, { "epoch": 0.988982402448355, "grad_norm": 2.4464366863064515, "learning_rate": 1.0664901257531362e-05, "loss": 0.6411, "step": 6463 }, { "epoch": 0.9891354246365723, "grad_norm": 2.2092005460424895, "learning_rate": 1.0662428538101893e-05, "loss": 0.6615, "step": 6464 }, { "epoch": 0.9892884468247896, "grad_norm": 2.298112700131103, "learning_rate": 1.0659955777990092e-05, "loss": 0.6517, "step": 6465 }, { "epoch": 0.9894414690130069, "grad_norm": 2.51742007501707, "learning_rate": 1.0657482977347826e-05, "loss": 0.6929, "step": 6466 }, { "epoch": 0.9895944912012242, "grad_norm": 2.2902881205759833, "learning_rate": 1.0655010136326957e-05, "loss": 0.6197, "step": 6467 }, { "epoch": 0.9897475133894414, "grad_norm": 2.0662944894518684, "learning_rate": 1.0652537255079359e-05, "loss": 0.6396, "step": 6468 }, { "epoch": 0.9899005355776588, "grad_norm": 2.3815178778296033, "learning_rate": 1.0650064333756892e-05, "loss": 0.7265, "step": 6469 }, { "epoch": 0.990053557765876, "grad_norm": 2.332863841758195, "learning_rate": 1.0647591372511427e-05, "loss": 0.6633, "step": 6470 }, { "epoch": 0.9902065799540933, "grad_norm": 2.289934714096384, "learning_rate": 1.0645118371494847e-05, "loss": 0.6172, "step": 6471 }, { "epoch": 0.9903596021423107, "grad_norm": 2.2808303258671625, "learning_rate": 1.0642645330859023e-05, "loss": 0.647, "step": 6472 }, { "epoch": 0.9905126243305279, "grad_norm": 2.001627220573405, "learning_rate": 1.0640172250755836e-05, "loss": 0.6933, "step": 6473 }, { "epoch": 0.9906656465187452, "grad_norm": 2.133895070860213, "learning_rate": 1.0637699131337167e-05, "loss": 0.6485, "step": 6474 }, { "epoch": 0.9908186687069626, "grad_norm": 2.306956253050283, "learning_rate": 1.06352259727549e-05, "loss": 0.6595, "step": 6475 }, { "epoch": 0.9909716908951798, "grad_norm": 2.1617438230826407, "learning_rate": 1.0632752775160917e-05, "loss": 0.5378, "step": 6476 }, { "epoch": 0.9911247130833971, "grad_norm": 2.5429299085792296, "learning_rate": 1.0630279538707116e-05, "loss": 0.7616, "step": 6477 }, { "epoch": 0.9912777352716143, "grad_norm": 2.252167453852857, "learning_rate": 1.0627806263545379e-05, "loss": 0.6494, "step": 6478 }, { "epoch": 0.9914307574598317, "grad_norm": 2.2880745559100775, "learning_rate": 1.0625332949827604e-05, "loss": 0.6506, "step": 6479 }, { "epoch": 0.991583779648049, "grad_norm": 2.476915922169972, "learning_rate": 1.0622859597705686e-05, "loss": 0.7807, "step": 6480 }, { "epoch": 0.9917368018362662, "grad_norm": 2.4263691944067034, "learning_rate": 1.0620386207331528e-05, "loss": 0.6673, "step": 6481 }, { "epoch": 0.9918898240244836, "grad_norm": 2.288249879251959, "learning_rate": 1.0617912778857022e-05, "loss": 0.5908, "step": 6482 }, { "epoch": 0.9920428462127009, "grad_norm": 2.1329413418096625, "learning_rate": 1.0615439312434073e-05, "loss": 0.6263, "step": 6483 }, { "epoch": 0.9921958684009181, "grad_norm": 2.3367729594747804, "learning_rate": 1.061296580821459e-05, "loss": 0.6701, "step": 6484 }, { "epoch": 0.9923488905891354, "grad_norm": 2.0871602655046506, "learning_rate": 1.0610492266350484e-05, "loss": 0.5942, "step": 6485 }, { "epoch": 0.9925019127773527, "grad_norm": 2.1715923797764236, "learning_rate": 1.0608018686993651e-05, "loss": 0.592, "step": 6486 }, { "epoch": 0.99265493496557, "grad_norm": 2.1720410847528266, "learning_rate": 1.0605545070296017e-05, "loss": 0.6236, "step": 6487 }, { "epoch": 0.9928079571537873, "grad_norm": 2.199533292871803, "learning_rate": 1.060307141640949e-05, "loss": 0.5939, "step": 6488 }, { "epoch": 0.9929609793420046, "grad_norm": 2.314051801230297, "learning_rate": 1.0600597725485988e-05, "loss": 0.6454, "step": 6489 }, { "epoch": 0.9931140015302219, "grad_norm": 2.414577584604856, "learning_rate": 1.0598123997677432e-05, "loss": 0.5892, "step": 6490 }, { "epoch": 0.9932670237184392, "grad_norm": 2.220440454756989, "learning_rate": 1.059565023313574e-05, "loss": 0.6345, "step": 6491 }, { "epoch": 0.9934200459066564, "grad_norm": 2.359228123391197, "learning_rate": 1.0593176432012836e-05, "loss": 0.6315, "step": 6492 }, { "epoch": 0.9935730680948738, "grad_norm": 2.2007100542708797, "learning_rate": 1.0590702594460649e-05, "loss": 0.672, "step": 6493 }, { "epoch": 0.993726090283091, "grad_norm": 2.162309831580927, "learning_rate": 1.0588228720631104e-05, "loss": 0.6506, "step": 6494 }, { "epoch": 0.9938791124713083, "grad_norm": 2.5394139530332698, "learning_rate": 1.0585754810676127e-05, "loss": 0.8196, "step": 6495 }, { "epoch": 0.9940321346595257, "grad_norm": 2.1750634220281104, "learning_rate": 1.0583280864747663e-05, "loss": 0.5415, "step": 6496 }, { "epoch": 0.9941851568477429, "grad_norm": 2.056184817961102, "learning_rate": 1.0580806882997638e-05, "loss": 0.5811, "step": 6497 }, { "epoch": 0.9943381790359602, "grad_norm": 1.9628993588402721, "learning_rate": 1.0578332865577987e-05, "loss": 0.5865, "step": 6498 }, { "epoch": 0.9944912012241776, "grad_norm": 2.0909315968389572, "learning_rate": 1.0575858812640653e-05, "loss": 0.6559, "step": 6499 }, { "epoch": 0.9946442234123948, "grad_norm": 2.1491534931389547, "learning_rate": 1.057338472433758e-05, "loss": 0.5262, "step": 6500 }, { "epoch": 0.9947972456006121, "grad_norm": 2.25180793663901, "learning_rate": 1.0570910600820703e-05, "loss": 0.7352, "step": 6501 }, { "epoch": 0.9949502677888293, "grad_norm": 2.1499742816028613, "learning_rate": 1.0568436442241975e-05, "loss": 0.6087, "step": 6502 }, { "epoch": 0.9951032899770467, "grad_norm": 2.202787694955412, "learning_rate": 1.0565962248753344e-05, "loss": 0.6215, "step": 6503 }, { "epoch": 0.995256312165264, "grad_norm": 2.225784673710967, "learning_rate": 1.0563488020506754e-05, "loss": 0.7141, "step": 6504 }, { "epoch": 0.9954093343534812, "grad_norm": 2.3329954360329843, "learning_rate": 1.056101375765416e-05, "loss": 0.7444, "step": 6505 }, { "epoch": 0.9955623565416986, "grad_norm": 2.0139835559989505, "learning_rate": 1.0558539460347518e-05, "loss": 0.5935, "step": 6506 }, { "epoch": 0.9957153787299159, "grad_norm": 2.263146610045272, "learning_rate": 1.0556065128738782e-05, "loss": 0.6425, "step": 6507 }, { "epoch": 0.9958684009181331, "grad_norm": 2.2635096597755426, "learning_rate": 1.055359076297991e-05, "loss": 0.6783, "step": 6508 }, { "epoch": 0.9960214231063504, "grad_norm": 2.2452785958982764, "learning_rate": 1.0551116363222864e-05, "loss": 0.8419, "step": 6509 }, { "epoch": 0.9961744452945677, "grad_norm": 2.447402852546272, "learning_rate": 1.054864192961961e-05, "loss": 0.6773, "step": 6510 }, { "epoch": 0.996327467482785, "grad_norm": 1.9661543729169815, "learning_rate": 1.0546167462322103e-05, "loss": 0.5886, "step": 6511 }, { "epoch": 0.9964804896710023, "grad_norm": 2.0809576482884693, "learning_rate": 1.054369296148232e-05, "loss": 0.6409, "step": 6512 }, { "epoch": 0.9966335118592196, "grad_norm": 1.9729856131001111, "learning_rate": 1.0541218427252222e-05, "loss": 0.6166, "step": 6513 }, { "epoch": 0.9967865340474369, "grad_norm": 2.206970740539544, "learning_rate": 1.0538743859783787e-05, "loss": 0.6654, "step": 6514 }, { "epoch": 0.9969395562356542, "grad_norm": 2.0815380641893793, "learning_rate": 1.053626925922898e-05, "loss": 0.564, "step": 6515 }, { "epoch": 0.9970925784238714, "grad_norm": 2.043032926313095, "learning_rate": 1.0533794625739782e-05, "loss": 0.6125, "step": 6516 }, { "epoch": 0.9972456006120888, "grad_norm": 2.08135336365385, "learning_rate": 1.0531319959468167e-05, "loss": 0.5659, "step": 6517 }, { "epoch": 0.997398622800306, "grad_norm": 2.0383774964273287, "learning_rate": 1.0528845260566116e-05, "loss": 0.5637, "step": 6518 }, { "epoch": 0.9975516449885233, "grad_norm": 2.066268610798444, "learning_rate": 1.0526370529185608e-05, "loss": 0.6478, "step": 6519 }, { "epoch": 0.9977046671767407, "grad_norm": 2.5050073606635817, "learning_rate": 1.0523895765478627e-05, "loss": 0.6566, "step": 6520 }, { "epoch": 0.9978576893649579, "grad_norm": 2.5001747882359084, "learning_rate": 1.0521420969597156e-05, "loss": 0.6971, "step": 6521 }, { "epoch": 0.9980107115531752, "grad_norm": 2.21713217988516, "learning_rate": 1.0518946141693185e-05, "loss": 0.5671, "step": 6522 }, { "epoch": 0.9981637337413926, "grad_norm": 2.216049429630443, "learning_rate": 1.0516471281918702e-05, "loss": 0.6914, "step": 6523 }, { "epoch": 0.9983167559296098, "grad_norm": 2.2994572356248106, "learning_rate": 1.0513996390425694e-05, "loss": 0.6335, "step": 6524 }, { "epoch": 0.9984697781178271, "grad_norm": 2.1282739516172238, "learning_rate": 1.051152146736616e-05, "loss": 0.5977, "step": 6525 }, { "epoch": 0.9986228003060443, "grad_norm": 2.4591726916231615, "learning_rate": 1.0509046512892092e-05, "loss": 0.684, "step": 6526 }, { "epoch": 0.9987758224942617, "grad_norm": 2.64549225493789, "learning_rate": 1.0506571527155482e-05, "loss": 0.6533, "step": 6527 }, { "epoch": 0.998928844682479, "grad_norm": 2.3069809469331384, "learning_rate": 1.0504096510308335e-05, "loss": 0.601, "step": 6528 }, { "epoch": 0.9990818668706962, "grad_norm": 2.144064608208212, "learning_rate": 1.0501621462502651e-05, "loss": 0.6259, "step": 6529 }, { "epoch": 0.9992348890589136, "grad_norm": 2.4104646760425394, "learning_rate": 1.049914638389043e-05, "loss": 0.7642, "step": 6530 }, { "epoch": 0.9993879112471309, "grad_norm": 2.048851027289111, "learning_rate": 1.0496671274623675e-05, "loss": 0.6663, "step": 6531 }, { "epoch": 0.9995409334353481, "grad_norm": 2.531917512682566, "learning_rate": 1.0494196134854395e-05, "loss": 0.8121, "step": 6532 }, { "epoch": 0.9996939556235654, "grad_norm": 2.7016640424743534, "learning_rate": 1.0491720964734595e-05, "loss": 0.7759, "step": 6533 }, { "epoch": 0.9998469778117827, "grad_norm": 2.227959878473994, "learning_rate": 1.0489245764416294e-05, "loss": 0.6892, "step": 6534 }, { "epoch": 1.0, "grad_norm": 2.2604243565912374, "learning_rate": 1.0486770534051492e-05, "loss": 0.7176, "step": 6535 }, { "epoch": 1.0001530221882173, "grad_norm": 2.033100428541894, "learning_rate": 1.0484295273792204e-05, "loss": 0.6903, "step": 6536 }, { "epoch": 1.0003060443764347, "grad_norm": 2.1314696539623275, "learning_rate": 1.0481819983790454e-05, "loss": 0.5955, "step": 6537 }, { "epoch": 1.0004590665646518, "grad_norm": 2.5135837640358405, "learning_rate": 1.0479344664198251e-05, "loss": 0.6917, "step": 6538 }, { "epoch": 1.000612088752869, "grad_norm": 2.133925287881583, "learning_rate": 1.0476869315167617e-05, "loss": 0.616, "step": 6539 }, { "epoch": 1.0007651109410864, "grad_norm": 2.1679524566869377, "learning_rate": 1.0474393936850573e-05, "loss": 0.7227, "step": 6540 }, { "epoch": 1.0009181331293038, "grad_norm": 2.3980955664439105, "learning_rate": 1.0471918529399143e-05, "loss": 0.7499, "step": 6541 }, { "epoch": 1.001071155317521, "grad_norm": 2.3699351005706255, "learning_rate": 1.046944309296535e-05, "loss": 0.6967, "step": 6542 }, { "epoch": 1.0012241775057382, "grad_norm": 2.647323810066508, "learning_rate": 1.0466967627701219e-05, "loss": 0.7663, "step": 6543 }, { "epoch": 1.0013771996939556, "grad_norm": 2.137859552412195, "learning_rate": 1.0464492133758779e-05, "loss": 0.5863, "step": 6544 }, { "epoch": 1.0015302218821729, "grad_norm": 2.0219765558236285, "learning_rate": 1.046201661129006e-05, "loss": 0.6693, "step": 6545 }, { "epoch": 1.0016832440703902, "grad_norm": 2.4366531420683137, "learning_rate": 1.0459541060447091e-05, "loss": 0.703, "step": 6546 }, { "epoch": 1.0018362662586076, "grad_norm": 1.9828507027857656, "learning_rate": 1.0457065481381913e-05, "loss": 0.582, "step": 6547 }, { "epoch": 1.0019892884468249, "grad_norm": 2.0692223786471624, "learning_rate": 1.0454589874246555e-05, "loss": 0.5675, "step": 6548 }, { "epoch": 1.002142310635042, "grad_norm": 1.9859837618858298, "learning_rate": 1.0452114239193049e-05, "loss": 0.5558, "step": 6549 }, { "epoch": 1.0022953328232593, "grad_norm": 2.272954348610601, "learning_rate": 1.0449638576373445e-05, "loss": 0.6769, "step": 6550 }, { "epoch": 1.0024483550114767, "grad_norm": 2.420602351894704, "learning_rate": 1.0447162885939776e-05, "loss": 0.6877, "step": 6551 }, { "epoch": 1.002601377199694, "grad_norm": 2.494825351434444, "learning_rate": 1.0444687168044082e-05, "loss": 0.7181, "step": 6552 }, { "epoch": 1.0027543993879113, "grad_norm": 2.285517189923609, "learning_rate": 1.044221142283841e-05, "loss": 0.6558, "step": 6553 }, { "epoch": 1.0029074215761284, "grad_norm": 2.044150801571359, "learning_rate": 1.0439735650474807e-05, "loss": 0.5108, "step": 6554 }, { "epoch": 1.0030604437643458, "grad_norm": 2.26517016730885, "learning_rate": 1.0437259851105318e-05, "loss": 0.6558, "step": 6555 }, { "epoch": 1.003213465952563, "grad_norm": 2.24291348116444, "learning_rate": 1.0434784024881988e-05, "loss": 0.6491, "step": 6556 }, { "epoch": 1.0033664881407804, "grad_norm": 2.187640461685535, "learning_rate": 1.043230817195687e-05, "loss": 0.6019, "step": 6557 }, { "epoch": 1.0035195103289978, "grad_norm": 2.3819794343778278, "learning_rate": 1.0429832292482019e-05, "loss": 0.7496, "step": 6558 }, { "epoch": 1.0036725325172149, "grad_norm": 2.1489073395127356, "learning_rate": 1.0427356386609482e-05, "loss": 0.6125, "step": 6559 }, { "epoch": 1.0038255547054322, "grad_norm": 2.0082206501261686, "learning_rate": 1.042488045449132e-05, "loss": 0.5703, "step": 6560 }, { "epoch": 1.0039785768936496, "grad_norm": 2.142001534829241, "learning_rate": 1.0422404496279585e-05, "loss": 0.5988, "step": 6561 }, { "epoch": 1.0041315990818669, "grad_norm": 2.1385641923885648, "learning_rate": 1.0419928512126337e-05, "loss": 0.6956, "step": 6562 }, { "epoch": 1.0042846212700842, "grad_norm": 2.3824901717985436, "learning_rate": 1.041745250218364e-05, "loss": 0.6451, "step": 6563 }, { "epoch": 1.0044376434583016, "grad_norm": 2.4015296436629776, "learning_rate": 1.0414976466603551e-05, "loss": 0.7278, "step": 6564 }, { "epoch": 1.0045906656465187, "grad_norm": 2.1534435564300347, "learning_rate": 1.0412500405538129e-05, "loss": 0.6363, "step": 6565 }, { "epoch": 1.004743687834736, "grad_norm": 2.48153539686987, "learning_rate": 1.0410024319139447e-05, "loss": 0.6363, "step": 6566 }, { "epoch": 1.0048967100229533, "grad_norm": 2.172810737488868, "learning_rate": 1.040754820755957e-05, "loss": 0.6484, "step": 6567 }, { "epoch": 1.0050497322111707, "grad_norm": 2.5460489768001144, "learning_rate": 1.040507207095056e-05, "loss": 0.8215, "step": 6568 }, { "epoch": 1.005202754399388, "grad_norm": 2.16079862891005, "learning_rate": 1.0402595909464489e-05, "loss": 0.5477, "step": 6569 }, { "epoch": 1.005355776587605, "grad_norm": 2.202814891087014, "learning_rate": 1.0400119723253428e-05, "loss": 0.5666, "step": 6570 }, { "epoch": 1.0055087987758224, "grad_norm": 2.212828269877084, "learning_rate": 1.039764351246945e-05, "loss": 0.5264, "step": 6571 }, { "epoch": 1.0056618209640398, "grad_norm": 2.117103171907091, "learning_rate": 1.039516727726463e-05, "loss": 0.5496, "step": 6572 }, { "epoch": 1.005814843152257, "grad_norm": 2.2084826072189996, "learning_rate": 1.0392691017791041e-05, "loss": 0.5141, "step": 6573 }, { "epoch": 1.0059678653404744, "grad_norm": 2.32130852843778, "learning_rate": 1.0390214734200758e-05, "loss": 0.6334, "step": 6574 }, { "epoch": 1.0061208875286916, "grad_norm": 2.2964584356167284, "learning_rate": 1.0387738426645865e-05, "loss": 0.6497, "step": 6575 }, { "epoch": 1.0062739097169089, "grad_norm": 2.4158826799520434, "learning_rate": 1.0385262095278438e-05, "loss": 0.6641, "step": 6576 }, { "epoch": 1.0064269319051262, "grad_norm": 2.167843641637399, "learning_rate": 1.0382785740250557e-05, "loss": 0.5314, "step": 6577 }, { "epoch": 1.0065799540933436, "grad_norm": 2.283231101433695, "learning_rate": 1.0380309361714306e-05, "loss": 0.6207, "step": 6578 }, { "epoch": 1.0067329762815609, "grad_norm": 2.536910953794421, "learning_rate": 1.0377832959821773e-05, "loss": 0.7732, "step": 6579 }, { "epoch": 1.0068859984697782, "grad_norm": 2.194479016477561, "learning_rate": 1.0375356534725036e-05, "loss": 0.6016, "step": 6580 }, { "epoch": 1.0070390206579953, "grad_norm": 2.568553979829469, "learning_rate": 1.0372880086576187e-05, "loss": 0.5968, "step": 6581 }, { "epoch": 1.0071920428462127, "grad_norm": 2.2861022154819017, "learning_rate": 1.0370403615527311e-05, "loss": 0.6048, "step": 6582 }, { "epoch": 1.00734506503443, "grad_norm": 2.302797866266313, "learning_rate": 1.0367927121730503e-05, "loss": 0.622, "step": 6583 }, { "epoch": 1.0074980872226473, "grad_norm": 2.0879458077730715, "learning_rate": 1.036545060533785e-05, "loss": 0.5533, "step": 6584 }, { "epoch": 1.0076511094108647, "grad_norm": 2.557603243001996, "learning_rate": 1.0362974066501445e-05, "loss": 0.7397, "step": 6585 }, { "epoch": 1.0078041315990818, "grad_norm": 2.312252735085079, "learning_rate": 1.0360497505373386e-05, "loss": 0.7411, "step": 6586 }, { "epoch": 1.007957153787299, "grad_norm": 2.3173357806675736, "learning_rate": 1.035802092210576e-05, "loss": 0.6168, "step": 6587 }, { "epoch": 1.0081101759755164, "grad_norm": 2.2189725542214678, "learning_rate": 1.0355544316850671e-05, "loss": 0.6329, "step": 6588 }, { "epoch": 1.0082631981637338, "grad_norm": 2.204599066701292, "learning_rate": 1.0353067689760217e-05, "loss": 0.5969, "step": 6589 }, { "epoch": 1.008416220351951, "grad_norm": 2.6794556741279414, "learning_rate": 1.0350591040986489e-05, "loss": 0.6986, "step": 6590 }, { "epoch": 1.0085692425401682, "grad_norm": 2.3559628832205797, "learning_rate": 1.03481143706816e-05, "loss": 0.6133, "step": 6591 }, { "epoch": 1.0087222647283856, "grad_norm": 2.759684199213884, "learning_rate": 1.0345637678997642e-05, "loss": 0.6948, "step": 6592 }, { "epoch": 1.0088752869166029, "grad_norm": 2.051050778484884, "learning_rate": 1.0343160966086723e-05, "loss": 0.6322, "step": 6593 }, { "epoch": 1.0090283091048202, "grad_norm": 2.375738470682142, "learning_rate": 1.0340684232100946e-05, "loss": 0.6488, "step": 6594 }, { "epoch": 1.0091813312930376, "grad_norm": 2.2479561575153966, "learning_rate": 1.0338207477192417e-05, "loss": 0.6915, "step": 6595 }, { "epoch": 1.0093343534812549, "grad_norm": 2.1508005625889686, "learning_rate": 1.0335730701513245e-05, "loss": 0.5837, "step": 6596 }, { "epoch": 1.009487375669472, "grad_norm": 2.4929163407137604, "learning_rate": 1.0333253905215537e-05, "loss": 0.7049, "step": 6597 }, { "epoch": 1.0096403978576893, "grad_norm": 2.322330911445932, "learning_rate": 1.0330777088451402e-05, "loss": 0.6233, "step": 6598 }, { "epoch": 1.0097934200459067, "grad_norm": 2.283433120066431, "learning_rate": 1.0328300251372954e-05, "loss": 0.5305, "step": 6599 }, { "epoch": 1.009946442234124, "grad_norm": 2.17015984089956, "learning_rate": 1.0325823394132301e-05, "loss": 0.5958, "step": 6600 }, { "epoch": 1.0100994644223413, "grad_norm": 2.436120696696377, "learning_rate": 1.032334651688156e-05, "loss": 0.6444, "step": 6601 }, { "epoch": 1.0102524866105584, "grad_norm": 2.3607087470720938, "learning_rate": 1.0320869619772847e-05, "loss": 0.6319, "step": 6602 }, { "epoch": 1.0104055087987758, "grad_norm": 2.2905924990988273, "learning_rate": 1.0318392702958268e-05, "loss": 0.6301, "step": 6603 }, { "epoch": 1.010558530986993, "grad_norm": 2.44559146610296, "learning_rate": 1.0315915766589957e-05, "loss": 0.6883, "step": 6604 }, { "epoch": 1.0107115531752104, "grad_norm": 2.4650413110292977, "learning_rate": 1.0313438810820018e-05, "loss": 0.5647, "step": 6605 }, { "epoch": 1.0108645753634278, "grad_norm": 2.1910260377940567, "learning_rate": 1.0310961835800572e-05, "loss": 0.577, "step": 6606 }, { "epoch": 1.0110175975516449, "grad_norm": 2.38892339041073, "learning_rate": 1.0308484841683749e-05, "loss": 0.5909, "step": 6607 }, { "epoch": 1.0111706197398622, "grad_norm": 2.4041673025346832, "learning_rate": 1.0306007828621665e-05, "loss": 0.6755, "step": 6608 }, { "epoch": 1.0113236419280796, "grad_norm": 2.2844546507484287, "learning_rate": 1.0303530796766439e-05, "loss": 0.5774, "step": 6609 }, { "epoch": 1.0114766641162969, "grad_norm": 2.116403540663527, "learning_rate": 1.03010537462702e-05, "loss": 0.515, "step": 6610 }, { "epoch": 1.0116296863045142, "grad_norm": 2.1836805111004374, "learning_rate": 1.0298576677285076e-05, "loss": 0.5105, "step": 6611 }, { "epoch": 1.0117827084927316, "grad_norm": 2.507796655419912, "learning_rate": 1.0296099589963185e-05, "loss": 0.6309, "step": 6612 }, { "epoch": 1.0119357306809487, "grad_norm": 2.0527464335910413, "learning_rate": 1.0293622484456665e-05, "loss": 0.6004, "step": 6613 }, { "epoch": 1.012088752869166, "grad_norm": 2.334226311405419, "learning_rate": 1.0291145360917639e-05, "loss": 0.5387, "step": 6614 }, { "epoch": 1.0122417750573833, "grad_norm": 2.204170110208716, "learning_rate": 1.0288668219498232e-05, "loss": 0.6759, "step": 6615 }, { "epoch": 1.0123947972456007, "grad_norm": 2.3108595745215776, "learning_rate": 1.0286191060350586e-05, "loss": 0.6272, "step": 6616 }, { "epoch": 1.012547819433818, "grad_norm": 2.2782166898829077, "learning_rate": 1.0283713883626829e-05, "loss": 0.6998, "step": 6617 }, { "epoch": 1.012700841622035, "grad_norm": 2.2993109109204903, "learning_rate": 1.0281236689479086e-05, "loss": 0.6204, "step": 6618 }, { "epoch": 1.0128538638102524, "grad_norm": 2.3404054714266582, "learning_rate": 1.0278759478059502e-05, "loss": 0.6709, "step": 6619 }, { "epoch": 1.0130068859984698, "grad_norm": 2.5884362282825637, "learning_rate": 1.0276282249520207e-05, "loss": 0.7326, "step": 6620 }, { "epoch": 1.013159908186687, "grad_norm": 1.9300570634216025, "learning_rate": 1.0273805004013337e-05, "loss": 0.5012, "step": 6621 }, { "epoch": 1.0133129303749044, "grad_norm": 2.377275021865992, "learning_rate": 1.0271327741691032e-05, "loss": 0.6814, "step": 6622 }, { "epoch": 1.0134659525631216, "grad_norm": 2.182477995481934, "learning_rate": 1.0268850462705431e-05, "loss": 0.5598, "step": 6623 }, { "epoch": 1.0136189747513389, "grad_norm": 2.3066689578762207, "learning_rate": 1.0266373167208668e-05, "loss": 0.6129, "step": 6624 }, { "epoch": 1.0137719969395562, "grad_norm": 2.307693232138134, "learning_rate": 1.0263895855352887e-05, "loss": 0.5754, "step": 6625 }, { "epoch": 1.0139250191277736, "grad_norm": 2.292009668838204, "learning_rate": 1.0261418527290233e-05, "loss": 0.6121, "step": 6626 }, { "epoch": 1.0140780413159909, "grad_norm": 2.492568937733668, "learning_rate": 1.025894118317284e-05, "loss": 0.6629, "step": 6627 }, { "epoch": 1.0142310635042082, "grad_norm": 2.2340661216832096, "learning_rate": 1.0256463823152855e-05, "loss": 0.5866, "step": 6628 }, { "epoch": 1.0143840856924253, "grad_norm": 2.110334526976937, "learning_rate": 1.0253986447382429e-05, "loss": 0.5203, "step": 6629 }, { "epoch": 1.0145371078806427, "grad_norm": 2.692658979279218, "learning_rate": 1.0251509056013697e-05, "loss": 0.5705, "step": 6630 }, { "epoch": 1.01469013006886, "grad_norm": 2.428970298715567, "learning_rate": 1.024903164919881e-05, "loss": 0.6109, "step": 6631 }, { "epoch": 1.0148431522570773, "grad_norm": 2.238146074078266, "learning_rate": 1.0246554227089918e-05, "loss": 0.6261, "step": 6632 }, { "epoch": 1.0149961744452947, "grad_norm": 2.573325893114674, "learning_rate": 1.0244076789839169e-05, "loss": 0.6369, "step": 6633 }, { "epoch": 1.0151491966335118, "grad_norm": 2.3159256199731053, "learning_rate": 1.0241599337598702e-05, "loss": 0.5916, "step": 6634 }, { "epoch": 1.015302218821729, "grad_norm": 2.2308322352798444, "learning_rate": 1.0239121870520679e-05, "loss": 0.5703, "step": 6635 }, { "epoch": 1.0154552410099464, "grad_norm": 2.329808400919156, "learning_rate": 1.0236644388757245e-05, "loss": 0.6378, "step": 6636 }, { "epoch": 1.0156082631981638, "grad_norm": 2.435679508309255, "learning_rate": 1.0234166892460554e-05, "loss": 0.6254, "step": 6637 }, { "epoch": 1.015761285386381, "grad_norm": 2.4224361592010295, "learning_rate": 1.0231689381782756e-05, "loss": 0.5599, "step": 6638 }, { "epoch": 1.0159143075745982, "grad_norm": 2.1885969206768032, "learning_rate": 1.0229211856876011e-05, "loss": 0.5952, "step": 6639 }, { "epoch": 1.0160673297628156, "grad_norm": 2.3897857156470255, "learning_rate": 1.0226734317892466e-05, "loss": 0.5323, "step": 6640 }, { "epoch": 1.0162203519510329, "grad_norm": 2.092213659667365, "learning_rate": 1.0224256764984282e-05, "loss": 0.4703, "step": 6641 }, { "epoch": 1.0163733741392502, "grad_norm": 2.1854404332671185, "learning_rate": 1.022177919830361e-05, "loss": 0.6041, "step": 6642 }, { "epoch": 1.0165263963274676, "grad_norm": 2.4022816997942593, "learning_rate": 1.0219301618002611e-05, "loss": 0.64, "step": 6643 }, { "epoch": 1.0166794185156847, "grad_norm": 2.3801526159134982, "learning_rate": 1.021682402423344e-05, "loss": 0.5524, "step": 6644 }, { "epoch": 1.016832440703902, "grad_norm": 2.218696255320501, "learning_rate": 1.021434641714826e-05, "loss": 0.6162, "step": 6645 }, { "epoch": 1.0169854628921193, "grad_norm": 2.7939201783520056, "learning_rate": 1.0211868796899229e-05, "loss": 0.6066, "step": 6646 }, { "epoch": 1.0171384850803367, "grad_norm": 2.264811330544672, "learning_rate": 1.0209391163638503e-05, "loss": 0.539, "step": 6647 }, { "epoch": 1.017291507268554, "grad_norm": 2.318419121835664, "learning_rate": 1.0206913517518246e-05, "loss": 0.6371, "step": 6648 }, { "epoch": 1.0174445294567713, "grad_norm": 2.206422409859083, "learning_rate": 1.0204435858690625e-05, "loss": 0.5619, "step": 6649 }, { "epoch": 1.0175975516449884, "grad_norm": 2.626006555589893, "learning_rate": 1.0201958187307794e-05, "loss": 0.5531, "step": 6650 }, { "epoch": 1.0177505738332058, "grad_norm": 2.344140763614317, "learning_rate": 1.0199480503521924e-05, "loss": 0.6345, "step": 6651 }, { "epoch": 1.017903596021423, "grad_norm": 2.2889519250935177, "learning_rate": 1.0197002807485175e-05, "loss": 0.5799, "step": 6652 }, { "epoch": 1.0180566182096404, "grad_norm": 2.6253194916546154, "learning_rate": 1.0194525099349708e-05, "loss": 0.6793, "step": 6653 }, { "epoch": 1.0182096403978578, "grad_norm": 2.3472924349764344, "learning_rate": 1.01920473792677e-05, "loss": 0.5215, "step": 6654 }, { "epoch": 1.0183626625860749, "grad_norm": 2.22200896592304, "learning_rate": 1.0189569647391308e-05, "loss": 0.5543, "step": 6655 }, { "epoch": 1.0185156847742922, "grad_norm": 2.0095456114314585, "learning_rate": 1.0187091903872703e-05, "loss": 0.5121, "step": 6656 }, { "epoch": 1.0186687069625096, "grad_norm": 2.297998178865159, "learning_rate": 1.0184614148864052e-05, "loss": 0.584, "step": 6657 }, { "epoch": 1.0188217291507269, "grad_norm": 2.332497200625412, "learning_rate": 1.0182136382517526e-05, "loss": 0.5356, "step": 6658 }, { "epoch": 1.0189747513389442, "grad_norm": 2.4417067324261064, "learning_rate": 1.017965860498529e-05, "loss": 0.543, "step": 6659 }, { "epoch": 1.0191277735271613, "grad_norm": 2.15499036744487, "learning_rate": 1.0177180816419516e-05, "loss": 0.5874, "step": 6660 }, { "epoch": 1.0192807957153787, "grad_norm": 2.108874428758097, "learning_rate": 1.0174703016972376e-05, "loss": 0.4502, "step": 6661 }, { "epoch": 1.019433817903596, "grad_norm": 2.548469773857961, "learning_rate": 1.017222520679604e-05, "loss": 0.5865, "step": 6662 }, { "epoch": 1.0195868400918133, "grad_norm": 2.513582400490902, "learning_rate": 1.0169747386042681e-05, "loss": 0.653, "step": 6663 }, { "epoch": 1.0197398622800307, "grad_norm": 2.492061054460402, "learning_rate": 1.0167269554864472e-05, "loss": 0.6406, "step": 6664 }, { "epoch": 1.019892884468248, "grad_norm": 2.180717896254185, "learning_rate": 1.0164791713413583e-05, "loss": 0.5883, "step": 6665 }, { "epoch": 1.020045906656465, "grad_norm": 2.2006717979763786, "learning_rate": 1.016231386184219e-05, "loss": 0.5509, "step": 6666 }, { "epoch": 1.0201989288446824, "grad_norm": 2.3252357391678586, "learning_rate": 1.015983600030247e-05, "loss": 0.6702, "step": 6667 }, { "epoch": 1.0203519510328998, "grad_norm": 2.14665930839897, "learning_rate": 1.0157358128946596e-05, "loss": 0.5438, "step": 6668 }, { "epoch": 1.020504973221117, "grad_norm": 2.3069969574975002, "learning_rate": 1.0154880247926739e-05, "loss": 0.5291, "step": 6669 }, { "epoch": 1.0206579954093344, "grad_norm": 2.413058002612041, "learning_rate": 1.0152402357395086e-05, "loss": 0.5105, "step": 6670 }, { "epoch": 1.0208110175975516, "grad_norm": 2.272849768241214, "learning_rate": 1.0149924457503806e-05, "loss": 0.6272, "step": 6671 }, { "epoch": 1.0209640397857689, "grad_norm": 2.122826286435337, "learning_rate": 1.0147446548405076e-05, "loss": 0.516, "step": 6672 }, { "epoch": 1.0211170619739862, "grad_norm": 2.329647854250823, "learning_rate": 1.0144968630251078e-05, "loss": 0.5394, "step": 6673 }, { "epoch": 1.0212700841622036, "grad_norm": 2.1569395923518138, "learning_rate": 1.0142490703193992e-05, "loss": 0.6182, "step": 6674 }, { "epoch": 1.0214231063504209, "grad_norm": 2.241545575430639, "learning_rate": 1.014001276738599e-05, "loss": 0.5988, "step": 6675 }, { "epoch": 1.021576128538638, "grad_norm": 2.129116623058144, "learning_rate": 1.0137534822979258e-05, "loss": 0.539, "step": 6676 }, { "epoch": 1.0217291507268553, "grad_norm": 2.256824403314073, "learning_rate": 1.0135056870125976e-05, "loss": 0.6206, "step": 6677 }, { "epoch": 1.0218821729150727, "grad_norm": 2.354857212906966, "learning_rate": 1.0132578908978323e-05, "loss": 0.6072, "step": 6678 }, { "epoch": 1.02203519510329, "grad_norm": 2.2770225052597994, "learning_rate": 1.0130100939688478e-05, "loss": 0.6071, "step": 6679 }, { "epoch": 1.0221882172915073, "grad_norm": 2.1691193316787336, "learning_rate": 1.012762296240863e-05, "loss": 0.5668, "step": 6680 }, { "epoch": 1.0223412394797247, "grad_norm": 2.138908409021962, "learning_rate": 1.0125144977290952e-05, "loss": 0.5591, "step": 6681 }, { "epoch": 1.0224942616679418, "grad_norm": 2.3379186543767854, "learning_rate": 1.0122666984487632e-05, "loss": 0.5591, "step": 6682 }, { "epoch": 1.022647283856159, "grad_norm": 2.480112583086363, "learning_rate": 1.0120188984150857e-05, "loss": 0.6374, "step": 6683 }, { "epoch": 1.0228003060443764, "grad_norm": 2.2457431345148935, "learning_rate": 1.0117710976432802e-05, "loss": 0.603, "step": 6684 }, { "epoch": 1.0229533282325938, "grad_norm": 2.3766699997888945, "learning_rate": 1.0115232961485655e-05, "loss": 0.553, "step": 6685 }, { "epoch": 1.023106350420811, "grad_norm": 2.1244860650080652, "learning_rate": 1.0112754939461603e-05, "loss": 0.5574, "step": 6686 }, { "epoch": 1.0232593726090282, "grad_norm": 2.5816730909218815, "learning_rate": 1.011027691051283e-05, "loss": 0.6624, "step": 6687 }, { "epoch": 1.0234123947972456, "grad_norm": 2.2465134024423703, "learning_rate": 1.010779887479152e-05, "loss": 0.6497, "step": 6688 }, { "epoch": 1.0235654169854629, "grad_norm": 2.193620461985959, "learning_rate": 1.0105320832449856e-05, "loss": 0.5496, "step": 6689 }, { "epoch": 1.0237184391736802, "grad_norm": 2.3333263624843212, "learning_rate": 1.0102842783640032e-05, "loss": 0.6004, "step": 6690 }, { "epoch": 1.0238714613618976, "grad_norm": 2.3841598412894442, "learning_rate": 1.0100364728514228e-05, "loss": 0.6371, "step": 6691 }, { "epoch": 1.0240244835501147, "grad_norm": 2.223066765452285, "learning_rate": 1.0097886667224634e-05, "loss": 0.5576, "step": 6692 }, { "epoch": 1.024177505738332, "grad_norm": 2.5087968900231012, "learning_rate": 1.0095408599923438e-05, "loss": 0.5812, "step": 6693 }, { "epoch": 1.0243305279265493, "grad_norm": 2.57215618551181, "learning_rate": 1.0092930526762824e-05, "loss": 0.6023, "step": 6694 }, { "epoch": 1.0244835501147667, "grad_norm": 2.402925023331007, "learning_rate": 1.0090452447894985e-05, "loss": 0.5092, "step": 6695 }, { "epoch": 1.024636572302984, "grad_norm": 2.2994948689718275, "learning_rate": 1.0087974363472107e-05, "loss": 0.6011, "step": 6696 }, { "epoch": 1.0247895944912013, "grad_norm": 2.220827506450029, "learning_rate": 1.0085496273646377e-05, "loss": 0.5272, "step": 6697 }, { "epoch": 1.0249426166794184, "grad_norm": 2.337313218700674, "learning_rate": 1.0083018178569987e-05, "loss": 0.5645, "step": 6698 }, { "epoch": 1.0250956388676358, "grad_norm": 2.3675969691011254, "learning_rate": 1.008054007839513e-05, "loss": 0.6337, "step": 6699 }, { "epoch": 1.025248661055853, "grad_norm": 2.150314678408855, "learning_rate": 1.0078061973273986e-05, "loss": 0.5149, "step": 6700 }, { "epoch": 1.0254016832440704, "grad_norm": 2.5780589470023942, "learning_rate": 1.0075583863358756e-05, "loss": 0.608, "step": 6701 }, { "epoch": 1.0255547054322878, "grad_norm": 2.147503852334118, "learning_rate": 1.0073105748801622e-05, "loss": 0.4818, "step": 6702 }, { "epoch": 1.0257077276205049, "grad_norm": 2.4672879810343504, "learning_rate": 1.0070627629754778e-05, "loss": 0.4973, "step": 6703 }, { "epoch": 1.0258607498087222, "grad_norm": 2.2872325728792804, "learning_rate": 1.0068149506370418e-05, "loss": 0.4786, "step": 6704 }, { "epoch": 1.0260137719969395, "grad_norm": 2.8109264658399375, "learning_rate": 1.0065671378800725e-05, "loss": 0.5947, "step": 6705 }, { "epoch": 1.0261667941851569, "grad_norm": 2.12874627307908, "learning_rate": 1.00631932471979e-05, "loss": 0.475, "step": 6706 }, { "epoch": 1.0263198163733742, "grad_norm": 2.3004995092458516, "learning_rate": 1.006071511171413e-05, "loss": 0.577, "step": 6707 }, { "epoch": 1.0264728385615913, "grad_norm": 2.530976996073714, "learning_rate": 1.0058236972501607e-05, "loss": 0.7009, "step": 6708 }, { "epoch": 1.0266258607498087, "grad_norm": 2.3849208819910586, "learning_rate": 1.0055758829712522e-05, "loss": 0.5976, "step": 6709 }, { "epoch": 1.026778882938026, "grad_norm": 2.141201510237259, "learning_rate": 1.0053280683499069e-05, "loss": 0.5255, "step": 6710 }, { "epoch": 1.0269319051262433, "grad_norm": 2.3014505047534985, "learning_rate": 1.0050802534013444e-05, "loss": 0.5084, "step": 6711 }, { "epoch": 1.0270849273144607, "grad_norm": 2.6769632245570394, "learning_rate": 1.0048324381407837e-05, "loss": 0.6121, "step": 6712 }, { "epoch": 1.027237949502678, "grad_norm": 2.2107203457710756, "learning_rate": 1.0045846225834434e-05, "loss": 0.5219, "step": 6713 }, { "epoch": 1.027390971690895, "grad_norm": 2.2878697948454403, "learning_rate": 1.004336806744544e-05, "loss": 0.5553, "step": 6714 }, { "epoch": 1.0275439938791124, "grad_norm": 2.1499839688324327, "learning_rate": 1.0040889906393044e-05, "loss": 0.5307, "step": 6715 }, { "epoch": 1.0276970160673298, "grad_norm": 2.3165989907378037, "learning_rate": 1.0038411742829437e-05, "loss": 0.5045, "step": 6716 }, { "epoch": 1.027850038255547, "grad_norm": 2.1737988536205797, "learning_rate": 1.0035933576906815e-05, "loss": 0.5147, "step": 6717 }, { "epoch": 1.0280030604437644, "grad_norm": 2.489338280509698, "learning_rate": 1.0033455408777374e-05, "loss": 0.5733, "step": 6718 }, { "epoch": 1.0281560826319815, "grad_norm": 2.3878592780046497, "learning_rate": 1.0030977238593303e-05, "loss": 0.5415, "step": 6719 }, { "epoch": 1.0283091048201989, "grad_norm": 2.4479340330867587, "learning_rate": 1.0028499066506799e-05, "loss": 0.6103, "step": 6720 }, { "epoch": 1.0284621270084162, "grad_norm": 2.3518568222133633, "learning_rate": 1.0026020892670056e-05, "loss": 0.5775, "step": 6721 }, { "epoch": 1.0286151491966335, "grad_norm": 2.938195775116116, "learning_rate": 1.0023542717235268e-05, "loss": 0.5382, "step": 6722 }, { "epoch": 1.0287681713848509, "grad_norm": 2.3623126529241554, "learning_rate": 1.0021064540354627e-05, "loss": 0.5309, "step": 6723 }, { "epoch": 1.028921193573068, "grad_norm": 2.4496299533549166, "learning_rate": 1.0018586362180335e-05, "loss": 0.6377, "step": 6724 }, { "epoch": 1.0290742157612853, "grad_norm": 2.573262792528309, "learning_rate": 1.0016108182864578e-05, "loss": 0.5198, "step": 6725 }, { "epoch": 1.0292272379495027, "grad_norm": 2.29374203363862, "learning_rate": 1.0013630002559556e-05, "loss": 0.5467, "step": 6726 }, { "epoch": 1.02938026013772, "grad_norm": 2.544986422031994, "learning_rate": 1.0011151821417462e-05, "loss": 0.6458, "step": 6727 }, { "epoch": 1.0295332823259373, "grad_norm": 2.355349246426756, "learning_rate": 1.0008673639590493e-05, "loss": 0.533, "step": 6728 }, { "epoch": 1.0296863045141547, "grad_norm": 2.4051355343169605, "learning_rate": 1.0006195457230838e-05, "loss": 0.5813, "step": 6729 }, { "epoch": 1.0298393267023718, "grad_norm": 2.2601662708897052, "learning_rate": 1.0003717274490698e-05, "loss": 0.503, "step": 6730 }, { "epoch": 1.029992348890589, "grad_norm": 2.6026331972423145, "learning_rate": 1.0001239091522266e-05, "loss": 0.6445, "step": 6731 }, { "epoch": 1.0301453710788064, "grad_norm": 2.4502665700638664, "learning_rate": 9.998760908477734e-06, "loss": 0.6071, "step": 6732 }, { "epoch": 1.0302983932670238, "grad_norm": 2.313243201112418, "learning_rate": 9.996282725509305e-06, "loss": 0.6863, "step": 6733 }, { "epoch": 1.030451415455241, "grad_norm": 2.1587677649613357, "learning_rate": 9.993804542769167e-06, "loss": 0.5032, "step": 6734 }, { "epoch": 1.0306044376434582, "grad_norm": 2.4333278255649176, "learning_rate": 9.991326360409509e-06, "loss": 0.5529, "step": 6735 }, { "epoch": 1.0307574598316755, "grad_norm": 2.4443357441123292, "learning_rate": 9.988848178582541e-06, "loss": 0.5748, "step": 6736 }, { "epoch": 1.0309104820198929, "grad_norm": 2.190378352896407, "learning_rate": 9.986369997440445e-06, "loss": 0.5912, "step": 6737 }, { "epoch": 1.0310635042081102, "grad_norm": 2.2856778913735143, "learning_rate": 9.983891817135423e-06, "loss": 0.6169, "step": 6738 }, { "epoch": 1.0312165263963275, "grad_norm": 2.3383660191650604, "learning_rate": 9.98141363781967e-06, "loss": 0.5175, "step": 6739 }, { "epoch": 1.0313695485845447, "grad_norm": 2.4669727398099264, "learning_rate": 9.978935459645374e-06, "loss": 0.5764, "step": 6740 }, { "epoch": 1.031522570772762, "grad_norm": 2.454183738970548, "learning_rate": 9.976457282764735e-06, "loss": 0.5029, "step": 6741 }, { "epoch": 1.0316755929609793, "grad_norm": 2.146600754225679, "learning_rate": 9.97397910732995e-06, "loss": 0.4988, "step": 6742 }, { "epoch": 1.0318286151491967, "grad_norm": 2.544317459747779, "learning_rate": 9.971500933493203e-06, "loss": 0.5676, "step": 6743 }, { "epoch": 1.031981637337414, "grad_norm": 2.129239502795901, "learning_rate": 9.9690227614067e-06, "loss": 0.5151, "step": 6744 }, { "epoch": 1.032134659525631, "grad_norm": 2.5537722341810354, "learning_rate": 9.966544591222626e-06, "loss": 0.515, "step": 6745 }, { "epoch": 1.0322876817138484, "grad_norm": 2.4961159403188464, "learning_rate": 9.964066423093186e-06, "loss": 0.6098, "step": 6746 }, { "epoch": 1.0324407039020658, "grad_norm": 2.26264700212384, "learning_rate": 9.961588257170565e-06, "loss": 0.4847, "step": 6747 }, { "epoch": 1.032593726090283, "grad_norm": 2.3122842418111667, "learning_rate": 9.959110093606956e-06, "loss": 0.5332, "step": 6748 }, { "epoch": 1.0327467482785004, "grad_norm": 2.3473237403635676, "learning_rate": 9.95663193255456e-06, "loss": 0.4923, "step": 6749 }, { "epoch": 1.0328997704667178, "grad_norm": 2.493445708562281, "learning_rate": 9.954153774165564e-06, "loss": 0.6176, "step": 6750 }, { "epoch": 1.0330527926549349, "grad_norm": 2.0928945035756823, "learning_rate": 9.951675618592168e-06, "loss": 0.4642, "step": 6751 }, { "epoch": 1.0332058148431522, "grad_norm": 2.3615436131724716, "learning_rate": 9.94919746598656e-06, "loss": 0.4763, "step": 6752 }, { "epoch": 1.0333588370313695, "grad_norm": 2.4754545731602575, "learning_rate": 9.946719316500931e-06, "loss": 0.5336, "step": 6753 }, { "epoch": 1.0335118592195869, "grad_norm": 2.442943084544504, "learning_rate": 9.94424117028748e-06, "loss": 0.5174, "step": 6754 }, { "epoch": 1.0336648814078042, "grad_norm": 2.682564815574864, "learning_rate": 9.941763027498398e-06, "loss": 0.6083, "step": 6755 }, { "epoch": 1.0338179035960213, "grad_norm": 2.6595816044960965, "learning_rate": 9.939284888285872e-06, "loss": 0.5766, "step": 6756 }, { "epoch": 1.0339709257842387, "grad_norm": 2.5083499459558656, "learning_rate": 9.936806752802103e-06, "loss": 0.5509, "step": 6757 }, { "epoch": 1.034123947972456, "grad_norm": 2.275555021513342, "learning_rate": 9.934328621199273e-06, "loss": 0.5047, "step": 6758 }, { "epoch": 1.0342769701606733, "grad_norm": 2.028981076722719, "learning_rate": 9.931850493629587e-06, "loss": 0.4789, "step": 6759 }, { "epoch": 1.0344299923488907, "grad_norm": 2.0211581388756037, "learning_rate": 9.929372370245225e-06, "loss": 0.5024, "step": 6760 }, { "epoch": 1.034583014537108, "grad_norm": 2.324035265435848, "learning_rate": 9.92689425119838e-06, "loss": 0.5728, "step": 6761 }, { "epoch": 1.034736036725325, "grad_norm": 2.242721972544215, "learning_rate": 9.924416136641249e-06, "loss": 0.4953, "step": 6762 }, { "epoch": 1.0348890589135424, "grad_norm": 2.432779427177171, "learning_rate": 9.921938026726015e-06, "loss": 0.6302, "step": 6763 }, { "epoch": 1.0350420811017598, "grad_norm": 2.2598617265081153, "learning_rate": 9.919459921604872e-06, "loss": 0.5825, "step": 6764 }, { "epoch": 1.035195103289977, "grad_norm": 2.5631858944269412, "learning_rate": 9.916981821430016e-06, "loss": 0.6006, "step": 6765 }, { "epoch": 1.0353481254781944, "grad_norm": 2.50597329793168, "learning_rate": 9.914503726353623e-06, "loss": 0.5106, "step": 6766 }, { "epoch": 1.0355011476664115, "grad_norm": 2.5551518698686464, "learning_rate": 9.912025636527897e-06, "loss": 0.5799, "step": 6767 }, { "epoch": 1.0356541698546289, "grad_norm": 2.716633047615582, "learning_rate": 9.90954755210502e-06, "loss": 0.5963, "step": 6768 }, { "epoch": 1.0358071920428462, "grad_norm": 2.187872786560717, "learning_rate": 9.907069473237178e-06, "loss": 0.5286, "step": 6769 }, { "epoch": 1.0359602142310635, "grad_norm": 2.27583781573365, "learning_rate": 9.904591400076567e-06, "loss": 0.5259, "step": 6770 }, { "epoch": 1.0361132364192809, "grad_norm": 2.2572498780404486, "learning_rate": 9.902113332775372e-06, "loss": 0.507, "step": 6771 }, { "epoch": 1.036266258607498, "grad_norm": 2.270017680287196, "learning_rate": 9.899635271485774e-06, "loss": 0.5607, "step": 6772 }, { "epoch": 1.0364192807957153, "grad_norm": 2.23521431485531, "learning_rate": 9.897157216359972e-06, "loss": 0.5248, "step": 6773 }, { "epoch": 1.0365723029839327, "grad_norm": 2.3389129894008693, "learning_rate": 9.894679167550143e-06, "loss": 0.4359, "step": 6774 }, { "epoch": 1.03672532517215, "grad_norm": 2.2620396345267344, "learning_rate": 9.892201125208484e-06, "loss": 0.5479, "step": 6775 }, { "epoch": 1.0368783473603673, "grad_norm": 2.728001867195493, "learning_rate": 9.889723089487175e-06, "loss": 0.6709, "step": 6776 }, { "epoch": 1.0370313695485844, "grad_norm": 2.1827657385278445, "learning_rate": 9.887245060538397e-06, "loss": 0.496, "step": 6777 }, { "epoch": 1.0371843917368018, "grad_norm": 2.3223435649613973, "learning_rate": 9.884767038514348e-06, "loss": 0.5999, "step": 6778 }, { "epoch": 1.037337413925019, "grad_norm": 2.2614536615688663, "learning_rate": 9.882289023567203e-06, "loss": 0.5056, "step": 6779 }, { "epoch": 1.0374904361132364, "grad_norm": 2.73646991422452, "learning_rate": 9.879811015849147e-06, "loss": 0.6103, "step": 6780 }, { "epoch": 1.0376434583014538, "grad_norm": 2.5056718112340093, "learning_rate": 9.87733301551237e-06, "loss": 0.5605, "step": 6781 }, { "epoch": 1.037796480489671, "grad_norm": 2.48718682079311, "learning_rate": 9.87485502270905e-06, "loss": 0.5004, "step": 6782 }, { "epoch": 1.0379495026778882, "grad_norm": 2.581830328744179, "learning_rate": 9.872377037591374e-06, "loss": 0.5789, "step": 6783 }, { "epoch": 1.0381025248661055, "grad_norm": 2.368386825181525, "learning_rate": 9.869899060311525e-06, "loss": 0.5313, "step": 6784 }, { "epoch": 1.0382555470543229, "grad_norm": 2.4010158149306884, "learning_rate": 9.86742109102168e-06, "loss": 0.5985, "step": 6785 }, { "epoch": 1.0384085692425402, "grad_norm": 2.483515581888754, "learning_rate": 9.864943129874027e-06, "loss": 0.517, "step": 6786 }, { "epoch": 1.0385615914307575, "grad_norm": 2.694645694193147, "learning_rate": 9.862465177020742e-06, "loss": 0.5725, "step": 6787 }, { "epoch": 1.0387146136189747, "grad_norm": 2.00193207158381, "learning_rate": 9.859987232614012e-06, "loss": 0.4655, "step": 6788 }, { "epoch": 1.038867635807192, "grad_norm": 2.297472219311569, "learning_rate": 9.857509296806014e-06, "loss": 0.5012, "step": 6789 }, { "epoch": 1.0390206579954093, "grad_norm": 2.0400785317490127, "learning_rate": 9.855031369748922e-06, "loss": 0.5425, "step": 6790 }, { "epoch": 1.0391736801836267, "grad_norm": 2.162087427618444, "learning_rate": 9.852553451594929e-06, "loss": 0.4972, "step": 6791 }, { "epoch": 1.039326702371844, "grad_norm": 2.297269429343117, "learning_rate": 9.8500755424962e-06, "loss": 0.4935, "step": 6792 }, { "epoch": 1.039479724560061, "grad_norm": 2.4070600512021785, "learning_rate": 9.847597642604917e-06, "loss": 0.5847, "step": 6793 }, { "epoch": 1.0396327467482784, "grad_norm": 2.517284455670916, "learning_rate": 9.845119752073265e-06, "loss": 0.5392, "step": 6794 }, { "epoch": 1.0397857689364958, "grad_norm": 2.3382151130233817, "learning_rate": 9.84264187105341e-06, "loss": 0.5164, "step": 6795 }, { "epoch": 1.039938791124713, "grad_norm": 2.2612802000483216, "learning_rate": 9.840163999697532e-06, "loss": 0.5151, "step": 6796 }, { "epoch": 1.0400918133129304, "grad_norm": 2.2601432005493263, "learning_rate": 9.837686138157813e-06, "loss": 0.5168, "step": 6797 }, { "epoch": 1.0402448355011478, "grad_norm": 2.6100201009422235, "learning_rate": 9.835208286586419e-06, "loss": 0.6259, "step": 6798 }, { "epoch": 1.0403978576893649, "grad_norm": 2.4883767574053706, "learning_rate": 9.832730445135531e-06, "loss": 0.5042, "step": 6799 }, { "epoch": 1.0405508798775822, "grad_norm": 2.2820139860370183, "learning_rate": 9.830252613957322e-06, "loss": 0.4631, "step": 6800 }, { "epoch": 1.0407039020657995, "grad_norm": 2.25483309950987, "learning_rate": 9.827774793203961e-06, "loss": 0.5455, "step": 6801 }, { "epoch": 1.0408569242540169, "grad_norm": 2.6447960053774993, "learning_rate": 9.825296983027625e-06, "loss": 0.6592, "step": 6802 }, { "epoch": 1.0410099464422342, "grad_norm": 2.053680781948206, "learning_rate": 9.822819183580484e-06, "loss": 0.5091, "step": 6803 }, { "epoch": 1.0411629686304513, "grad_norm": 2.395301936062382, "learning_rate": 9.820341395014713e-06, "loss": 0.546, "step": 6804 }, { "epoch": 1.0413159908186687, "grad_norm": 2.033876449030659, "learning_rate": 9.817863617482479e-06, "loss": 0.4092, "step": 6805 }, { "epoch": 1.041469013006886, "grad_norm": 2.413056030738432, "learning_rate": 9.815385851135948e-06, "loss": 0.652, "step": 6806 }, { "epoch": 1.0416220351951033, "grad_norm": 2.448201678783148, "learning_rate": 9.8129080961273e-06, "loss": 0.4885, "step": 6807 }, { "epoch": 1.0417750573833207, "grad_norm": 2.518063374444106, "learning_rate": 9.810430352608695e-06, "loss": 0.5058, "step": 6808 }, { "epoch": 1.0419280795715378, "grad_norm": 2.000437078764586, "learning_rate": 9.807952620732302e-06, "loss": 0.4609, "step": 6809 }, { "epoch": 1.042081101759755, "grad_norm": 2.602756111222112, "learning_rate": 9.805474900650296e-06, "loss": 0.6179, "step": 6810 }, { "epoch": 1.0422341239479724, "grad_norm": 2.4146641274588654, "learning_rate": 9.802997192514827e-06, "loss": 0.5391, "step": 6811 }, { "epoch": 1.0423871461361898, "grad_norm": 2.4073284310419356, "learning_rate": 9.80051949647808e-06, "loss": 0.5885, "step": 6812 }, { "epoch": 1.042540168324407, "grad_norm": 2.3612873008392166, "learning_rate": 9.798041812692211e-06, "loss": 0.5162, "step": 6813 }, { "epoch": 1.0426931905126244, "grad_norm": 2.5148254857651042, "learning_rate": 9.795564141309376e-06, "loss": 0.5554, "step": 6814 }, { "epoch": 1.0428462127008415, "grad_norm": 2.193824725601136, "learning_rate": 9.793086482481755e-06, "loss": 0.4637, "step": 6815 }, { "epoch": 1.0429992348890589, "grad_norm": 2.195282093201084, "learning_rate": 9.790608836361502e-06, "loss": 0.5601, "step": 6816 }, { "epoch": 1.0431522570772762, "grad_norm": 2.3815272787115522, "learning_rate": 9.788131203100774e-06, "loss": 0.5558, "step": 6817 }, { "epoch": 1.0433052792654935, "grad_norm": 2.579512220117735, "learning_rate": 9.785653582851745e-06, "loss": 0.5961, "step": 6818 }, { "epoch": 1.0434583014537109, "grad_norm": 2.3883347162224244, "learning_rate": 9.783175975766561e-06, "loss": 0.6108, "step": 6819 }, { "epoch": 1.043611323641928, "grad_norm": 2.3174436373500686, "learning_rate": 9.78069838199739e-06, "loss": 0.5733, "step": 6820 }, { "epoch": 1.0437643458301453, "grad_norm": 2.2060498412069744, "learning_rate": 9.778220801696395e-06, "loss": 0.5214, "step": 6821 }, { "epoch": 1.0439173680183627, "grad_norm": 2.2232999074708797, "learning_rate": 9.775743235015721e-06, "loss": 0.5198, "step": 6822 }, { "epoch": 1.04407039020658, "grad_norm": 2.3554090047539535, "learning_rate": 9.773265682107538e-06, "loss": 0.5541, "step": 6823 }, { "epoch": 1.0442234123947973, "grad_norm": 2.16785446117587, "learning_rate": 9.77078814312399e-06, "loss": 0.4758, "step": 6824 }, { "epoch": 1.0443764345830144, "grad_norm": 2.454126851389489, "learning_rate": 9.768310618217246e-06, "loss": 0.5185, "step": 6825 }, { "epoch": 1.0445294567712318, "grad_norm": 2.6924414731654864, "learning_rate": 9.76583310753945e-06, "loss": 0.639, "step": 6826 }, { "epoch": 1.044682478959449, "grad_norm": 2.265111260087062, "learning_rate": 9.763355611242757e-06, "loss": 0.4323, "step": 6827 }, { "epoch": 1.0448355011476664, "grad_norm": 2.277809428494371, "learning_rate": 9.760878129479325e-06, "loss": 0.4809, "step": 6828 }, { "epoch": 1.0449885233358838, "grad_norm": 2.2456569046235453, "learning_rate": 9.758400662401301e-06, "loss": 0.4522, "step": 6829 }, { "epoch": 1.045141545524101, "grad_norm": 2.453577541760022, "learning_rate": 9.755923210160836e-06, "loss": 0.4833, "step": 6830 }, { "epoch": 1.0452945677123182, "grad_norm": 2.482612822718974, "learning_rate": 9.753445772910085e-06, "loss": 0.5288, "step": 6831 }, { "epoch": 1.0454475899005355, "grad_norm": 2.2688767121063216, "learning_rate": 9.75096835080119e-06, "loss": 0.5265, "step": 6832 }, { "epoch": 1.0456006120887529, "grad_norm": 2.3742525332643485, "learning_rate": 9.748490943986304e-06, "loss": 0.5401, "step": 6833 }, { "epoch": 1.0457536342769702, "grad_norm": 2.297535035475942, "learning_rate": 9.746013552617576e-06, "loss": 0.5176, "step": 6834 }, { "epoch": 1.0459066564651875, "grad_norm": 2.2986799616130615, "learning_rate": 9.743536176847145e-06, "loss": 0.6058, "step": 6835 }, { "epoch": 1.0460596786534047, "grad_norm": 2.6040700168531097, "learning_rate": 9.741058816827162e-06, "loss": 0.4995, "step": 6836 }, { "epoch": 1.046212700841622, "grad_norm": 2.305543948337454, "learning_rate": 9.738581472709774e-06, "loss": 0.5135, "step": 6837 }, { "epoch": 1.0463657230298393, "grad_norm": 2.4772310516819034, "learning_rate": 9.736104144647114e-06, "loss": 0.5416, "step": 6838 }, { "epoch": 1.0465187452180567, "grad_norm": 2.292649813878011, "learning_rate": 9.733626832791336e-06, "loss": 0.4853, "step": 6839 }, { "epoch": 1.046671767406274, "grad_norm": 2.7654251480866803, "learning_rate": 9.73114953729457e-06, "loss": 0.4503, "step": 6840 }, { "epoch": 1.046824789594491, "grad_norm": 2.4129007595804945, "learning_rate": 9.72867225830897e-06, "loss": 0.547, "step": 6841 }, { "epoch": 1.0469778117827084, "grad_norm": 2.565444877722573, "learning_rate": 9.726194995986665e-06, "loss": 0.5448, "step": 6842 }, { "epoch": 1.0471308339709258, "grad_norm": 2.3819084893973965, "learning_rate": 9.723717750479793e-06, "loss": 0.4662, "step": 6843 }, { "epoch": 1.047283856159143, "grad_norm": 1.9489195524522251, "learning_rate": 9.721240521940501e-06, "loss": 0.5165, "step": 6844 }, { "epoch": 1.0474368783473604, "grad_norm": 2.5377098649664727, "learning_rate": 9.718763310520916e-06, "loss": 0.6242, "step": 6845 }, { "epoch": 1.0475899005355775, "grad_norm": 2.280532226310244, "learning_rate": 9.716286116373174e-06, "loss": 0.6094, "step": 6846 }, { "epoch": 1.0477429227237949, "grad_norm": 2.3174754567622773, "learning_rate": 9.713808939649417e-06, "loss": 0.4831, "step": 6847 }, { "epoch": 1.0478959449120122, "grad_norm": 2.5142215573367515, "learning_rate": 9.711331780501766e-06, "loss": 0.5593, "step": 6848 }, { "epoch": 1.0480489671002295, "grad_norm": 2.1926272912275264, "learning_rate": 9.708854639082364e-06, "loss": 0.4583, "step": 6849 }, { "epoch": 1.0482019892884469, "grad_norm": 2.226115977484892, "learning_rate": 9.70637751554334e-06, "loss": 0.537, "step": 6850 }, { "epoch": 1.0483550114766642, "grad_norm": 2.115691597008675, "learning_rate": 9.703900410036815e-06, "loss": 0.5448, "step": 6851 }, { "epoch": 1.0485080336648813, "grad_norm": 2.180567669515745, "learning_rate": 9.701423322714928e-06, "loss": 0.5617, "step": 6852 }, { "epoch": 1.0486610558530987, "grad_norm": 2.382574902525598, "learning_rate": 9.698946253729804e-06, "loss": 0.5341, "step": 6853 }, { "epoch": 1.048814078041316, "grad_norm": 2.239884675264611, "learning_rate": 9.696469203233565e-06, "loss": 0.534, "step": 6854 }, { "epoch": 1.0489671002295333, "grad_norm": 2.2210027907755334, "learning_rate": 9.693992171378342e-06, "loss": 0.4105, "step": 6855 }, { "epoch": 1.0491201224177507, "grad_norm": 2.1536780424328894, "learning_rate": 9.691515158316253e-06, "loss": 0.4926, "step": 6856 }, { "epoch": 1.0492731446059678, "grad_norm": 2.533943071647297, "learning_rate": 9.68903816419943e-06, "loss": 0.6514, "step": 6857 }, { "epoch": 1.049426166794185, "grad_norm": 2.292108221676427, "learning_rate": 9.686561189179989e-06, "loss": 0.4834, "step": 6858 }, { "epoch": 1.0495791889824024, "grad_norm": 2.1828211174386403, "learning_rate": 9.684084233410048e-06, "loss": 0.4351, "step": 6859 }, { "epoch": 1.0497322111706198, "grad_norm": 2.3255510612548007, "learning_rate": 9.681607297041734e-06, "loss": 0.4607, "step": 6860 }, { "epoch": 1.049885233358837, "grad_norm": 2.1176499482167475, "learning_rate": 9.679130380227158e-06, "loss": 0.4311, "step": 6861 }, { "epoch": 1.0500382555470544, "grad_norm": 2.317475873274802, "learning_rate": 9.676653483118441e-06, "loss": 0.4604, "step": 6862 }, { "epoch": 1.0501912777352715, "grad_norm": 2.855097863082061, "learning_rate": 9.674176605867702e-06, "loss": 0.5651, "step": 6863 }, { "epoch": 1.0503442999234889, "grad_norm": 2.2833665485609074, "learning_rate": 9.67169974862705e-06, "loss": 0.4677, "step": 6864 }, { "epoch": 1.0504973221117062, "grad_norm": 2.606929688063134, "learning_rate": 9.6692229115486e-06, "loss": 0.597, "step": 6865 }, { "epoch": 1.0506503442999235, "grad_norm": 2.390847436864242, "learning_rate": 9.666746094784468e-06, "loss": 0.5273, "step": 6866 }, { "epoch": 1.0508033664881409, "grad_norm": 2.5605618344036545, "learning_rate": 9.664269298486759e-06, "loss": 0.572, "step": 6867 }, { "epoch": 1.050956388676358, "grad_norm": 2.254858739297128, "learning_rate": 9.661792522807586e-06, "loss": 0.4767, "step": 6868 }, { "epoch": 1.0511094108645753, "grad_norm": 2.4423773067977423, "learning_rate": 9.659315767899055e-06, "loss": 0.5312, "step": 6869 }, { "epoch": 1.0512624330527927, "grad_norm": 2.5114329776973423, "learning_rate": 9.656839033913282e-06, "loss": 0.5757, "step": 6870 }, { "epoch": 1.05141545524101, "grad_norm": 2.17181345130201, "learning_rate": 9.654362321002363e-06, "loss": 0.4097, "step": 6871 }, { "epoch": 1.0515684774292273, "grad_norm": 2.0419482932100363, "learning_rate": 9.651885629318402e-06, "loss": 0.4301, "step": 6872 }, { "epoch": 1.0517214996174444, "grad_norm": 2.266374883329687, "learning_rate": 9.649408959013513e-06, "loss": 0.4822, "step": 6873 }, { "epoch": 1.0518745218056618, "grad_norm": 2.205778807974861, "learning_rate": 9.64693231023979e-06, "loss": 0.5209, "step": 6874 }, { "epoch": 1.052027543993879, "grad_norm": 2.260631995522916, "learning_rate": 9.64445568314933e-06, "loss": 0.44, "step": 6875 }, { "epoch": 1.0521805661820964, "grad_norm": 2.3307479914634466, "learning_rate": 9.641979077894244e-06, "loss": 0.4535, "step": 6876 }, { "epoch": 1.0523335883703138, "grad_norm": 2.2111017557168826, "learning_rate": 9.639502494626618e-06, "loss": 0.5096, "step": 6877 }, { "epoch": 1.0524866105585309, "grad_norm": 2.366404180462465, "learning_rate": 9.637025933498556e-06, "loss": 0.5082, "step": 6878 }, { "epoch": 1.0526396327467482, "grad_norm": 2.51983682850274, "learning_rate": 9.634549394662154e-06, "loss": 0.5402, "step": 6879 }, { "epoch": 1.0527926549349655, "grad_norm": 2.605006859877165, "learning_rate": 9.6320728782695e-06, "loss": 0.5005, "step": 6880 }, { "epoch": 1.0529456771231829, "grad_norm": 1.9584831153342839, "learning_rate": 9.62959638447269e-06, "loss": 0.4544, "step": 6881 }, { "epoch": 1.0530986993114002, "grad_norm": 2.503851041495755, "learning_rate": 9.62711991342382e-06, "loss": 0.4845, "step": 6882 }, { "epoch": 1.0532517214996175, "grad_norm": 2.197647946951353, "learning_rate": 9.624643465274968e-06, "loss": 0.5008, "step": 6883 }, { "epoch": 1.0534047436878347, "grad_norm": 2.6562728932565185, "learning_rate": 9.622167040178233e-06, "loss": 0.5155, "step": 6884 }, { "epoch": 1.053557765876052, "grad_norm": 2.2358372881033945, "learning_rate": 9.619690638285694e-06, "loss": 0.526, "step": 6885 }, { "epoch": 1.0537107880642693, "grad_norm": 2.278041438282626, "learning_rate": 9.617214259749445e-06, "loss": 0.4483, "step": 6886 }, { "epoch": 1.0538638102524867, "grad_norm": 3.765678149655499, "learning_rate": 9.614737904721567e-06, "loss": 0.4127, "step": 6887 }, { "epoch": 1.054016832440704, "grad_norm": 2.345689494157314, "learning_rate": 9.612261573354137e-06, "loss": 0.4669, "step": 6888 }, { "epoch": 1.054169854628921, "grad_norm": 2.215666024931825, "learning_rate": 9.609785265799243e-06, "loss": 0.4277, "step": 6889 }, { "epoch": 1.0543228768171384, "grad_norm": 2.1371736190516355, "learning_rate": 9.607308982208959e-06, "loss": 0.4995, "step": 6890 }, { "epoch": 1.0544758990053558, "grad_norm": 2.5607618798633167, "learning_rate": 9.604832722735373e-06, "loss": 0.4405, "step": 6891 }, { "epoch": 1.054628921193573, "grad_norm": 2.2072868289383214, "learning_rate": 9.602356487530553e-06, "loss": 0.4679, "step": 6892 }, { "epoch": 1.0547819433817904, "grad_norm": 2.417589786036523, "learning_rate": 9.599880276746572e-06, "loss": 0.4911, "step": 6893 }, { "epoch": 1.0549349655700078, "grad_norm": 2.0872106304582982, "learning_rate": 9.597404090535515e-06, "loss": 0.4545, "step": 6894 }, { "epoch": 1.0550879877582249, "grad_norm": 2.46713381700791, "learning_rate": 9.594927929049447e-06, "loss": 0.4741, "step": 6895 }, { "epoch": 1.0552410099464422, "grad_norm": 2.306058536221254, "learning_rate": 9.592451792440433e-06, "loss": 0.4439, "step": 6896 }, { "epoch": 1.0553940321346595, "grad_norm": 3.022745301253467, "learning_rate": 9.589975680860556e-06, "loss": 0.5254, "step": 6897 }, { "epoch": 1.0555470543228769, "grad_norm": 2.345844867262732, "learning_rate": 9.587499594461871e-06, "loss": 0.3766, "step": 6898 }, { "epoch": 1.0557000765110942, "grad_norm": 2.223361710672595, "learning_rate": 9.585023533396452e-06, "loss": 0.4829, "step": 6899 }, { "epoch": 1.0558530986993113, "grad_norm": 2.328724388731237, "learning_rate": 9.582547497816364e-06, "loss": 0.5028, "step": 6900 }, { "epoch": 1.0560061208875287, "grad_norm": 2.2646982308630363, "learning_rate": 9.580071487873663e-06, "loss": 0.4627, "step": 6901 }, { "epoch": 1.056159143075746, "grad_norm": 2.2351291324961093, "learning_rate": 9.577595503720417e-06, "loss": 0.5294, "step": 6902 }, { "epoch": 1.0563121652639633, "grad_norm": 2.317554741459491, "learning_rate": 9.575119545508686e-06, "loss": 0.4855, "step": 6903 }, { "epoch": 1.0564651874521807, "grad_norm": 2.335702932508031, "learning_rate": 9.572643613390521e-06, "loss": 0.4449, "step": 6904 }, { "epoch": 1.0566182096403978, "grad_norm": 2.3390630669388472, "learning_rate": 9.570167707517986e-06, "loss": 0.497, "step": 6905 }, { "epoch": 1.056771231828615, "grad_norm": 2.0915049224848734, "learning_rate": 9.567691828043131e-06, "loss": 0.4223, "step": 6906 }, { "epoch": 1.0569242540168324, "grad_norm": 2.4297434477300177, "learning_rate": 9.565215975118016e-06, "loss": 0.5876, "step": 6907 }, { "epoch": 1.0570772762050498, "grad_norm": 2.4562615398492014, "learning_rate": 9.562740148894687e-06, "loss": 0.4997, "step": 6908 }, { "epoch": 1.057230298393267, "grad_norm": 2.5933852297859024, "learning_rate": 9.560264349525193e-06, "loss": 0.5457, "step": 6909 }, { "epoch": 1.0573833205814842, "grad_norm": 2.107241488019224, "learning_rate": 9.557788577161592e-06, "loss": 0.4718, "step": 6910 }, { "epoch": 1.0575363427697015, "grad_norm": 2.3210855880243195, "learning_rate": 9.555312831955921e-06, "loss": 0.4619, "step": 6911 }, { "epoch": 1.0576893649579189, "grad_norm": 2.2978307140280405, "learning_rate": 9.552837114060226e-06, "loss": 0.5951, "step": 6912 }, { "epoch": 1.0578423871461362, "grad_norm": 2.2115010649647653, "learning_rate": 9.550361423626558e-06, "loss": 0.484, "step": 6913 }, { "epoch": 1.0579954093343535, "grad_norm": 2.003534556604302, "learning_rate": 9.54788576080695e-06, "loss": 0.3852, "step": 6914 }, { "epoch": 1.0581484315225709, "grad_norm": 2.429515724313838, "learning_rate": 9.545410125753448e-06, "loss": 0.5315, "step": 6915 }, { "epoch": 1.058301453710788, "grad_norm": 2.173108303541178, "learning_rate": 9.542934518618092e-06, "loss": 0.4607, "step": 6916 }, { "epoch": 1.0584544758990053, "grad_norm": 2.232859421941343, "learning_rate": 9.540458939552907e-06, "loss": 0.4832, "step": 6917 }, { "epoch": 1.0586074980872227, "grad_norm": 2.1284743617910205, "learning_rate": 9.537983388709943e-06, "loss": 0.4607, "step": 6918 }, { "epoch": 1.05876052027544, "grad_norm": 2.1196258230083673, "learning_rate": 9.535507866241226e-06, "loss": 0.4231, "step": 6919 }, { "epoch": 1.0589135424636573, "grad_norm": 2.532512044150864, "learning_rate": 9.533032372298784e-06, "loss": 0.5098, "step": 6920 }, { "epoch": 1.0590665646518744, "grad_norm": 2.2109378116606604, "learning_rate": 9.530556907034653e-06, "loss": 0.4378, "step": 6921 }, { "epoch": 1.0592195868400918, "grad_norm": 2.3043607556194154, "learning_rate": 9.528081470600857e-06, "loss": 0.4155, "step": 6922 }, { "epoch": 1.059372609028309, "grad_norm": 2.319007189741688, "learning_rate": 9.525606063149429e-06, "loss": 0.4975, "step": 6923 }, { "epoch": 1.0595256312165264, "grad_norm": 2.2928485267167447, "learning_rate": 9.523130684832386e-06, "loss": 0.5141, "step": 6924 }, { "epoch": 1.0596786534047438, "grad_norm": 2.4235326737855254, "learning_rate": 9.52065533580175e-06, "loss": 0.4867, "step": 6925 }, { "epoch": 1.0598316755929609, "grad_norm": 2.168516218109533, "learning_rate": 9.518180016209551e-06, "loss": 0.4407, "step": 6926 }, { "epoch": 1.0599846977811782, "grad_norm": 2.069744482004005, "learning_rate": 9.515704726207796e-06, "loss": 0.3991, "step": 6927 }, { "epoch": 1.0601377199693955, "grad_norm": 2.1506333934930257, "learning_rate": 9.513229465948511e-06, "loss": 0.4787, "step": 6928 }, { "epoch": 1.0602907421576129, "grad_norm": 2.2749822448358814, "learning_rate": 9.510754235583713e-06, "loss": 0.4889, "step": 6929 }, { "epoch": 1.0604437643458302, "grad_norm": 2.018532232312824, "learning_rate": 9.508279035265405e-06, "loss": 0.4167, "step": 6930 }, { "epoch": 1.0605967865340475, "grad_norm": 2.120989225905039, "learning_rate": 9.505803865145606e-06, "loss": 0.4536, "step": 6931 }, { "epoch": 1.0607498087222647, "grad_norm": 2.1292285782630396, "learning_rate": 9.50332872537633e-06, "loss": 0.4711, "step": 6932 }, { "epoch": 1.060902830910482, "grad_norm": 2.5222440705575466, "learning_rate": 9.500853616109572e-06, "loss": 0.536, "step": 6933 }, { "epoch": 1.0610558530986993, "grad_norm": 2.396454824637159, "learning_rate": 9.498378537497352e-06, "loss": 0.5248, "step": 6934 }, { "epoch": 1.0612088752869167, "grad_norm": 2.42334812785671, "learning_rate": 9.495903489691665e-06, "loss": 0.5531, "step": 6935 }, { "epoch": 1.061361897475134, "grad_norm": 2.217976890998022, "learning_rate": 9.493428472844521e-06, "loss": 0.4391, "step": 6936 }, { "epoch": 1.061514919663351, "grad_norm": 2.170116082357399, "learning_rate": 9.490953487107913e-06, "loss": 0.3839, "step": 6937 }, { "epoch": 1.0616679418515684, "grad_norm": 2.2091807909239263, "learning_rate": 9.488478532633842e-06, "loss": 0.4498, "step": 6938 }, { "epoch": 1.0618209640397858, "grad_norm": 2.455234760203659, "learning_rate": 9.48600360957431e-06, "loss": 0.5034, "step": 6939 }, { "epoch": 1.061973986228003, "grad_norm": 2.6446464434159487, "learning_rate": 9.483528718081303e-06, "loss": 0.5489, "step": 6940 }, { "epoch": 1.0621270084162204, "grad_norm": 2.493535788047184, "learning_rate": 9.481053858306816e-06, "loss": 0.5001, "step": 6941 }, { "epoch": 1.0622800306044375, "grad_norm": 2.679684989109719, "learning_rate": 9.478579030402849e-06, "loss": 0.602, "step": 6942 }, { "epoch": 1.0624330527926549, "grad_norm": 2.2237217293867833, "learning_rate": 9.476104234521376e-06, "loss": 0.4686, "step": 6943 }, { "epoch": 1.0625860749808722, "grad_norm": 2.2552698510205804, "learning_rate": 9.473629470814395e-06, "loss": 0.482, "step": 6944 }, { "epoch": 1.0627390971690895, "grad_norm": 2.3382411136262915, "learning_rate": 9.471154739433889e-06, "loss": 0.5241, "step": 6945 }, { "epoch": 1.0628921193573069, "grad_norm": 2.277125644575901, "learning_rate": 9.468680040531834e-06, "loss": 0.4437, "step": 6946 }, { "epoch": 1.063045141545524, "grad_norm": 2.078240326941889, "learning_rate": 9.46620537426022e-06, "loss": 0.4313, "step": 6947 }, { "epoch": 1.0631981637337413, "grad_norm": 2.130682788759023, "learning_rate": 9.463730740771025e-06, "loss": 0.4409, "step": 6948 }, { "epoch": 1.0633511859219587, "grad_norm": 2.30060169305822, "learning_rate": 9.461256140216218e-06, "loss": 0.4819, "step": 6949 }, { "epoch": 1.063504208110176, "grad_norm": 2.519758537925622, "learning_rate": 9.45878157274778e-06, "loss": 0.5226, "step": 6950 }, { "epoch": 1.0636572302983933, "grad_norm": 2.017059855190871, "learning_rate": 9.456307038517682e-06, "loss": 0.3927, "step": 6951 }, { "epoch": 1.0638102524866107, "grad_norm": 2.4161156149987413, "learning_rate": 9.453832537677899e-06, "loss": 0.4671, "step": 6952 }, { "epoch": 1.0639632746748278, "grad_norm": 2.4352087813015295, "learning_rate": 9.451358070380395e-06, "loss": 0.5307, "step": 6953 }, { "epoch": 1.064116296863045, "grad_norm": 2.4821772090895924, "learning_rate": 9.448883636777136e-06, "loss": 0.6598, "step": 6954 }, { "epoch": 1.0642693190512624, "grad_norm": 2.393312834043304, "learning_rate": 9.446409237020093e-06, "loss": 0.5168, "step": 6955 }, { "epoch": 1.0644223412394798, "grad_norm": 2.321195584837522, "learning_rate": 9.443934871261223e-06, "loss": 0.4782, "step": 6956 }, { "epoch": 1.064575363427697, "grad_norm": 2.083851712582457, "learning_rate": 9.441460539652485e-06, "loss": 0.3878, "step": 6957 }, { "epoch": 1.0647283856159142, "grad_norm": 2.2876127565948106, "learning_rate": 9.438986242345844e-06, "loss": 0.4808, "step": 6958 }, { "epoch": 1.0648814078041315, "grad_norm": 2.328857160236901, "learning_rate": 9.436511979493249e-06, "loss": 0.4874, "step": 6959 }, { "epoch": 1.0650344299923489, "grad_norm": 2.483784227392007, "learning_rate": 9.43403775124666e-06, "loss": 0.4848, "step": 6960 }, { "epoch": 1.0651874521805662, "grad_norm": 2.139738952325263, "learning_rate": 9.43156355775803e-06, "loss": 0.4908, "step": 6961 }, { "epoch": 1.0653404743687835, "grad_norm": 2.215075116418688, "learning_rate": 9.429089399179298e-06, "loss": 0.4778, "step": 6962 }, { "epoch": 1.0654934965570009, "grad_norm": 1.9814013494006328, "learning_rate": 9.426615275662426e-06, "loss": 0.441, "step": 6963 }, { "epoch": 1.065646518745218, "grad_norm": 2.275252087564853, "learning_rate": 9.424141187359347e-06, "loss": 0.5158, "step": 6964 }, { "epoch": 1.0657995409334353, "grad_norm": 2.2604361487894638, "learning_rate": 9.421667134422018e-06, "loss": 0.4746, "step": 6965 }, { "epoch": 1.0659525631216527, "grad_norm": 2.539253336905643, "learning_rate": 9.419193117002367e-06, "loss": 0.4601, "step": 6966 }, { "epoch": 1.06610558530987, "grad_norm": 2.3308452441992413, "learning_rate": 9.416719135252338e-06, "loss": 0.469, "step": 6967 }, { "epoch": 1.0662586074980873, "grad_norm": 2.1799849355110417, "learning_rate": 9.414245189323875e-06, "loss": 0.4691, "step": 6968 }, { "epoch": 1.0664116296863044, "grad_norm": 2.438373820976639, "learning_rate": 9.411771279368902e-06, "loss": 0.4941, "step": 6969 }, { "epoch": 1.0665646518745218, "grad_norm": 2.1442590483045816, "learning_rate": 9.409297405539355e-06, "loss": 0.3921, "step": 6970 }, { "epoch": 1.066717674062739, "grad_norm": 2.3576024377638167, "learning_rate": 9.40682356798717e-06, "loss": 0.4943, "step": 6971 }, { "epoch": 1.0668706962509564, "grad_norm": 2.3218424806222435, "learning_rate": 9.404349766864262e-06, "loss": 0.5805, "step": 6972 }, { "epoch": 1.0670237184391738, "grad_norm": 2.4645763128765745, "learning_rate": 9.401876002322573e-06, "loss": 0.4609, "step": 6973 }, { "epoch": 1.0671767406273909, "grad_norm": 2.3130841744661073, "learning_rate": 9.399402274514017e-06, "loss": 0.5233, "step": 6974 }, { "epoch": 1.0673297628156082, "grad_norm": 2.1876222922209703, "learning_rate": 9.39692858359051e-06, "loss": 0.4513, "step": 6975 }, { "epoch": 1.0674827850038255, "grad_norm": 2.033847811009393, "learning_rate": 9.394454929703987e-06, "loss": 0.3781, "step": 6976 }, { "epoch": 1.0676358071920429, "grad_norm": 2.482097524069422, "learning_rate": 9.391981313006354e-06, "loss": 0.4744, "step": 6977 }, { "epoch": 1.0677888293802602, "grad_norm": 2.346499473043113, "learning_rate": 9.38950773364952e-06, "loss": 0.4851, "step": 6978 }, { "epoch": 1.0679418515684773, "grad_norm": 2.157264052254601, "learning_rate": 9.387034191785413e-06, "loss": 0.4382, "step": 6979 }, { "epoch": 1.0680948737566947, "grad_norm": 2.3721231776727056, "learning_rate": 9.384560687565927e-06, "loss": 0.485, "step": 6980 }, { "epoch": 1.068247895944912, "grad_norm": 2.2298549333189306, "learning_rate": 9.382087221142981e-06, "loss": 0.4579, "step": 6981 }, { "epoch": 1.0684009181331293, "grad_norm": 2.0889715056745377, "learning_rate": 9.379613792668478e-06, "loss": 0.4872, "step": 6982 }, { "epoch": 1.0685539403213467, "grad_norm": 2.0541752026840365, "learning_rate": 9.377140402294315e-06, "loss": 0.4358, "step": 6983 }, { "epoch": 1.068706962509564, "grad_norm": 2.327139410292757, "learning_rate": 9.374667050172397e-06, "loss": 0.4975, "step": 6984 }, { "epoch": 1.068859984697781, "grad_norm": 2.367702726919621, "learning_rate": 9.372193736454626e-06, "loss": 0.5055, "step": 6985 }, { "epoch": 1.0690130068859984, "grad_norm": 2.178558568246285, "learning_rate": 9.369720461292889e-06, "loss": 0.4234, "step": 6986 }, { "epoch": 1.0691660290742158, "grad_norm": 2.2406729508801653, "learning_rate": 9.367247224839084e-06, "loss": 0.4451, "step": 6987 }, { "epoch": 1.069319051262433, "grad_norm": 2.2256944403639953, "learning_rate": 9.364774027245102e-06, "loss": 0.443, "step": 6988 }, { "epoch": 1.0694720734506504, "grad_norm": 2.0697411441110463, "learning_rate": 9.362300868662837e-06, "loss": 0.441, "step": 6989 }, { "epoch": 1.0696250956388675, "grad_norm": 2.2535106828944182, "learning_rate": 9.359827749244168e-06, "loss": 0.3842, "step": 6990 }, { "epoch": 1.0697781178270849, "grad_norm": 2.3355363703052214, "learning_rate": 9.357354669140977e-06, "loss": 0.4452, "step": 6991 }, { "epoch": 1.0699311400153022, "grad_norm": 2.144156733012759, "learning_rate": 9.354881628505156e-06, "loss": 0.4491, "step": 6992 }, { "epoch": 1.0700841622035195, "grad_norm": 2.304338238051539, "learning_rate": 9.352408627488574e-06, "loss": 0.4392, "step": 6993 }, { "epoch": 1.0702371843917369, "grad_norm": 2.207433271716185, "learning_rate": 9.349935666243112e-06, "loss": 0.3962, "step": 6994 }, { "epoch": 1.0703902065799542, "grad_norm": 2.339465394300809, "learning_rate": 9.347462744920646e-06, "loss": 0.5691, "step": 6995 }, { "epoch": 1.0705432287681713, "grad_norm": 2.2116556612187286, "learning_rate": 9.344989863673043e-06, "loss": 0.3849, "step": 6996 }, { "epoch": 1.0706962509563887, "grad_norm": 2.2286947116609244, "learning_rate": 9.342517022652176e-06, "loss": 0.4345, "step": 6997 }, { "epoch": 1.070849273144606, "grad_norm": 2.523389393218545, "learning_rate": 9.340044222009913e-06, "loss": 0.5253, "step": 6998 }, { "epoch": 1.0710022953328233, "grad_norm": 2.2312942926442703, "learning_rate": 9.337571461898112e-06, "loss": 0.504, "step": 6999 }, { "epoch": 1.0711553175210407, "grad_norm": 2.2598765293416876, "learning_rate": 9.33509874246864e-06, "loss": 0.5225, "step": 7000 }, { "epoch": 1.0713083397092578, "grad_norm": 2.503438620842996, "learning_rate": 9.332626063873354e-06, "loss": 0.5315, "step": 7001 }, { "epoch": 1.071461361897475, "grad_norm": 2.5807369463416916, "learning_rate": 9.330153426264117e-06, "loss": 0.4877, "step": 7002 }, { "epoch": 1.0716143840856924, "grad_norm": 2.568301950382052, "learning_rate": 9.327680829792775e-06, "loss": 0.5205, "step": 7003 }, { "epoch": 1.0717674062739098, "grad_norm": 2.182053222736725, "learning_rate": 9.325208274611184e-06, "loss": 0.436, "step": 7004 }, { "epoch": 1.071920428462127, "grad_norm": 2.265886222181865, "learning_rate": 9.322735760871194e-06, "loss": 0.4027, "step": 7005 }, { "epoch": 1.0720734506503442, "grad_norm": 2.2566761587437125, "learning_rate": 9.32026328872465e-06, "loss": 0.4805, "step": 7006 }, { "epoch": 1.0722264728385615, "grad_norm": 2.5470439085229133, "learning_rate": 9.317790858323393e-06, "loss": 0.5402, "step": 7007 }, { "epoch": 1.0723794950267789, "grad_norm": 2.473910127790221, "learning_rate": 9.315318469819275e-06, "loss": 0.4526, "step": 7008 }, { "epoch": 1.0725325172149962, "grad_norm": 2.1728995220377776, "learning_rate": 9.31284612336412e-06, "loss": 0.4184, "step": 7009 }, { "epoch": 1.0726855394032135, "grad_norm": 2.166777588898116, "learning_rate": 9.310373819109779e-06, "loss": 0.4201, "step": 7010 }, { "epoch": 1.0728385615914307, "grad_norm": 2.2110283208174177, "learning_rate": 9.30790155720808e-06, "loss": 0.4783, "step": 7011 }, { "epoch": 1.072991583779648, "grad_norm": 2.2649284810005117, "learning_rate": 9.30542933781085e-06, "loss": 0.3991, "step": 7012 }, { "epoch": 1.0731446059678653, "grad_norm": 2.1904501370858522, "learning_rate": 9.302957161069921e-06, "loss": 0.4015, "step": 7013 }, { "epoch": 1.0732976281560827, "grad_norm": 2.1730718132967017, "learning_rate": 9.300485027137125e-06, "loss": 0.4699, "step": 7014 }, { "epoch": 1.0734506503443, "grad_norm": 2.3653172817867314, "learning_rate": 9.298012936164275e-06, "loss": 0.5384, "step": 7015 }, { "epoch": 1.0736036725325173, "grad_norm": 2.1901146142058394, "learning_rate": 9.295540888303198e-06, "loss": 0.5113, "step": 7016 }, { "epoch": 1.0737566947207344, "grad_norm": 2.384766068642065, "learning_rate": 9.29306888370571e-06, "loss": 0.5185, "step": 7017 }, { "epoch": 1.0739097169089518, "grad_norm": 2.6746763962083167, "learning_rate": 9.29059692252363e-06, "loss": 0.4647, "step": 7018 }, { "epoch": 1.074062739097169, "grad_norm": 2.353980841278368, "learning_rate": 9.288125004908768e-06, "loss": 0.4805, "step": 7019 }, { "epoch": 1.0742157612853864, "grad_norm": 2.311592396149525, "learning_rate": 9.28565313101293e-06, "loss": 0.5211, "step": 7020 }, { "epoch": 1.0743687834736038, "grad_norm": 2.252228528893847, "learning_rate": 9.283181300987934e-06, "loss": 0.4451, "step": 7021 }, { "epoch": 1.0745218056618209, "grad_norm": 2.2982993979133925, "learning_rate": 9.280709514985575e-06, "loss": 0.3907, "step": 7022 }, { "epoch": 1.0746748278500382, "grad_norm": 2.3070546754874295, "learning_rate": 9.278237773157657e-06, "loss": 0.4716, "step": 7023 }, { "epoch": 1.0748278500382555, "grad_norm": 2.6131421219502498, "learning_rate": 9.275766075655986e-06, "loss": 0.5161, "step": 7024 }, { "epoch": 1.0749808722264729, "grad_norm": 2.4042427042020873, "learning_rate": 9.27329442263235e-06, "loss": 0.5227, "step": 7025 }, { "epoch": 1.0751338944146902, "grad_norm": 2.3104549373393652, "learning_rate": 9.270822814238547e-06, "loss": 0.4751, "step": 7026 }, { "epoch": 1.0752869166029075, "grad_norm": 2.1835962666687254, "learning_rate": 9.268351250626368e-06, "loss": 0.5067, "step": 7027 }, { "epoch": 1.0754399387911246, "grad_norm": 2.2162958243208735, "learning_rate": 9.265879731947599e-06, "loss": 0.4971, "step": 7028 }, { "epoch": 1.075592960979342, "grad_norm": 2.342103971102228, "learning_rate": 9.263408258354028e-06, "loss": 0.4492, "step": 7029 }, { "epoch": 1.0757459831675593, "grad_norm": 2.5572314302614387, "learning_rate": 9.260936829997437e-06, "loss": 0.5074, "step": 7030 }, { "epoch": 1.0758990053557766, "grad_norm": 2.2950024298977123, "learning_rate": 9.258465447029609e-06, "loss": 0.3915, "step": 7031 }, { "epoch": 1.076052027543994, "grad_norm": 2.3647579223440163, "learning_rate": 9.255994109602319e-06, "loss": 0.5398, "step": 7032 }, { "epoch": 1.076205049732211, "grad_norm": 2.336276675467057, "learning_rate": 9.253522817867337e-06, "loss": 0.4975, "step": 7033 }, { "epoch": 1.0763580719204284, "grad_norm": 2.569841908595689, "learning_rate": 9.251051571976444e-06, "loss": 0.5007, "step": 7034 }, { "epoch": 1.0765110941086458, "grad_norm": 1.9321188143755093, "learning_rate": 9.248580372081403e-06, "loss": 0.4187, "step": 7035 }, { "epoch": 1.076664116296863, "grad_norm": 1.993423653114654, "learning_rate": 9.246109218333978e-06, "loss": 0.4471, "step": 7036 }, { "epoch": 1.0768171384850804, "grad_norm": 2.8320947879207923, "learning_rate": 9.243638110885938e-06, "loss": 0.3765, "step": 7037 }, { "epoch": 1.0769701606732975, "grad_norm": 2.121544366157612, "learning_rate": 9.241167049889038e-06, "loss": 0.4361, "step": 7038 }, { "epoch": 1.0771231828615149, "grad_norm": 2.1231269443520153, "learning_rate": 9.23869603549504e-06, "loss": 0.4724, "step": 7039 }, { "epoch": 1.0772762050497322, "grad_norm": 2.1396557730209924, "learning_rate": 9.236225067855697e-06, "loss": 0.4575, "step": 7040 }, { "epoch": 1.0774292272379495, "grad_norm": 2.444995460337678, "learning_rate": 9.233754147122759e-06, "loss": 0.5192, "step": 7041 }, { "epoch": 1.0775822494261669, "grad_norm": 2.09131681001774, "learning_rate": 9.231283273447976e-06, "loss": 0.417, "step": 7042 }, { "epoch": 1.077735271614384, "grad_norm": 2.412492987813149, "learning_rate": 9.228812446983098e-06, "loss": 0.4716, "step": 7043 }, { "epoch": 1.0778882938026013, "grad_norm": 2.376792204202723, "learning_rate": 9.22634166787986e-06, "loss": 0.5042, "step": 7044 }, { "epoch": 1.0780413159908186, "grad_norm": 2.258267038660303, "learning_rate": 9.22387093629001e-06, "loss": 0.5356, "step": 7045 }, { "epoch": 1.078194338179036, "grad_norm": 2.140885159688897, "learning_rate": 9.221400252365279e-06, "loss": 0.402, "step": 7046 }, { "epoch": 1.0783473603672533, "grad_norm": 2.299639935119907, "learning_rate": 9.218929616257406e-06, "loss": 0.4874, "step": 7047 }, { "epoch": 1.0785003825554704, "grad_norm": 2.0884411596909005, "learning_rate": 9.216459028118123e-06, "loss": 0.4117, "step": 7048 }, { "epoch": 1.0786534047436878, "grad_norm": 2.3604023546791613, "learning_rate": 9.213988488099152e-06, "loss": 0.4785, "step": 7049 }, { "epoch": 1.078806426931905, "grad_norm": 2.2206333646096197, "learning_rate": 9.211517996352226e-06, "loss": 0.5144, "step": 7050 }, { "epoch": 1.0789594491201224, "grad_norm": 2.4418928939273794, "learning_rate": 9.209047553029065e-06, "loss": 0.5097, "step": 7051 }, { "epoch": 1.0791124713083398, "grad_norm": 2.4200213816818756, "learning_rate": 9.206577158281384e-06, "loss": 0.4596, "step": 7052 }, { "epoch": 1.079265493496557, "grad_norm": 2.1039066805422553, "learning_rate": 9.204106812260908e-06, "loss": 0.5227, "step": 7053 }, { "epoch": 1.0794185156847742, "grad_norm": 2.3042584412347136, "learning_rate": 9.20163651511934e-06, "loss": 0.4305, "step": 7054 }, { "epoch": 1.0795715378729915, "grad_norm": 2.4025169673069118, "learning_rate": 9.199166267008404e-06, "loss": 0.513, "step": 7055 }, { "epoch": 1.0797245600612089, "grad_norm": 2.46461175249102, "learning_rate": 9.196696068079799e-06, "loss": 0.4345, "step": 7056 }, { "epoch": 1.0798775822494262, "grad_norm": 2.0642559537388334, "learning_rate": 9.194225918485226e-06, "loss": 0.3485, "step": 7057 }, { "epoch": 1.0800306044376435, "grad_norm": 2.305597405190117, "learning_rate": 9.191755818376397e-06, "loss": 0.4684, "step": 7058 }, { "epoch": 1.0801836266258606, "grad_norm": 2.153729065281024, "learning_rate": 9.189285767905006e-06, "loss": 0.4609, "step": 7059 }, { "epoch": 1.080336648814078, "grad_norm": 2.151763990460353, "learning_rate": 9.18681576722274e-06, "loss": 0.5002, "step": 7060 }, { "epoch": 1.0804896710022953, "grad_norm": 2.121608247611442, "learning_rate": 9.184345816481307e-06, "loss": 0.6741, "step": 7061 }, { "epoch": 1.0806426931905126, "grad_norm": 2.585835936462805, "learning_rate": 9.181875915832381e-06, "loss": 0.5757, "step": 7062 }, { "epoch": 1.08079571537873, "grad_norm": 2.5048579827782746, "learning_rate": 9.17940606542766e-06, "loss": 0.4583, "step": 7063 }, { "epoch": 1.0809487375669473, "grad_norm": 2.149409121009293, "learning_rate": 9.176936265418825e-06, "loss": 0.5809, "step": 7064 }, { "epoch": 1.0811017597551644, "grad_norm": 2.1026339389941575, "learning_rate": 9.174466515957548e-06, "loss": 0.3908, "step": 7065 }, { "epoch": 1.0812547819433818, "grad_norm": 2.281688039861431, "learning_rate": 9.171996817195516e-06, "loss": 0.4972, "step": 7066 }, { "epoch": 1.081407804131599, "grad_norm": 2.359209844798118, "learning_rate": 9.169527169284395e-06, "loss": 0.5039, "step": 7067 }, { "epoch": 1.0815608263198164, "grad_norm": 2.284727340548648, "learning_rate": 9.167057572375863e-06, "loss": 0.4852, "step": 7068 }, { "epoch": 1.0817138485080338, "grad_norm": 2.4648971458789917, "learning_rate": 9.164588026621582e-06, "loss": 0.4096, "step": 7069 }, { "epoch": 1.0818668706962509, "grad_norm": 2.050205220971181, "learning_rate": 9.162118532173217e-06, "loss": 0.4125, "step": 7070 }, { "epoch": 1.0820198928844682, "grad_norm": 2.0811476169259704, "learning_rate": 9.159649089182436e-06, "loss": 0.4692, "step": 7071 }, { "epoch": 1.0821729150726855, "grad_norm": 2.217536427398098, "learning_rate": 9.157179697800889e-06, "loss": 0.4477, "step": 7072 }, { "epoch": 1.0823259372609029, "grad_norm": 2.542074757986943, "learning_rate": 9.154710358180231e-06, "loss": 0.5461, "step": 7073 }, { "epoch": 1.0824789594491202, "grad_norm": 2.050089241280042, "learning_rate": 9.152241070472122e-06, "loss": 0.4656, "step": 7074 }, { "epoch": 1.0826319816373373, "grad_norm": 2.2914035705247695, "learning_rate": 9.149771834828201e-06, "loss": 0.491, "step": 7075 }, { "epoch": 1.0827850038255546, "grad_norm": 2.243598099910007, "learning_rate": 9.147302651400119e-06, "loss": 0.5017, "step": 7076 }, { "epoch": 1.082938026013772, "grad_norm": 2.6116613995542837, "learning_rate": 9.144833520339518e-06, "loss": 0.5296, "step": 7077 }, { "epoch": 1.0830910482019893, "grad_norm": 2.2604014415370592, "learning_rate": 9.142364441798035e-06, "loss": 0.4793, "step": 7078 }, { "epoch": 1.0832440703902066, "grad_norm": 2.140404673851035, "learning_rate": 9.139895415927307e-06, "loss": 0.3525, "step": 7079 }, { "epoch": 1.0833970925784238, "grad_norm": 2.2760551176385913, "learning_rate": 9.137426442878969e-06, "loss": 0.4887, "step": 7080 }, { "epoch": 1.083550114766641, "grad_norm": 2.3819989085455298, "learning_rate": 9.134957522804641e-06, "loss": 0.4958, "step": 7081 }, { "epoch": 1.0837031369548584, "grad_norm": 2.4328489903808523, "learning_rate": 9.13248865585596e-06, "loss": 0.4582, "step": 7082 }, { "epoch": 1.0838561591430758, "grad_norm": 2.1001441662606575, "learning_rate": 9.130019842184542e-06, "loss": 0.4808, "step": 7083 }, { "epoch": 1.084009181331293, "grad_norm": 2.126839624383373, "learning_rate": 9.127551081942013e-06, "loss": 0.505, "step": 7084 }, { "epoch": 1.0841622035195104, "grad_norm": 2.0661438691181386, "learning_rate": 9.125082375279983e-06, "loss": 0.4676, "step": 7085 }, { "epoch": 1.0843152257077275, "grad_norm": 2.104196112200205, "learning_rate": 9.122613722350064e-06, "loss": 0.383, "step": 7086 }, { "epoch": 1.0844682478959449, "grad_norm": 2.2568665355471746, "learning_rate": 9.120145123303874e-06, "loss": 0.5048, "step": 7087 }, { "epoch": 1.0846212700841622, "grad_norm": 2.2024165587118176, "learning_rate": 9.11767657829301e-06, "loss": 0.4727, "step": 7088 }, { "epoch": 1.0847742922723795, "grad_norm": 2.612232217407369, "learning_rate": 9.11520808746908e-06, "loss": 0.5607, "step": 7089 }, { "epoch": 1.0849273144605969, "grad_norm": 2.1450307502448847, "learning_rate": 9.112739650983685e-06, "loss": 0.5029, "step": 7090 }, { "epoch": 1.085080336648814, "grad_norm": 2.578778549735402, "learning_rate": 9.110271268988415e-06, "loss": 0.5127, "step": 7091 }, { "epoch": 1.0852333588370313, "grad_norm": 2.1489916563880165, "learning_rate": 9.107802941634869e-06, "loss": 0.4452, "step": 7092 }, { "epoch": 1.0853863810252486, "grad_norm": 2.2916701107274147, "learning_rate": 9.105334669074637e-06, "loss": 0.4841, "step": 7093 }, { "epoch": 1.085539403213466, "grad_norm": 2.152474576028727, "learning_rate": 9.102866451459299e-06, "loss": 0.416, "step": 7094 }, { "epoch": 1.0856924254016833, "grad_norm": 1.9391836681067787, "learning_rate": 9.100398288940443e-06, "loss": 0.3911, "step": 7095 }, { "epoch": 1.0858454475899006, "grad_norm": 2.284477748078566, "learning_rate": 9.09793018166965e-06, "loss": 0.4323, "step": 7096 }, { "epoch": 1.0859984697781178, "grad_norm": 2.0810449204826558, "learning_rate": 9.09546212979849e-06, "loss": 0.4461, "step": 7097 }, { "epoch": 1.086151491966335, "grad_norm": 2.102591065248399, "learning_rate": 9.09299413347854e-06, "loss": 0.4236, "step": 7098 }, { "epoch": 1.0863045141545524, "grad_norm": 2.285514466521769, "learning_rate": 9.090526192861367e-06, "loss": 0.4432, "step": 7099 }, { "epoch": 1.0864575363427698, "grad_norm": 2.2247005114609064, "learning_rate": 9.088058308098542e-06, "loss": 0.4757, "step": 7100 }, { "epoch": 1.086610558530987, "grad_norm": 2.379153491410123, "learning_rate": 9.085590479341623e-06, "loss": 0.4765, "step": 7101 }, { "epoch": 1.0867635807192042, "grad_norm": 2.321895207959366, "learning_rate": 9.083122706742166e-06, "loss": 0.5086, "step": 7102 }, { "epoch": 1.0869166029074215, "grad_norm": 2.3449443560981345, "learning_rate": 9.080654990451736e-06, "loss": 0.4743, "step": 7103 }, { "epoch": 1.0870696250956389, "grad_norm": 2.169152507590791, "learning_rate": 9.078187330621876e-06, "loss": 0.4113, "step": 7104 }, { "epoch": 1.0872226472838562, "grad_norm": 2.2823786746054187, "learning_rate": 9.075719727404138e-06, "loss": 0.4769, "step": 7105 }, { "epoch": 1.0873756694720735, "grad_norm": 2.2438777723684966, "learning_rate": 9.073252180950072e-06, "loss": 0.4935, "step": 7106 }, { "epoch": 1.0875286916602906, "grad_norm": 2.191000328520221, "learning_rate": 9.070784691411208e-06, "loss": 0.4083, "step": 7107 }, { "epoch": 1.087681713848508, "grad_norm": 2.2185239900801275, "learning_rate": 9.068317258939096e-06, "loss": 0.4524, "step": 7108 }, { "epoch": 1.0878347360367253, "grad_norm": 2.469228148557464, "learning_rate": 9.065849883685265e-06, "loss": 0.4708, "step": 7109 }, { "epoch": 1.0879877582249426, "grad_norm": 2.148317257106404, "learning_rate": 9.063382565801247e-06, "loss": 0.4117, "step": 7110 }, { "epoch": 1.08814078041316, "grad_norm": 2.46960126434831, "learning_rate": 9.060915305438567e-06, "loss": 0.5797, "step": 7111 }, { "epoch": 1.088293802601377, "grad_norm": 2.5654903458195295, "learning_rate": 9.058448102748753e-06, "loss": 0.5204, "step": 7112 }, { "epoch": 1.0884468247895944, "grad_norm": 2.5226254011755085, "learning_rate": 9.055980957883329e-06, "loss": 0.5013, "step": 7113 }, { "epoch": 1.0885998469778118, "grad_norm": 2.444152682210665, "learning_rate": 9.053513870993803e-06, "loss": 0.4974, "step": 7114 }, { "epoch": 1.088752869166029, "grad_norm": 2.1433965549708223, "learning_rate": 9.051046842231693e-06, "loss": 0.3761, "step": 7115 }, { "epoch": 1.0889058913542464, "grad_norm": 2.2244339091179848, "learning_rate": 9.04857987174851e-06, "loss": 0.4533, "step": 7116 }, { "epoch": 1.0890589135424638, "grad_norm": 2.3900548171797342, "learning_rate": 9.04611295969576e-06, "loss": 0.4394, "step": 7117 }, { "epoch": 1.0892119357306809, "grad_norm": 2.477441551752149, "learning_rate": 9.043646106224942e-06, "loss": 0.4756, "step": 7118 }, { "epoch": 1.0893649579188982, "grad_norm": 2.2301190063141707, "learning_rate": 9.04117931148756e-06, "loss": 0.4872, "step": 7119 }, { "epoch": 1.0895179801071155, "grad_norm": 2.2765796549488373, "learning_rate": 9.038712575635105e-06, "loss": 0.5097, "step": 7120 }, { "epoch": 1.0896710022953329, "grad_norm": 2.516283088910062, "learning_rate": 9.036245898819074e-06, "loss": 0.4335, "step": 7121 }, { "epoch": 1.0898240244835502, "grad_norm": 2.279848242904234, "learning_rate": 9.033779281190953e-06, "loss": 0.5019, "step": 7122 }, { "epoch": 1.0899770466717673, "grad_norm": 2.1190201556445114, "learning_rate": 9.031312722902223e-06, "loss": 0.4462, "step": 7123 }, { "epoch": 1.0901300688599846, "grad_norm": 2.3137880793300463, "learning_rate": 9.028846224104369e-06, "loss": 0.4546, "step": 7124 }, { "epoch": 1.090283091048202, "grad_norm": 2.1680262820024723, "learning_rate": 9.026379784948873e-06, "loss": 0.4073, "step": 7125 }, { "epoch": 1.0904361132364193, "grad_norm": 2.2731966671711166, "learning_rate": 9.023913405587196e-06, "loss": 0.4626, "step": 7126 }, { "epoch": 1.0905891354246366, "grad_norm": 2.066266313161121, "learning_rate": 9.021447086170818e-06, "loss": 0.4734, "step": 7127 }, { "epoch": 1.090742157612854, "grad_norm": 1.9706951129586714, "learning_rate": 9.0189808268512e-06, "loss": 0.4781, "step": 7128 }, { "epoch": 1.090895179801071, "grad_norm": 2.141784621387068, "learning_rate": 9.016514627779811e-06, "loss": 0.484, "step": 7129 }, { "epoch": 1.0910482019892884, "grad_norm": 2.63631118781373, "learning_rate": 9.014048489108106e-06, "loss": 0.4505, "step": 7130 }, { "epoch": 1.0912012241775058, "grad_norm": 2.1987115618847795, "learning_rate": 9.011582410987538e-06, "loss": 0.4639, "step": 7131 }, { "epoch": 1.091354246365723, "grad_norm": 2.134264485035292, "learning_rate": 9.009116393569563e-06, "loss": 0.4645, "step": 7132 }, { "epoch": 1.0915072685539404, "grad_norm": 2.0750373860179785, "learning_rate": 9.006650437005627e-06, "loss": 0.4651, "step": 7133 }, { "epoch": 1.0916602907421575, "grad_norm": 2.2855081732746036, "learning_rate": 9.004184541447169e-06, "loss": 0.4995, "step": 7134 }, { "epoch": 1.0918133129303749, "grad_norm": 2.4098402902812914, "learning_rate": 9.00171870704564e-06, "loss": 0.4734, "step": 7135 }, { "epoch": 1.0919663351185922, "grad_norm": 2.032376714257287, "learning_rate": 8.999252933952465e-06, "loss": 0.3866, "step": 7136 }, { "epoch": 1.0921193573068095, "grad_norm": 2.15493218358809, "learning_rate": 8.996787222319087e-06, "loss": 0.5154, "step": 7137 }, { "epoch": 1.0922723794950269, "grad_norm": 2.0858765860099173, "learning_rate": 8.99432157229693e-06, "loss": 0.3879, "step": 7138 }, { "epoch": 1.092425401683244, "grad_norm": 2.5591535079462098, "learning_rate": 8.991855984037414e-06, "loss": 0.4634, "step": 7139 }, { "epoch": 1.0925784238714613, "grad_norm": 2.4454327491285417, "learning_rate": 8.989390457691973e-06, "loss": 0.4858, "step": 7140 }, { "epoch": 1.0927314460596786, "grad_norm": 1.8496428093473942, "learning_rate": 8.986924993412012e-06, "loss": 0.4182, "step": 7141 }, { "epoch": 1.092884468247896, "grad_norm": 1.9629732376566862, "learning_rate": 8.984459591348952e-06, "loss": 0.414, "step": 7142 }, { "epoch": 1.0930374904361133, "grad_norm": 2.382561085581707, "learning_rate": 8.981994251654205e-06, "loss": 0.4709, "step": 7143 }, { "epoch": 1.0931905126243304, "grad_norm": 2.076307911023831, "learning_rate": 8.979528974479169e-06, "loss": 0.4202, "step": 7144 }, { "epoch": 1.0933435348125478, "grad_norm": 2.1286263317947687, "learning_rate": 8.977063759975251e-06, "loss": 0.4613, "step": 7145 }, { "epoch": 1.093496557000765, "grad_norm": 2.4719234461371986, "learning_rate": 8.974598608293853e-06, "loss": 0.4872, "step": 7146 }, { "epoch": 1.0936495791889824, "grad_norm": 1.9484181799070321, "learning_rate": 8.972133519586361e-06, "loss": 0.349, "step": 7147 }, { "epoch": 1.0938026013771998, "grad_norm": 2.149297644516257, "learning_rate": 8.969668494004173e-06, "loss": 0.4462, "step": 7148 }, { "epoch": 1.0939556235654169, "grad_norm": 2.3154019028958643, "learning_rate": 8.967203531698672e-06, "loss": 0.4999, "step": 7149 }, { "epoch": 1.0941086457536342, "grad_norm": 2.345475674509572, "learning_rate": 8.964738632821245e-06, "loss": 0.4747, "step": 7150 }, { "epoch": 1.0942616679418515, "grad_norm": 2.028434119282612, "learning_rate": 8.962273797523268e-06, "loss": 0.3636, "step": 7151 }, { "epoch": 1.0944146901300689, "grad_norm": 2.5352442491743523, "learning_rate": 8.959809025956113e-06, "loss": 0.4937, "step": 7152 }, { "epoch": 1.0945677123182862, "grad_norm": 2.0639018071881408, "learning_rate": 8.957344318271161e-06, "loss": 0.3728, "step": 7153 }, { "epoch": 1.0947207345065035, "grad_norm": 2.3340432211508357, "learning_rate": 8.954879674619769e-06, "loss": 0.4858, "step": 7154 }, { "epoch": 1.0948737566947206, "grad_norm": 2.0559018209376685, "learning_rate": 8.952415095153305e-06, "loss": 0.4086, "step": 7155 }, { "epoch": 1.095026778882938, "grad_norm": 2.316442921819214, "learning_rate": 8.949950580023131e-06, "loss": 0.4435, "step": 7156 }, { "epoch": 1.0951798010711553, "grad_norm": 1.9374221563323648, "learning_rate": 8.947486129380597e-06, "loss": 0.4009, "step": 7157 }, { "epoch": 1.0953328232593726, "grad_norm": 2.030063875991149, "learning_rate": 8.945021743377057e-06, "loss": 0.3603, "step": 7158 }, { "epoch": 1.09548584544759, "grad_norm": 2.1826213973706667, "learning_rate": 8.942557422163864e-06, "loss": 0.4447, "step": 7159 }, { "epoch": 1.095638867635807, "grad_norm": 2.252983268260625, "learning_rate": 8.94009316589235e-06, "loss": 0.4963, "step": 7160 }, { "epoch": 1.0957918898240244, "grad_norm": 2.6669262080305516, "learning_rate": 8.937628974713864e-06, "loss": 0.5089, "step": 7161 }, { "epoch": 1.0959449120122418, "grad_norm": 2.12773257212889, "learning_rate": 8.93516484877974e-06, "loss": 0.4969, "step": 7162 }, { "epoch": 1.096097934200459, "grad_norm": 2.3072830017793313, "learning_rate": 8.932700788241304e-06, "loss": 0.479, "step": 7163 }, { "epoch": 1.0962509563886764, "grad_norm": 2.2042128737534004, "learning_rate": 8.93023679324989e-06, "loss": 0.4839, "step": 7164 }, { "epoch": 1.0964039785768938, "grad_norm": 2.793680246758663, "learning_rate": 8.92777286395682e-06, "loss": 0.4981, "step": 7165 }, { "epoch": 1.0965570007651109, "grad_norm": 2.225104256306868, "learning_rate": 8.925309000513414e-06, "loss": 0.4377, "step": 7166 }, { "epoch": 1.0967100229533282, "grad_norm": 1.9745454245625953, "learning_rate": 8.922845203070984e-06, "loss": 0.4017, "step": 7167 }, { "epoch": 1.0968630451415455, "grad_norm": 2.368420047885716, "learning_rate": 8.920381471780843e-06, "loss": 0.5215, "step": 7168 }, { "epoch": 1.0970160673297629, "grad_norm": 2.051795870663101, "learning_rate": 8.917917806794305e-06, "loss": 0.3828, "step": 7169 }, { "epoch": 1.0971690895179802, "grad_norm": 2.123677254807295, "learning_rate": 8.915454208262664e-06, "loss": 0.4633, "step": 7170 }, { "epoch": 1.0973221117061973, "grad_norm": 2.2627608124003604, "learning_rate": 8.912990676337221e-06, "loss": 0.4726, "step": 7171 }, { "epoch": 1.0974751338944146, "grad_norm": 2.103757846488755, "learning_rate": 8.910527211169277e-06, "loss": 0.4227, "step": 7172 }, { "epoch": 1.097628156082632, "grad_norm": 2.2143920295035544, "learning_rate": 8.908063812910116e-06, "loss": 0.4625, "step": 7173 }, { "epoch": 1.0977811782708493, "grad_norm": 2.100586428663275, "learning_rate": 8.905600481711027e-06, "loss": 0.4364, "step": 7174 }, { "epoch": 1.0979342004590666, "grad_norm": 2.058446058812767, "learning_rate": 8.9031372177233e-06, "loss": 0.4965, "step": 7175 }, { "epoch": 1.0980872226472838, "grad_norm": 2.588950853621863, "learning_rate": 8.9006740210982e-06, "loss": 0.3974, "step": 7176 }, { "epoch": 1.098240244835501, "grad_norm": 2.1324434817566065, "learning_rate": 8.898210891987012e-06, "loss": 0.5475, "step": 7177 }, { "epoch": 1.0983932670237184, "grad_norm": 2.0296371639639674, "learning_rate": 8.895747830541001e-06, "loss": 0.4481, "step": 7178 }, { "epoch": 1.0985462892119358, "grad_norm": 2.0897999254760973, "learning_rate": 8.89328483691144e-06, "loss": 0.4616, "step": 7179 }, { "epoch": 1.098699311400153, "grad_norm": 2.14171265984956, "learning_rate": 8.890821911249585e-06, "loss": 0.4869, "step": 7180 }, { "epoch": 1.0988523335883702, "grad_norm": 2.1623838358382805, "learning_rate": 8.888359053706694e-06, "loss": 0.4638, "step": 7181 }, { "epoch": 1.0990053557765875, "grad_norm": 1.956343477779932, "learning_rate": 8.885896264434025e-06, "loss": 0.3838, "step": 7182 }, { "epoch": 1.0991583779648049, "grad_norm": 2.3400499233335768, "learning_rate": 8.883433543582824e-06, "loss": 0.4655, "step": 7183 }, { "epoch": 1.0993114001530222, "grad_norm": 2.399181740585056, "learning_rate": 8.880970891304334e-06, "loss": 0.4578, "step": 7184 }, { "epoch": 1.0994644223412395, "grad_norm": 2.250008234169996, "learning_rate": 8.878508307749804e-06, "loss": 0.428, "step": 7185 }, { "epoch": 1.0996174445294569, "grad_norm": 2.290696083811117, "learning_rate": 8.87604579307046e-06, "loss": 0.4269, "step": 7186 }, { "epoch": 1.099770466717674, "grad_norm": 2.505122870008909, "learning_rate": 8.873583347417546e-06, "loss": 0.5127, "step": 7187 }, { "epoch": 1.0999234889058913, "grad_norm": 2.353030872555066, "learning_rate": 8.871120970942285e-06, "loss": 0.4578, "step": 7188 }, { "epoch": 1.1000765110941086, "grad_norm": 2.301167013530359, "learning_rate": 8.868658663795898e-06, "loss": 0.3812, "step": 7189 }, { "epoch": 1.100229533282326, "grad_norm": 2.2533182412177513, "learning_rate": 8.866196426129612e-06, "loss": 0.427, "step": 7190 }, { "epoch": 1.1003825554705433, "grad_norm": 2.4637131571486406, "learning_rate": 8.863734258094638e-06, "loss": 0.4376, "step": 7191 }, { "epoch": 1.1005355776587604, "grad_norm": 2.0197232101443223, "learning_rate": 8.861272159842186e-06, "loss": 0.4344, "step": 7192 }, { "epoch": 1.1006885998469778, "grad_norm": 2.251483371734288, "learning_rate": 8.858810131523467e-06, "loss": 0.4298, "step": 7193 }, { "epoch": 1.100841622035195, "grad_norm": 2.287008056541385, "learning_rate": 8.85634817328968e-06, "loss": 0.4718, "step": 7194 }, { "epoch": 1.1009946442234124, "grad_norm": 2.209324158741729, "learning_rate": 8.85388628529203e-06, "loss": 0.4657, "step": 7195 }, { "epoch": 1.1011476664116298, "grad_norm": 2.1505467139385304, "learning_rate": 8.851424467681705e-06, "loss": 0.4322, "step": 7196 }, { "epoch": 1.101300688599847, "grad_norm": 2.1982904432190984, "learning_rate": 8.848962720609894e-06, "loss": 0.4675, "step": 7197 }, { "epoch": 1.1014537107880642, "grad_norm": 2.1486253183804767, "learning_rate": 8.84650104422779e-06, "loss": 0.4145, "step": 7198 }, { "epoch": 1.1016067329762815, "grad_norm": 2.0660350104739864, "learning_rate": 8.844039438686566e-06, "loss": 0.4289, "step": 7199 }, { "epoch": 1.1017597551644989, "grad_norm": 2.2958785347357793, "learning_rate": 8.841577904137402e-06, "loss": 0.4849, "step": 7200 }, { "epoch": 1.1019127773527162, "grad_norm": 2.3249009204416216, "learning_rate": 8.839116440731475e-06, "loss": 0.4445, "step": 7201 }, { "epoch": 1.1020657995409335, "grad_norm": 2.2802870963931188, "learning_rate": 8.836655048619943e-06, "loss": 0.4219, "step": 7202 }, { "epoch": 1.1022188217291506, "grad_norm": 2.1524504956749304, "learning_rate": 8.834193727953977e-06, "loss": 0.4425, "step": 7203 }, { "epoch": 1.102371843917368, "grad_norm": 2.3059346698587024, "learning_rate": 8.831732478884739e-06, "loss": 0.481, "step": 7204 }, { "epoch": 1.1025248661055853, "grad_norm": 2.017182531907496, "learning_rate": 8.829271301563375e-06, "loss": 0.3548, "step": 7205 }, { "epoch": 1.1026778882938026, "grad_norm": 2.1409796994991286, "learning_rate": 8.826810196141042e-06, "loss": 0.5375, "step": 7206 }, { "epoch": 1.10283091048202, "grad_norm": 2.1409288991943924, "learning_rate": 8.824349162768882e-06, "loss": 0.4341, "step": 7207 }, { "epoch": 1.102983932670237, "grad_norm": 2.198060299539323, "learning_rate": 8.821888201598044e-06, "loss": 0.4512, "step": 7208 }, { "epoch": 1.1031369548584544, "grad_norm": 2.262625789778196, "learning_rate": 8.819427312779658e-06, "loss": 0.4792, "step": 7209 }, { "epoch": 1.1032899770466718, "grad_norm": 2.2566093525445594, "learning_rate": 8.816966496464858e-06, "loss": 0.4995, "step": 7210 }, { "epoch": 1.103442999234889, "grad_norm": 2.4283834426361994, "learning_rate": 8.814505752804776e-06, "loss": 0.4806, "step": 7211 }, { "epoch": 1.1035960214231064, "grad_norm": 2.170904697292499, "learning_rate": 8.812045081950532e-06, "loss": 0.4919, "step": 7212 }, { "epoch": 1.1037490436113235, "grad_norm": 1.9863702432643318, "learning_rate": 8.809584484053245e-06, "loss": 0.375, "step": 7213 }, { "epoch": 1.1039020657995409, "grad_norm": 2.5916602921862286, "learning_rate": 8.807123959264036e-06, "loss": 0.4797, "step": 7214 }, { "epoch": 1.1040550879877582, "grad_norm": 2.434544800191652, "learning_rate": 8.804663507734004e-06, "loss": 0.3995, "step": 7215 }, { "epoch": 1.1042081101759755, "grad_norm": 2.499246869230129, "learning_rate": 8.802203129614269e-06, "loss": 0.5353, "step": 7216 }, { "epoch": 1.1043611323641929, "grad_norm": 2.0781080712876117, "learning_rate": 8.799742825055927e-06, "loss": 0.38, "step": 7217 }, { "epoch": 1.1045141545524102, "grad_norm": 2.394442587470399, "learning_rate": 8.797282594210066e-06, "loss": 0.4759, "step": 7218 }, { "epoch": 1.1046671767406273, "grad_norm": 2.51859505988219, "learning_rate": 8.79482243722779e-06, "loss": 0.4989, "step": 7219 }, { "epoch": 1.1048201989288446, "grad_norm": 2.075859944947023, "learning_rate": 8.792362354260185e-06, "loss": 0.4434, "step": 7220 }, { "epoch": 1.104973221117062, "grad_norm": 2.4451130046615592, "learning_rate": 8.789902345458326e-06, "loss": 0.4663, "step": 7221 }, { "epoch": 1.1051262433052793, "grad_norm": 2.3113134861788036, "learning_rate": 8.787442410973305e-06, "loss": 0.4555, "step": 7222 }, { "epoch": 1.1052792654934966, "grad_norm": 2.1639552175738856, "learning_rate": 8.784982550956184e-06, "loss": 0.424, "step": 7223 }, { "epoch": 1.1054322876817138, "grad_norm": 1.9803878884912123, "learning_rate": 8.782522765558039e-06, "loss": 0.4537, "step": 7224 }, { "epoch": 1.105585309869931, "grad_norm": 2.2787793998286823, "learning_rate": 8.780063054929937e-06, "loss": 0.4371, "step": 7225 }, { "epoch": 1.1057383320581484, "grad_norm": 2.3573504102686713, "learning_rate": 8.77760341922293e-06, "loss": 0.5008, "step": 7226 }, { "epoch": 1.1058913542463658, "grad_norm": 2.1996380165093177, "learning_rate": 8.775143858588082e-06, "loss": 0.4169, "step": 7227 }, { "epoch": 1.106044376434583, "grad_norm": 2.616789474953216, "learning_rate": 8.772684373176443e-06, "loss": 0.4648, "step": 7228 }, { "epoch": 1.1061973986228004, "grad_norm": 2.128350016291785, "learning_rate": 8.770224963139054e-06, "loss": 0.4217, "step": 7229 }, { "epoch": 1.1063504208110175, "grad_norm": 2.579626546907676, "learning_rate": 8.767765628626962e-06, "loss": 0.5056, "step": 7230 }, { "epoch": 1.1065034429992349, "grad_norm": 2.2513999128668636, "learning_rate": 8.765306369791204e-06, "loss": 0.4425, "step": 7231 }, { "epoch": 1.1066564651874522, "grad_norm": 2.442135717799566, "learning_rate": 8.762847186782813e-06, "loss": 0.506, "step": 7232 }, { "epoch": 1.1068094873756695, "grad_norm": 2.090965212912735, "learning_rate": 8.760388079752815e-06, "loss": 0.4475, "step": 7233 }, { "epoch": 1.1069625095638869, "grad_norm": 2.2371641896853802, "learning_rate": 8.757929048852233e-06, "loss": 0.484, "step": 7234 }, { "epoch": 1.107115531752104, "grad_norm": 2.3009833508605007, "learning_rate": 8.75547009423209e-06, "loss": 0.3488, "step": 7235 }, { "epoch": 1.1072685539403213, "grad_norm": 2.1163473200298957, "learning_rate": 8.753011216043396e-06, "loss": 0.3902, "step": 7236 }, { "epoch": 1.1074215761285386, "grad_norm": 2.3626068037558685, "learning_rate": 8.75055241443716e-06, "loss": 0.4614, "step": 7237 }, { "epoch": 1.107574598316756, "grad_norm": 1.8373619367737042, "learning_rate": 8.748093689564392e-06, "loss": 0.3726, "step": 7238 }, { "epoch": 1.1077276205049733, "grad_norm": 1.9346840039209652, "learning_rate": 8.745635041576082e-06, "loss": 0.3809, "step": 7239 }, { "epoch": 1.1078806426931904, "grad_norm": 2.1799202758607312, "learning_rate": 8.743176470623236e-06, "loss": 0.4697, "step": 7240 }, { "epoch": 1.1080336648814078, "grad_norm": 2.351668788150053, "learning_rate": 8.74071797685684e-06, "loss": 0.3959, "step": 7241 }, { "epoch": 1.108186687069625, "grad_norm": 2.0925064474546686, "learning_rate": 8.738259560427876e-06, "loss": 0.41, "step": 7242 }, { "epoch": 1.1083397092578424, "grad_norm": 2.260684681391703, "learning_rate": 8.73580122148733e-06, "loss": 0.4818, "step": 7243 }, { "epoch": 1.1084927314460598, "grad_norm": 2.1853032894152995, "learning_rate": 8.733342960186174e-06, "loss": 0.4506, "step": 7244 }, { "epoch": 1.1086457536342769, "grad_norm": 1.9906423609607566, "learning_rate": 8.730884776675386e-06, "loss": 0.3836, "step": 7245 }, { "epoch": 1.1087987758224942, "grad_norm": 1.9829231974861674, "learning_rate": 8.728426671105929e-06, "loss": 0.497, "step": 7246 }, { "epoch": 1.1089517980107115, "grad_norm": 2.0781493892344316, "learning_rate": 8.72596864362876e-06, "loss": 0.4439, "step": 7247 }, { "epoch": 1.1091048201989289, "grad_norm": 2.380914255234149, "learning_rate": 8.723510694394845e-06, "loss": 0.4626, "step": 7248 }, { "epoch": 1.1092578423871462, "grad_norm": 2.430407170746528, "learning_rate": 8.721052823555131e-06, "loss": 0.4854, "step": 7249 }, { "epoch": 1.1094108645753635, "grad_norm": 2.4319507114869667, "learning_rate": 8.718595031260564e-06, "loss": 0.3742, "step": 7250 }, { "epoch": 1.1095638867635806, "grad_norm": 2.093265509251606, "learning_rate": 8.716137317662093e-06, "loss": 0.388, "step": 7251 }, { "epoch": 1.109716908951798, "grad_norm": 2.4559992923329106, "learning_rate": 8.713679682910648e-06, "loss": 0.4244, "step": 7252 }, { "epoch": 1.1098699311400153, "grad_norm": 2.1829029164513982, "learning_rate": 8.711222127157166e-06, "loss": 0.3357, "step": 7253 }, { "epoch": 1.1100229533282326, "grad_norm": 2.3403552031528903, "learning_rate": 8.70876465055258e-06, "loss": 0.4064, "step": 7254 }, { "epoch": 1.11017597551645, "grad_norm": 1.989255895084661, "learning_rate": 8.7063072532478e-06, "loss": 0.3819, "step": 7255 }, { "epoch": 1.110328997704667, "grad_norm": 2.184660067497534, "learning_rate": 8.703849935393758e-06, "loss": 0.4368, "step": 7256 }, { "epoch": 1.1104820198928844, "grad_norm": 2.349449352310267, "learning_rate": 8.701392697141363e-06, "loss": 0.4845, "step": 7257 }, { "epoch": 1.1106350420811018, "grad_norm": 2.198702813821092, "learning_rate": 8.698935538641519e-06, "loss": 0.4256, "step": 7258 }, { "epoch": 1.110788064269319, "grad_norm": 2.342120943064589, "learning_rate": 8.696478460045134e-06, "loss": 0.451, "step": 7259 }, { "epoch": 1.1109410864575364, "grad_norm": 2.187839864774295, "learning_rate": 8.694021461503105e-06, "loss": 0.4097, "step": 7260 }, { "epoch": 1.1110941086457535, "grad_norm": 2.165677195044475, "learning_rate": 8.691564543166332e-06, "loss": 0.4467, "step": 7261 }, { "epoch": 1.1112471308339709, "grad_norm": 2.278960948831545, "learning_rate": 8.689107705185697e-06, "loss": 0.4216, "step": 7262 }, { "epoch": 1.1114001530221882, "grad_norm": 2.1925602427988085, "learning_rate": 8.686650947712084e-06, "loss": 0.4818, "step": 7263 }, { "epoch": 1.1115531752104055, "grad_norm": 2.4280978672138365, "learning_rate": 8.684194270896376e-06, "loss": 0.4793, "step": 7264 }, { "epoch": 1.1117061973986229, "grad_norm": 2.413291630851117, "learning_rate": 8.681737674889445e-06, "loss": 0.436, "step": 7265 }, { "epoch": 1.1118592195868402, "grad_norm": 2.182619190908532, "learning_rate": 8.679281159842157e-06, "loss": 0.4091, "step": 7266 }, { "epoch": 1.1120122417750573, "grad_norm": 2.352484482668082, "learning_rate": 8.676824725905386e-06, "loss": 0.4431, "step": 7267 }, { "epoch": 1.1121652639632746, "grad_norm": 2.104077049629743, "learning_rate": 8.67436837322998e-06, "loss": 0.4664, "step": 7268 }, { "epoch": 1.112318286151492, "grad_norm": 1.9325614114153729, "learning_rate": 8.671912101966799e-06, "loss": 0.4152, "step": 7269 }, { "epoch": 1.1124713083397093, "grad_norm": 2.2218232070237933, "learning_rate": 8.669455912266696e-06, "loss": 0.4681, "step": 7270 }, { "epoch": 1.1126243305279266, "grad_norm": 2.3511511595238925, "learning_rate": 8.666999804280503e-06, "loss": 0.4319, "step": 7271 }, { "epoch": 1.1127773527161438, "grad_norm": 2.5154012800065515, "learning_rate": 8.664543778159071e-06, "loss": 0.5433, "step": 7272 }, { "epoch": 1.112930374904361, "grad_norm": 2.094015164774007, "learning_rate": 8.662087834053232e-06, "loss": 0.4057, "step": 7273 }, { "epoch": 1.1130833970925784, "grad_norm": 2.407236738502629, "learning_rate": 8.659631972113809e-06, "loss": 0.4519, "step": 7274 }, { "epoch": 1.1132364192807958, "grad_norm": 2.3070387985152503, "learning_rate": 8.65717619249163e-06, "loss": 0.4029, "step": 7275 }, { "epoch": 1.113389441469013, "grad_norm": 2.2050815747283057, "learning_rate": 8.654720495337514e-06, "loss": 0.4293, "step": 7276 }, { "epoch": 1.1135424636572302, "grad_norm": 1.9874516443915964, "learning_rate": 8.652264880802279e-06, "loss": 0.3515, "step": 7277 }, { "epoch": 1.1136954858454475, "grad_norm": 2.283468114473653, "learning_rate": 8.649809349036729e-06, "loss": 0.5043, "step": 7278 }, { "epoch": 1.1138485080336649, "grad_norm": 2.228147809122041, "learning_rate": 8.647353900191665e-06, "loss": 0.4794, "step": 7279 }, { "epoch": 1.1140015302218822, "grad_norm": 2.181698558558112, "learning_rate": 8.644898534417893e-06, "loss": 0.4362, "step": 7280 }, { "epoch": 1.1141545524100995, "grad_norm": 2.7327555296956514, "learning_rate": 8.642443251866202e-06, "loss": 0.5515, "step": 7281 }, { "epoch": 1.1143075745983166, "grad_norm": 2.172183101436314, "learning_rate": 8.639988052687383e-06, "loss": 0.5035, "step": 7282 }, { "epoch": 1.114460596786534, "grad_norm": 2.3012502821953564, "learning_rate": 8.637532937032218e-06, "loss": 0.42, "step": 7283 }, { "epoch": 1.1146136189747513, "grad_norm": 2.3750682620962333, "learning_rate": 8.635077905051484e-06, "loss": 0.4683, "step": 7284 }, { "epoch": 1.1147666411629686, "grad_norm": 2.295739800025103, "learning_rate": 8.632622956895956e-06, "loss": 0.4134, "step": 7285 }, { "epoch": 1.114919663351186, "grad_norm": 2.5572348746935862, "learning_rate": 8.630168092716403e-06, "loss": 0.4558, "step": 7286 }, { "epoch": 1.1150726855394033, "grad_norm": 2.2569408220861042, "learning_rate": 8.627713312663582e-06, "loss": 0.4033, "step": 7287 }, { "epoch": 1.1152257077276204, "grad_norm": 2.270829840814031, "learning_rate": 8.625258616888258e-06, "loss": 0.5059, "step": 7288 }, { "epoch": 1.1153787299158378, "grad_norm": 2.402699754110219, "learning_rate": 8.622804005541179e-06, "loss": 0.462, "step": 7289 }, { "epoch": 1.115531752104055, "grad_norm": 2.4722485610391782, "learning_rate": 8.620349478773095e-06, "loss": 0.4529, "step": 7290 }, { "epoch": 1.1156847742922724, "grad_norm": 2.1893417797352344, "learning_rate": 8.617895036734747e-06, "loss": 0.4421, "step": 7291 }, { "epoch": 1.1158377964804898, "grad_norm": 2.1580635465283633, "learning_rate": 8.61544067957687e-06, "loss": 0.3836, "step": 7292 }, { "epoch": 1.1159908186687069, "grad_norm": 2.4058476302646374, "learning_rate": 8.6129864074502e-06, "loss": 0.4373, "step": 7293 }, { "epoch": 1.1161438408569242, "grad_norm": 2.3222777156880037, "learning_rate": 8.610532220505459e-06, "loss": 0.5089, "step": 7294 }, { "epoch": 1.1162968630451415, "grad_norm": 2.1307415290698852, "learning_rate": 8.608078118893368e-06, "loss": 0.5199, "step": 7295 }, { "epoch": 1.1164498852333589, "grad_norm": 2.383794896617956, "learning_rate": 8.60562410276465e-06, "loss": 0.4593, "step": 7296 }, { "epoch": 1.1166029074215762, "grad_norm": 2.5837278205838663, "learning_rate": 8.603170172270002e-06, "loss": 0.4638, "step": 7297 }, { "epoch": 1.1167559296097935, "grad_norm": 2.2525760948688944, "learning_rate": 8.600716327560146e-06, "loss": 0.4474, "step": 7298 }, { "epoch": 1.1169089517980106, "grad_norm": 2.615571933223899, "learning_rate": 8.598262568785775e-06, "loss": 0.4843, "step": 7299 }, { "epoch": 1.117061973986228, "grad_norm": 2.439123207803759, "learning_rate": 8.595808896097575e-06, "loss": 0.4902, "step": 7300 }, { "epoch": 1.1172149961744453, "grad_norm": 2.2731590292985677, "learning_rate": 8.593355309646252e-06, "loss": 0.514, "step": 7301 }, { "epoch": 1.1173680183626626, "grad_norm": 2.2254431206595773, "learning_rate": 8.590901809582482e-06, "loss": 0.4406, "step": 7302 }, { "epoch": 1.11752104055088, "grad_norm": 2.1556714339537226, "learning_rate": 8.588448396056938e-06, "loss": 0.4179, "step": 7303 }, { "epoch": 1.117674062739097, "grad_norm": 2.2555805850430817, "learning_rate": 8.585995069220305e-06, "loss": 0.4607, "step": 7304 }, { "epoch": 1.1178270849273144, "grad_norm": 2.095833604865627, "learning_rate": 8.583541829223243e-06, "loss": 0.5086, "step": 7305 }, { "epoch": 1.1179801071155318, "grad_norm": 2.3270010835496397, "learning_rate": 8.581088676216421e-06, "loss": 0.4744, "step": 7306 }, { "epoch": 1.118133129303749, "grad_norm": 2.491796490650458, "learning_rate": 8.578635610350497e-06, "loss": 0.4065, "step": 7307 }, { "epoch": 1.1182861514919664, "grad_norm": 2.182575815083912, "learning_rate": 8.576182631776115e-06, "loss": 0.4834, "step": 7308 }, { "epoch": 1.1184391736801835, "grad_norm": 2.173635352835729, "learning_rate": 8.57372974064393e-06, "loss": 0.4341, "step": 7309 }, { "epoch": 1.1185921958684009, "grad_norm": 2.3043141043629274, "learning_rate": 8.571276937104584e-06, "loss": 0.464, "step": 7310 }, { "epoch": 1.1187452180566182, "grad_norm": 2.316805699809123, "learning_rate": 8.568824221308705e-06, "loss": 0.4992, "step": 7311 }, { "epoch": 1.1188982402448355, "grad_norm": 2.100553392834741, "learning_rate": 8.566371593406933e-06, "loss": 0.447, "step": 7312 }, { "epoch": 1.1190512624330529, "grad_norm": 2.3347884127487055, "learning_rate": 8.563919053549887e-06, "loss": 0.4517, "step": 7313 }, { "epoch": 1.11920428462127, "grad_norm": 2.011899264289727, "learning_rate": 8.561466601888195e-06, "loss": 0.381, "step": 7314 }, { "epoch": 1.1193573068094873, "grad_norm": 2.264732519715413, "learning_rate": 8.559014238572463e-06, "loss": 0.4625, "step": 7315 }, { "epoch": 1.1195103289977046, "grad_norm": 2.1982141461822944, "learning_rate": 8.556561963753303e-06, "loss": 0.433, "step": 7316 }, { "epoch": 1.119663351185922, "grad_norm": 2.2287969140588038, "learning_rate": 8.554109777581322e-06, "loss": 0.4322, "step": 7317 }, { "epoch": 1.1198163733741393, "grad_norm": 2.0805687752430218, "learning_rate": 8.551657680207114e-06, "loss": 0.4368, "step": 7318 }, { "epoch": 1.1199693955623566, "grad_norm": 2.167889830590893, "learning_rate": 8.549205671781273e-06, "loss": 0.6545, "step": 7319 }, { "epoch": 1.1201224177505738, "grad_norm": 2.528070985327336, "learning_rate": 8.54675375245439e-06, "loss": 0.5258, "step": 7320 }, { "epoch": 1.120275439938791, "grad_norm": 1.787445525421544, "learning_rate": 8.544301922377043e-06, "loss": 0.3962, "step": 7321 }, { "epoch": 1.1204284621270084, "grad_norm": 2.244017891063301, "learning_rate": 8.54185018169981e-06, "loss": 0.4742, "step": 7322 }, { "epoch": 1.1205814843152258, "grad_norm": 2.249034278168932, "learning_rate": 8.539398530573264e-06, "loss": 0.4758, "step": 7323 }, { "epoch": 1.120734506503443, "grad_norm": 2.0505223499372796, "learning_rate": 8.536946969147963e-06, "loss": 0.4034, "step": 7324 }, { "epoch": 1.1208875286916602, "grad_norm": 2.433144003092916, "learning_rate": 8.534495497574472e-06, "loss": 0.5097, "step": 7325 }, { "epoch": 1.1210405508798775, "grad_norm": 2.136664385459225, "learning_rate": 8.532044116003347e-06, "loss": 0.4433, "step": 7326 }, { "epoch": 1.1211935730680949, "grad_norm": 2.360936102413662, "learning_rate": 8.52959282458514e-06, "loss": 0.4784, "step": 7327 }, { "epoch": 1.1213465952563122, "grad_norm": 2.1996065493633106, "learning_rate": 8.527141623470387e-06, "loss": 0.4174, "step": 7328 }, { "epoch": 1.1214996174445295, "grad_norm": 2.2465195864524463, "learning_rate": 8.524690512809626e-06, "loss": 0.3862, "step": 7329 }, { "epoch": 1.1216526396327469, "grad_norm": 2.1053183068145045, "learning_rate": 8.522239492753395e-06, "loss": 0.475, "step": 7330 }, { "epoch": 1.121805661820964, "grad_norm": 2.418720533759001, "learning_rate": 8.519788563452218e-06, "loss": 0.4415, "step": 7331 }, { "epoch": 1.1219586840091813, "grad_norm": 1.9996187128183676, "learning_rate": 8.517337725056613e-06, "loss": 0.4324, "step": 7332 }, { "epoch": 1.1221117061973986, "grad_norm": 2.1481582833504693, "learning_rate": 8.514886977717102e-06, "loss": 0.4025, "step": 7333 }, { "epoch": 1.122264728385616, "grad_norm": 2.2852880745037676, "learning_rate": 8.51243632158419e-06, "loss": 0.5204, "step": 7334 }, { "epoch": 1.1224177505738333, "grad_norm": 1.9810063469638823, "learning_rate": 8.509985756808382e-06, "loss": 0.417, "step": 7335 }, { "epoch": 1.1225707727620504, "grad_norm": 2.354682978168584, "learning_rate": 8.507535283540181e-06, "loss": 0.4445, "step": 7336 }, { "epoch": 1.1227237949502678, "grad_norm": 2.049284352078451, "learning_rate": 8.505084901930073e-06, "loss": 0.4622, "step": 7337 }, { "epoch": 1.122876817138485, "grad_norm": 2.2985255632558106, "learning_rate": 8.502634612128551e-06, "loss": 0.4512, "step": 7338 }, { "epoch": 1.1230298393267024, "grad_norm": 2.289298123164532, "learning_rate": 8.500184414286096e-06, "loss": 0.4481, "step": 7339 }, { "epoch": 1.1231828615149198, "grad_norm": 2.066046084729003, "learning_rate": 8.49773430855318e-06, "loss": 0.5145, "step": 7340 }, { "epoch": 1.1233358837031369, "grad_norm": 2.4768352381868155, "learning_rate": 8.495284295080281e-06, "loss": 0.507, "step": 7341 }, { "epoch": 1.1234889058913542, "grad_norm": 2.379390037498325, "learning_rate": 8.492834374017856e-06, "loss": 0.4829, "step": 7342 }, { "epoch": 1.1236419280795715, "grad_norm": 2.107590759872093, "learning_rate": 8.490384545516373e-06, "loss": 0.411, "step": 7343 }, { "epoch": 1.1237949502677889, "grad_norm": 1.9761377140115508, "learning_rate": 8.487934809726277e-06, "loss": 0.4598, "step": 7344 }, { "epoch": 1.1239479724560062, "grad_norm": 1.9653246388829446, "learning_rate": 8.485485166798019e-06, "loss": 0.4399, "step": 7345 }, { "epoch": 1.1241009946442233, "grad_norm": 2.3005849600149917, "learning_rate": 8.483035616882046e-06, "loss": 0.4133, "step": 7346 }, { "epoch": 1.1242540168324406, "grad_norm": 2.108212619768816, "learning_rate": 8.480586160128785e-06, "loss": 0.4129, "step": 7347 }, { "epoch": 1.124407039020658, "grad_norm": 2.189179438990085, "learning_rate": 8.478136796688674e-06, "loss": 0.4236, "step": 7348 }, { "epoch": 1.1245600612088753, "grad_norm": 2.0614034092253117, "learning_rate": 8.47568752671214e-06, "loss": 0.4482, "step": 7349 }, { "epoch": 1.1247130833970926, "grad_norm": 2.3106721350735357, "learning_rate": 8.473238350349593e-06, "loss": 0.4582, "step": 7350 }, { "epoch": 1.12486610558531, "grad_norm": 2.069049495107061, "learning_rate": 8.470789267751452e-06, "loss": 0.4362, "step": 7351 }, { "epoch": 1.125019127773527, "grad_norm": 2.232653664335247, "learning_rate": 8.468340279068128e-06, "loss": 0.4467, "step": 7352 }, { "epoch": 1.1251721499617444, "grad_norm": 2.2760784233432263, "learning_rate": 8.465891384450015e-06, "loss": 0.4328, "step": 7353 }, { "epoch": 1.1253251721499617, "grad_norm": 1.9416601069747, "learning_rate": 8.463442584047516e-06, "loss": 0.3905, "step": 7354 }, { "epoch": 1.125478194338179, "grad_norm": 2.023979021978067, "learning_rate": 8.460993878011014e-06, "loss": 0.4174, "step": 7355 }, { "epoch": 1.1256312165263964, "grad_norm": 2.585206276544525, "learning_rate": 8.458545266490906e-06, "loss": 0.454, "step": 7356 }, { "epoch": 1.1257842387146135, "grad_norm": 2.0345591716158298, "learning_rate": 8.456096749637561e-06, "loss": 0.3921, "step": 7357 }, { "epoch": 1.1259372609028309, "grad_norm": 2.470455072554232, "learning_rate": 8.453648327601352e-06, "loss": 0.4929, "step": 7358 }, { "epoch": 1.1260902830910482, "grad_norm": 2.3686162736588385, "learning_rate": 8.451200000532653e-06, "loss": 0.4353, "step": 7359 }, { "epoch": 1.1262433052792655, "grad_norm": 2.135323906416676, "learning_rate": 8.448751768581818e-06, "loss": 0.3795, "step": 7360 }, { "epoch": 1.1263963274674829, "grad_norm": 2.33267928641562, "learning_rate": 8.446303631899203e-06, "loss": 0.4306, "step": 7361 }, { "epoch": 1.1265493496557002, "grad_norm": 2.4218289282958114, "learning_rate": 8.443855590635165e-06, "loss": 0.451, "step": 7362 }, { "epoch": 1.1267023718439173, "grad_norm": 2.132079052539921, "learning_rate": 8.441407644940038e-06, "loss": 0.4516, "step": 7363 }, { "epoch": 1.1268553940321346, "grad_norm": 2.270796945887167, "learning_rate": 8.438959794964168e-06, "loss": 0.4633, "step": 7364 }, { "epoch": 1.127008416220352, "grad_norm": 2.23165428476424, "learning_rate": 8.436512040857885e-06, "loss": 0.4663, "step": 7365 }, { "epoch": 1.1271614384085693, "grad_norm": 2.474571748588033, "learning_rate": 8.43406438277151e-06, "loss": 0.4026, "step": 7366 }, { "epoch": 1.1273144605967866, "grad_norm": 1.9023084237155796, "learning_rate": 8.43161682085537e-06, "loss": 0.4036, "step": 7367 }, { "epoch": 1.1274674827850037, "grad_norm": 2.2417421961042034, "learning_rate": 8.429169355259778e-06, "loss": 0.401, "step": 7368 }, { "epoch": 1.127620504973221, "grad_norm": 2.4185358282453864, "learning_rate": 8.426721986135037e-06, "loss": 0.5283, "step": 7369 }, { "epoch": 1.1277735271614384, "grad_norm": 2.27465022088665, "learning_rate": 8.424274713631455e-06, "loss": 0.4105, "step": 7370 }, { "epoch": 1.1279265493496557, "grad_norm": 2.2414361092641815, "learning_rate": 8.421827537899325e-06, "loss": 0.4595, "step": 7371 }, { "epoch": 1.128079571537873, "grad_norm": 2.2130236838711457, "learning_rate": 8.419380459088943e-06, "loss": 0.3782, "step": 7372 }, { "epoch": 1.1282325937260902, "grad_norm": 2.289444586623291, "learning_rate": 8.41693347735059e-06, "loss": 0.4152, "step": 7373 }, { "epoch": 1.1283856159143075, "grad_norm": 2.1547623456356573, "learning_rate": 8.414486592834544e-06, "loss": 0.3986, "step": 7374 }, { "epoch": 1.1285386381025249, "grad_norm": 2.3965413396527033, "learning_rate": 8.412039805691082e-06, "loss": 0.4543, "step": 7375 }, { "epoch": 1.1286916602907422, "grad_norm": 2.0682256832583192, "learning_rate": 8.409593116070465e-06, "loss": 0.3893, "step": 7376 }, { "epoch": 1.1288446824789595, "grad_norm": 2.1555593129350967, "learning_rate": 8.407146524122956e-06, "loss": 0.4843, "step": 7377 }, { "epoch": 1.1289977046671766, "grad_norm": 2.235134611714402, "learning_rate": 8.404700029998813e-06, "loss": 0.4193, "step": 7378 }, { "epoch": 1.129150726855394, "grad_norm": 2.0948079300685025, "learning_rate": 8.402253633848275e-06, "loss": 0.4189, "step": 7379 }, { "epoch": 1.1293037490436113, "grad_norm": 2.5618929392411385, "learning_rate": 8.399807335821599e-06, "loss": 0.4643, "step": 7380 }, { "epoch": 1.1294567712318286, "grad_norm": 2.1511509812008645, "learning_rate": 8.397361136069016e-06, "loss": 0.4467, "step": 7381 }, { "epoch": 1.129609793420046, "grad_norm": 2.386196178306402, "learning_rate": 8.394915034740746e-06, "loss": 0.4485, "step": 7382 }, { "epoch": 1.129762815608263, "grad_norm": 2.102940746700613, "learning_rate": 8.39246903198703e-06, "loss": 0.4044, "step": 7383 }, { "epoch": 1.1299158377964804, "grad_norm": 2.122835102459708, "learning_rate": 8.390023127958076e-06, "loss": 0.3913, "step": 7384 }, { "epoch": 1.1300688599846977, "grad_norm": 2.1996709406789043, "learning_rate": 8.387577322804102e-06, "loss": 0.4211, "step": 7385 }, { "epoch": 1.130221882172915, "grad_norm": 2.23042537125408, "learning_rate": 8.385131616675316e-06, "loss": 0.3713, "step": 7386 }, { "epoch": 1.1303749043611324, "grad_norm": 2.1043667069097745, "learning_rate": 8.382686009721907e-06, "loss": 0.4529, "step": 7387 }, { "epoch": 1.1305279265493497, "grad_norm": 2.227960780238535, "learning_rate": 8.380240502094083e-06, "loss": 0.4243, "step": 7388 }, { "epoch": 1.1306809487375669, "grad_norm": 2.4418394916459008, "learning_rate": 8.377795093942026e-06, "loss": 0.48, "step": 7389 }, { "epoch": 1.1308339709257842, "grad_norm": 2.484896572042387, "learning_rate": 8.375349785415915e-06, "loss": 0.376, "step": 7390 }, { "epoch": 1.1309869931140015, "grad_norm": 2.1153869841882553, "learning_rate": 8.372904576665931e-06, "loss": 0.4547, "step": 7391 }, { "epoch": 1.1311400153022189, "grad_norm": 2.135980963386847, "learning_rate": 8.37045946784224e-06, "loss": 0.3852, "step": 7392 }, { "epoch": 1.1312930374904362, "grad_norm": 2.3502007142725856, "learning_rate": 8.368014459095014e-06, "loss": 0.4678, "step": 7393 }, { "epoch": 1.1314460596786535, "grad_norm": 2.2129245462799525, "learning_rate": 8.3655695505744e-06, "loss": 0.3617, "step": 7394 }, { "epoch": 1.1315990818668706, "grad_norm": 2.4771896435377085, "learning_rate": 8.363124742430552e-06, "loss": 0.4377, "step": 7395 }, { "epoch": 1.131752104055088, "grad_norm": 2.1686976601970414, "learning_rate": 8.360680034813621e-06, "loss": 0.4344, "step": 7396 }, { "epoch": 1.1319051262433053, "grad_norm": 2.219669874900676, "learning_rate": 8.358235427873741e-06, "loss": 0.3737, "step": 7397 }, { "epoch": 1.1320581484315226, "grad_norm": 2.7668635869641043, "learning_rate": 8.355790921761045e-06, "loss": 0.4628, "step": 7398 }, { "epoch": 1.13221117061974, "grad_norm": 2.257255357947881, "learning_rate": 8.353346516625662e-06, "loss": 0.4475, "step": 7399 }, { "epoch": 1.132364192807957, "grad_norm": 1.7893405512069935, "learning_rate": 8.350902212617708e-06, "loss": 0.3202, "step": 7400 }, { "epoch": 1.1325172149961744, "grad_norm": 2.188151848584422, "learning_rate": 8.348458009887305e-06, "loss": 0.4079, "step": 7401 }, { "epoch": 1.1326702371843917, "grad_norm": 2.1347935928648623, "learning_rate": 8.346013908584556e-06, "loss": 0.4305, "step": 7402 }, { "epoch": 1.132823259372609, "grad_norm": 2.3676766269499026, "learning_rate": 8.343569908859559e-06, "loss": 0.5252, "step": 7403 }, { "epoch": 1.1329762815608264, "grad_norm": 2.284607042130858, "learning_rate": 8.341126010862417e-06, "loss": 0.4101, "step": 7404 }, { "epoch": 1.1331293037490435, "grad_norm": 2.3344145212373126, "learning_rate": 8.33868221474322e-06, "loss": 0.4269, "step": 7405 }, { "epoch": 1.1332823259372609, "grad_norm": 2.371166608832448, "learning_rate": 8.33623852065204e-06, "loss": 0.4966, "step": 7406 }, { "epoch": 1.1334353481254782, "grad_norm": 2.0768928444278565, "learning_rate": 8.333794928738963e-06, "loss": 0.4451, "step": 7407 }, { "epoch": 1.1335883703136955, "grad_norm": 2.2826255992028726, "learning_rate": 8.331351439154058e-06, "loss": 0.4068, "step": 7408 }, { "epoch": 1.1337413925019129, "grad_norm": 2.1691141270829264, "learning_rate": 8.328908052047392e-06, "loss": 0.449, "step": 7409 }, { "epoch": 1.13389441469013, "grad_norm": 2.0987622141051334, "learning_rate": 8.326464767569018e-06, "loss": 0.4117, "step": 7410 }, { "epoch": 1.1340474368783473, "grad_norm": 1.9073261756337854, "learning_rate": 8.324021585868987e-06, "loss": 0.3589, "step": 7411 }, { "epoch": 1.1342004590665646, "grad_norm": 2.145654462179916, "learning_rate": 8.321578507097351e-06, "loss": 0.4972, "step": 7412 }, { "epoch": 1.134353481254782, "grad_norm": 2.285381702770092, "learning_rate": 8.319135531404143e-06, "loss": 0.4493, "step": 7413 }, { "epoch": 1.1345065034429993, "grad_norm": 2.0970514541562153, "learning_rate": 8.316692658939396e-06, "loss": 0.4087, "step": 7414 }, { "epoch": 1.1346595256312164, "grad_norm": 2.2048734136664607, "learning_rate": 8.314249889853141e-06, "loss": 0.4177, "step": 7415 }, { "epoch": 1.1348125478194337, "grad_norm": 2.1074902936965225, "learning_rate": 8.311807224295391e-06, "loss": 0.4048, "step": 7416 }, { "epoch": 1.134965570007651, "grad_norm": 2.4923095199826726, "learning_rate": 8.309364662416167e-06, "loss": 0.4147, "step": 7417 }, { "epoch": 1.1351185921958684, "grad_norm": 2.108320231994888, "learning_rate": 8.306922204365476e-06, "loss": 0.3964, "step": 7418 }, { "epoch": 1.1352716143840857, "grad_norm": 2.3299560327530404, "learning_rate": 8.304479850293308e-06, "loss": 0.4453, "step": 7419 }, { "epoch": 1.135424636572303, "grad_norm": 1.9868538731801448, "learning_rate": 8.302037600349669e-06, "loss": 0.3799, "step": 7420 }, { "epoch": 1.1355776587605202, "grad_norm": 1.9462273485980057, "learning_rate": 8.29959545468454e-06, "loss": 0.4148, "step": 7421 }, { "epoch": 1.1357306809487375, "grad_norm": 2.234376101383108, "learning_rate": 8.297153413447914e-06, "loss": 0.4836, "step": 7422 }, { "epoch": 1.1358837031369549, "grad_norm": 2.0793008207140047, "learning_rate": 8.294711476789754e-06, "loss": 0.4197, "step": 7423 }, { "epoch": 1.1360367253251722, "grad_norm": 2.257538544402379, "learning_rate": 8.292269644860032e-06, "loss": 0.4509, "step": 7424 }, { "epoch": 1.1361897475133895, "grad_norm": 2.200890531426487, "learning_rate": 8.289827917808715e-06, "loss": 0.472, "step": 7425 }, { "epoch": 1.1363427697016066, "grad_norm": 2.278989470946727, "learning_rate": 8.287386295785755e-06, "loss": 0.3801, "step": 7426 }, { "epoch": 1.136495791889824, "grad_norm": 2.39384075061946, "learning_rate": 8.284944778941098e-06, "loss": 0.5339, "step": 7427 }, { "epoch": 1.1366488140780413, "grad_norm": 2.1283550645597367, "learning_rate": 8.282503367424697e-06, "loss": 0.4964, "step": 7428 }, { "epoch": 1.1368018362662586, "grad_norm": 2.0117879432634798, "learning_rate": 8.280062061386478e-06, "loss": 0.4534, "step": 7429 }, { "epoch": 1.136954858454476, "grad_norm": 2.2323821418668883, "learning_rate": 8.277620860976382e-06, "loss": 0.468, "step": 7430 }, { "epoch": 1.1371078806426933, "grad_norm": 2.1809204167250678, "learning_rate": 8.275179766344325e-06, "loss": 0.4446, "step": 7431 }, { "epoch": 1.1372609028309104, "grad_norm": 2.193452628067021, "learning_rate": 8.272738777640224e-06, "loss": 0.4116, "step": 7432 }, { "epoch": 1.1374139250191277, "grad_norm": 2.288415436946979, "learning_rate": 8.270297895013992e-06, "loss": 0.496, "step": 7433 }, { "epoch": 1.137566947207345, "grad_norm": 2.18335735896481, "learning_rate": 8.267857118615536e-06, "loss": 0.4222, "step": 7434 }, { "epoch": 1.1377199693955624, "grad_norm": 2.181193321791857, "learning_rate": 8.265416448594747e-06, "loss": 0.372, "step": 7435 }, { "epoch": 1.1378729915837797, "grad_norm": 2.268232897097735, "learning_rate": 8.26297588510152e-06, "loss": 0.494, "step": 7436 }, { "epoch": 1.1380260137719969, "grad_norm": 2.122086095027104, "learning_rate": 8.260535428285739e-06, "loss": 0.3753, "step": 7437 }, { "epoch": 1.1381790359602142, "grad_norm": 2.526263316541223, "learning_rate": 8.258095078297286e-06, "loss": 0.442, "step": 7438 }, { "epoch": 1.1383320581484315, "grad_norm": 2.1160673691906653, "learning_rate": 8.255654835286025e-06, "loss": 0.3435, "step": 7439 }, { "epoch": 1.1384850803366489, "grad_norm": 2.114820062384801, "learning_rate": 8.253214699401824e-06, "loss": 0.4397, "step": 7440 }, { "epoch": 1.1386381025248662, "grad_norm": 2.3148968139526325, "learning_rate": 8.250774670794545e-06, "loss": 0.4095, "step": 7441 }, { "epoch": 1.1387911247130833, "grad_norm": 2.3769438544271257, "learning_rate": 8.248334749614035e-06, "loss": 0.4592, "step": 7442 }, { "epoch": 1.1389441469013006, "grad_norm": 1.9651717159386775, "learning_rate": 8.245894936010139e-06, "loss": 0.4159, "step": 7443 }, { "epoch": 1.139097169089518, "grad_norm": 1.93039744862964, "learning_rate": 8.243455230132699e-06, "loss": 0.389, "step": 7444 }, { "epoch": 1.1392501912777353, "grad_norm": 2.2701040105758095, "learning_rate": 8.241015632131543e-06, "loss": 0.4002, "step": 7445 }, { "epoch": 1.1394032134659526, "grad_norm": 2.456302414590909, "learning_rate": 8.2385761421565e-06, "loss": 0.5128, "step": 7446 }, { "epoch": 1.1395562356541697, "grad_norm": 2.441590053887338, "learning_rate": 8.236136760357388e-06, "loss": 0.4405, "step": 7447 }, { "epoch": 1.139709257842387, "grad_norm": 2.142496840381271, "learning_rate": 8.233697486884014e-06, "loss": 0.3596, "step": 7448 }, { "epoch": 1.1398622800306044, "grad_norm": 2.2436694830487847, "learning_rate": 8.23125832188619e-06, "loss": 0.3891, "step": 7449 }, { "epoch": 1.1400153022188217, "grad_norm": 2.3067821364697085, "learning_rate": 8.228819265513713e-06, "loss": 0.356, "step": 7450 }, { "epoch": 1.140168324407039, "grad_norm": 2.2973291155628903, "learning_rate": 8.22638031791637e-06, "loss": 0.4147, "step": 7451 }, { "epoch": 1.1403213465952562, "grad_norm": 2.3150537810209344, "learning_rate": 8.223941479243952e-06, "loss": 0.4021, "step": 7452 }, { "epoch": 1.1404743687834735, "grad_norm": 2.1101437473072373, "learning_rate": 8.221502749646233e-06, "loss": 0.3884, "step": 7453 }, { "epoch": 1.1406273909716909, "grad_norm": 2.2940668477947015, "learning_rate": 8.219064129272993e-06, "loss": 0.4888, "step": 7454 }, { "epoch": 1.1407804131599082, "grad_norm": 2.5443419586116107, "learning_rate": 8.21662561827399e-06, "loss": 0.4524, "step": 7455 }, { "epoch": 1.1409334353481255, "grad_norm": 2.2535777983806433, "learning_rate": 8.214187216798982e-06, "loss": 0.4283, "step": 7456 }, { "epoch": 1.1410864575363429, "grad_norm": 2.2630948114110048, "learning_rate": 8.211748924997727e-06, "loss": 0.3295, "step": 7457 }, { "epoch": 1.14123947972456, "grad_norm": 2.3979824581281535, "learning_rate": 8.20931074301996e-06, "loss": 0.4793, "step": 7458 }, { "epoch": 1.1413925019127773, "grad_norm": 2.849113718168752, "learning_rate": 8.206872671015431e-06, "loss": 0.4199, "step": 7459 }, { "epoch": 1.1415455241009946, "grad_norm": 2.4386612095527807, "learning_rate": 8.204434709133868e-06, "loss": 0.3872, "step": 7460 }, { "epoch": 1.141698546289212, "grad_norm": 2.6483087030550845, "learning_rate": 8.201996857524985e-06, "loss": 0.4643, "step": 7461 }, { "epoch": 1.1418515684774293, "grad_norm": 1.8880437723686634, "learning_rate": 8.199559116338516e-06, "loss": 0.3426, "step": 7462 }, { "epoch": 1.1420045906656466, "grad_norm": 2.252181667335893, "learning_rate": 8.197121485724165e-06, "loss": 0.3879, "step": 7463 }, { "epoch": 1.1421576128538637, "grad_norm": 2.2676544247977684, "learning_rate": 8.194683965831632e-06, "loss": 0.4494, "step": 7464 }, { "epoch": 1.142310635042081, "grad_norm": 2.5427993887949705, "learning_rate": 8.192246556810623e-06, "loss": 0.4561, "step": 7465 }, { "epoch": 1.1424636572302984, "grad_norm": 2.4818084453252025, "learning_rate": 8.189809258810821e-06, "loss": 0.4889, "step": 7466 }, { "epoch": 1.1426166794185157, "grad_norm": 2.1968397362218144, "learning_rate": 8.187372071981918e-06, "loss": 0.4769, "step": 7467 }, { "epoch": 1.142769701606733, "grad_norm": 2.382228924959736, "learning_rate": 8.184934996473588e-06, "loss": 0.4139, "step": 7468 }, { "epoch": 1.1429227237949502, "grad_norm": 2.2384393655517107, "learning_rate": 8.182498032435495e-06, "loss": 0.4245, "step": 7469 }, { "epoch": 1.1430757459831675, "grad_norm": 2.1015126974026064, "learning_rate": 8.180061180017312e-06, "loss": 0.3678, "step": 7470 }, { "epoch": 1.1432287681713849, "grad_norm": 2.428458221772654, "learning_rate": 8.177624439368692e-06, "loss": 0.378, "step": 7471 }, { "epoch": 1.1433817903596022, "grad_norm": 2.2063195875131902, "learning_rate": 8.175187810639281e-06, "loss": 0.4007, "step": 7472 }, { "epoch": 1.1435348125478195, "grad_norm": 2.1957528996819184, "learning_rate": 8.172751293978727e-06, "loss": 0.4849, "step": 7473 }, { "epoch": 1.1436878347360366, "grad_norm": 2.3310595610922826, "learning_rate": 8.170314889536663e-06, "loss": 0.4719, "step": 7474 }, { "epoch": 1.143840856924254, "grad_norm": 2.1956159717965815, "learning_rate": 8.167878597462724e-06, "loss": 0.4452, "step": 7475 }, { "epoch": 1.1439938791124713, "grad_norm": 2.6239595075786855, "learning_rate": 8.165442417906523e-06, "loss": 0.401, "step": 7476 }, { "epoch": 1.1441469013006886, "grad_norm": 2.4291514293723875, "learning_rate": 8.163006351017681e-06, "loss": 0.4379, "step": 7477 }, { "epoch": 1.144299923488906, "grad_norm": 2.1139992285994014, "learning_rate": 8.160570396945808e-06, "loss": 0.4388, "step": 7478 }, { "epoch": 1.144452945677123, "grad_norm": 2.1633213676632015, "learning_rate": 8.1581345558405e-06, "loss": 0.4151, "step": 7479 }, { "epoch": 1.1446059678653404, "grad_norm": 2.2390982385776397, "learning_rate": 8.155698827851354e-06, "loss": 0.412, "step": 7480 }, { "epoch": 1.1447589900535577, "grad_norm": 2.2089163980451088, "learning_rate": 8.153263213127961e-06, "loss": 0.4565, "step": 7481 }, { "epoch": 1.144912012241775, "grad_norm": 2.176734428087088, "learning_rate": 8.150827711819894e-06, "loss": 0.4647, "step": 7482 }, { "epoch": 1.1450650344299924, "grad_norm": 2.253278709863533, "learning_rate": 8.148392324076733e-06, "loss": 0.4017, "step": 7483 }, { "epoch": 1.1452180566182095, "grad_norm": 2.152174433588315, "learning_rate": 8.145957050048047e-06, "loss": 0.3346, "step": 7484 }, { "epoch": 1.1453710788064269, "grad_norm": 2.047079847330625, "learning_rate": 8.143521889883385e-06, "loss": 0.3476, "step": 7485 }, { "epoch": 1.1455241009946442, "grad_norm": 2.4767797743598443, "learning_rate": 8.141086843732311e-06, "loss": 0.4391, "step": 7486 }, { "epoch": 1.1456771231828615, "grad_norm": 2.0852254844893476, "learning_rate": 8.138651911744362e-06, "loss": 0.3421, "step": 7487 }, { "epoch": 1.1458301453710789, "grad_norm": 2.3333992177155394, "learning_rate": 8.136217094069085e-06, "loss": 0.4186, "step": 7488 }, { "epoch": 1.1459831675592962, "grad_norm": 2.242432354816371, "learning_rate": 8.133782390856007e-06, "loss": 0.4564, "step": 7489 }, { "epoch": 1.1461361897475133, "grad_norm": 2.1801434144194447, "learning_rate": 8.131347802254649e-06, "loss": 0.4897, "step": 7490 }, { "epoch": 1.1462892119357306, "grad_norm": 2.7231872288313315, "learning_rate": 8.128913328414538e-06, "loss": 0.417, "step": 7491 }, { "epoch": 1.146442234123948, "grad_norm": 2.113303644811708, "learning_rate": 8.126478969485176e-06, "loss": 0.5045, "step": 7492 }, { "epoch": 1.1465952563121653, "grad_norm": 2.095819049578236, "learning_rate": 8.124044725616069e-06, "loss": 0.426, "step": 7493 }, { "epoch": 1.1467482785003826, "grad_norm": 2.040810152227828, "learning_rate": 8.121610596956718e-06, "loss": 0.4056, "step": 7494 }, { "epoch": 1.1469013006886, "grad_norm": 2.1692836388967125, "learning_rate": 8.119176583656603e-06, "loss": 0.44, "step": 7495 }, { "epoch": 1.147054322876817, "grad_norm": 2.2409161920316434, "learning_rate": 8.116742685865217e-06, "loss": 0.3894, "step": 7496 }, { "epoch": 1.1472073450650344, "grad_norm": 2.1774102605411776, "learning_rate": 8.114308903732028e-06, "loss": 0.4041, "step": 7497 }, { "epoch": 1.1473603672532517, "grad_norm": 1.728871470396512, "learning_rate": 8.111875237406506e-06, "loss": 0.3032, "step": 7498 }, { "epoch": 1.147513389441469, "grad_norm": 2.3391781721148743, "learning_rate": 8.109441687038111e-06, "loss": 0.4852, "step": 7499 }, { "epoch": 1.1476664116296864, "grad_norm": 2.104355287358558, "learning_rate": 8.107008252776301e-06, "loss": 0.4481, "step": 7500 }, { "epoch": 1.1478194338179035, "grad_norm": 2.2728732595750474, "learning_rate": 8.104574934770516e-06, "loss": 0.3932, "step": 7501 }, { "epoch": 1.1479724560061209, "grad_norm": 2.7508028520265686, "learning_rate": 8.102141733170202e-06, "loss": 0.422, "step": 7502 }, { "epoch": 1.1481254781943382, "grad_norm": 2.3486796594495885, "learning_rate": 8.099708648124785e-06, "loss": 0.542, "step": 7503 }, { "epoch": 1.1482785003825555, "grad_norm": 2.2815892384437806, "learning_rate": 8.097275679783698e-06, "loss": 0.4602, "step": 7504 }, { "epoch": 1.1484315225707729, "grad_norm": 2.371707040162622, "learning_rate": 8.094842828296354e-06, "loss": 0.4856, "step": 7505 }, { "epoch": 1.14858454475899, "grad_norm": 1.8999703078986259, "learning_rate": 8.092410093812161e-06, "loss": 0.3464, "step": 7506 }, { "epoch": 1.1487375669472073, "grad_norm": 1.9999448169209695, "learning_rate": 8.089977476480533e-06, "loss": 0.3793, "step": 7507 }, { "epoch": 1.1488905891354246, "grad_norm": 2.101338405608802, "learning_rate": 8.08754497645086e-06, "loss": 0.367, "step": 7508 }, { "epoch": 1.149043611323642, "grad_norm": 2.254916559111806, "learning_rate": 8.085112593872524e-06, "loss": 0.4823, "step": 7509 }, { "epoch": 1.1491966335118593, "grad_norm": 2.2298089722186103, "learning_rate": 8.082680328894923e-06, "loss": 0.4959, "step": 7510 }, { "epoch": 1.1493496557000764, "grad_norm": 2.017233130477621, "learning_rate": 8.080248181667417e-06, "loss": 0.4274, "step": 7511 }, { "epoch": 1.1495026778882937, "grad_norm": 2.358574713364482, "learning_rate": 8.077816152339383e-06, "loss": 0.5025, "step": 7512 }, { "epoch": 1.149655700076511, "grad_norm": 2.0398205910476657, "learning_rate": 8.075384241060183e-06, "loss": 0.4287, "step": 7513 }, { "epoch": 1.1498087222647284, "grad_norm": 2.0429358861791305, "learning_rate": 8.072952447979159e-06, "loss": 0.3928, "step": 7514 }, { "epoch": 1.1499617444529457, "grad_norm": 2.141370554482417, "learning_rate": 8.070520773245667e-06, "loss": 0.3852, "step": 7515 }, { "epoch": 1.1501147666411629, "grad_norm": 2.169035469722936, "learning_rate": 8.068089217009045e-06, "loss": 0.3634, "step": 7516 }, { "epoch": 1.1502677888293802, "grad_norm": 2.4022699362669058, "learning_rate": 8.065657779418617e-06, "loss": 0.4754, "step": 7517 }, { "epoch": 1.1504208110175975, "grad_norm": 2.0431104935895847, "learning_rate": 8.063226460623714e-06, "loss": 0.371, "step": 7518 }, { "epoch": 1.1505738332058149, "grad_norm": 2.0616275176271235, "learning_rate": 8.060795260773649e-06, "loss": 0.4364, "step": 7519 }, { "epoch": 1.1507268553940322, "grad_norm": 2.389435632037756, "learning_rate": 8.058364180017738e-06, "loss": 0.4303, "step": 7520 }, { "epoch": 1.1508798775822495, "grad_norm": 2.390938065826764, "learning_rate": 8.055933218505275e-06, "loss": 0.4162, "step": 7521 }, { "epoch": 1.1510328997704666, "grad_norm": 2.0817572989263735, "learning_rate": 8.053502376385555e-06, "loss": 0.4079, "step": 7522 }, { "epoch": 1.151185921958684, "grad_norm": 1.9572766628941676, "learning_rate": 8.051071653807877e-06, "loss": 0.3664, "step": 7523 }, { "epoch": 1.1513389441469013, "grad_norm": 1.97525311017114, "learning_rate": 8.048641050921505e-06, "loss": 0.3817, "step": 7524 }, { "epoch": 1.1514919663351186, "grad_norm": 2.189527479601311, "learning_rate": 8.046210567875725e-06, "loss": 0.4619, "step": 7525 }, { "epoch": 1.151644988523336, "grad_norm": 2.0684633513104136, "learning_rate": 8.043780204819796e-06, "loss": 0.3981, "step": 7526 }, { "epoch": 1.1517980107115533, "grad_norm": 2.199448040768017, "learning_rate": 8.041349961902976e-06, "loss": 0.3961, "step": 7527 }, { "epoch": 1.1519510328997704, "grad_norm": 2.2150480648254236, "learning_rate": 8.038919839274519e-06, "loss": 0.4216, "step": 7528 }, { "epoch": 1.1521040550879877, "grad_norm": 2.292813109070026, "learning_rate": 8.036489837083668e-06, "loss": 0.3829, "step": 7529 }, { "epoch": 1.152257077276205, "grad_norm": 2.065860761981022, "learning_rate": 8.034059955479652e-06, "loss": 0.398, "step": 7530 }, { "epoch": 1.1524100994644224, "grad_norm": 2.1957137938311764, "learning_rate": 8.031630194611708e-06, "loss": 0.3777, "step": 7531 }, { "epoch": 1.1525631216526397, "grad_norm": 2.395586780136042, "learning_rate": 8.029200554629052e-06, "loss": 0.4595, "step": 7532 }, { "epoch": 1.1527161438408569, "grad_norm": 2.354007174674176, "learning_rate": 8.026771035680905e-06, "loss": 0.4501, "step": 7533 }, { "epoch": 1.1528691660290742, "grad_norm": 2.1309239235307276, "learning_rate": 8.024341637916465e-06, "loss": 0.4385, "step": 7534 }, { "epoch": 1.1530221882172915, "grad_norm": 2.0989633908232554, "learning_rate": 8.02191236148493e-06, "loss": 0.4596, "step": 7535 }, { "epoch": 1.1531752104055089, "grad_norm": 2.0051365553355307, "learning_rate": 8.019483206535501e-06, "loss": 0.3603, "step": 7536 }, { "epoch": 1.1533282325937262, "grad_norm": 2.1379711591576602, "learning_rate": 8.017054173217354e-06, "loss": 0.4969, "step": 7537 }, { "epoch": 1.1534812547819433, "grad_norm": 2.0414756729412007, "learning_rate": 8.014625261679666e-06, "loss": 0.3722, "step": 7538 }, { "epoch": 1.1536342769701606, "grad_norm": 1.960059689751741, "learning_rate": 8.012196472071612e-06, "loss": 0.5155, "step": 7539 }, { "epoch": 1.153787299158378, "grad_norm": 1.9148456988264915, "learning_rate": 8.009767804542341e-06, "loss": 0.3806, "step": 7540 }, { "epoch": 1.1539403213465953, "grad_norm": 2.1346428516068507, "learning_rate": 8.007339259241022e-06, "loss": 0.3781, "step": 7541 }, { "epoch": 1.1540933435348126, "grad_norm": 2.341555927456598, "learning_rate": 8.004910836316796e-06, "loss": 0.4134, "step": 7542 }, { "epoch": 1.1542463657230297, "grad_norm": 2.1533856307933448, "learning_rate": 8.002482535918792e-06, "loss": 0.3918, "step": 7543 }, { "epoch": 1.154399387911247, "grad_norm": 2.203898129272653, "learning_rate": 8.000054358196156e-06, "loss": 0.3875, "step": 7544 }, { "epoch": 1.1545524100994644, "grad_norm": 2.0530530827506106, "learning_rate": 7.997626303298008e-06, "loss": 0.4047, "step": 7545 }, { "epoch": 1.1547054322876817, "grad_norm": 2.1846781568623026, "learning_rate": 7.995198371373455e-06, "loss": 0.4748, "step": 7546 }, { "epoch": 1.154858454475899, "grad_norm": 2.369642955439486, "learning_rate": 7.992770562571616e-06, "loss": 0.4418, "step": 7547 }, { "epoch": 1.1550114766641162, "grad_norm": 2.876693910486257, "learning_rate": 7.990342877041588e-06, "loss": 0.3995, "step": 7548 }, { "epoch": 1.1551644988523335, "grad_norm": 2.2881944470526503, "learning_rate": 7.987915314932467e-06, "loss": 0.4277, "step": 7549 }, { "epoch": 1.1553175210405509, "grad_norm": 2.082026776655421, "learning_rate": 7.98548787639334e-06, "loss": 0.474, "step": 7550 }, { "epoch": 1.1554705432287682, "grad_norm": 2.4770772431505623, "learning_rate": 7.98306056157328e-06, "loss": 0.4932, "step": 7551 }, { "epoch": 1.1556235654169855, "grad_norm": 2.400411126727818, "learning_rate": 7.980633370621361e-06, "loss": 0.417, "step": 7552 }, { "epoch": 1.1557765876052026, "grad_norm": 2.249144501782921, "learning_rate": 7.97820630368665e-06, "loss": 0.4198, "step": 7553 }, { "epoch": 1.15592960979342, "grad_norm": 2.4416647224932526, "learning_rate": 7.975779360918196e-06, "loss": 0.4977, "step": 7554 }, { "epoch": 1.1560826319816373, "grad_norm": 2.315267686111871, "learning_rate": 7.973352542465052e-06, "loss": 0.3786, "step": 7555 }, { "epoch": 1.1562356541698546, "grad_norm": 2.12994967417012, "learning_rate": 7.970925848476253e-06, "loss": 0.4149, "step": 7556 }, { "epoch": 1.156388676358072, "grad_norm": 2.4649859720017604, "learning_rate": 7.968499279100841e-06, "loss": 0.5204, "step": 7557 }, { "epoch": 1.1565416985462893, "grad_norm": 2.546695379639944, "learning_rate": 7.966072834487832e-06, "loss": 0.4823, "step": 7558 }, { "epoch": 1.1566947207345064, "grad_norm": 2.111908023754244, "learning_rate": 7.963646514786246e-06, "loss": 0.4027, "step": 7559 }, { "epoch": 1.1568477429227237, "grad_norm": 2.079272062182409, "learning_rate": 7.961220320145099e-06, "loss": 0.3618, "step": 7560 }, { "epoch": 1.157000765110941, "grad_norm": 2.0099331247968517, "learning_rate": 7.958794250713381e-06, "loss": 0.3389, "step": 7561 }, { "epoch": 1.1571537872991584, "grad_norm": 2.3728785319967693, "learning_rate": 7.956368306640097e-06, "loss": 0.4472, "step": 7562 }, { "epoch": 1.1573068094873757, "grad_norm": 2.124001664400862, "learning_rate": 7.953942488074233e-06, "loss": 0.4035, "step": 7563 }, { "epoch": 1.157459831675593, "grad_norm": 2.2562599575262685, "learning_rate": 7.95151679516476e-06, "loss": 0.4425, "step": 7564 }, { "epoch": 1.1576128538638102, "grad_norm": 2.1706628764944798, "learning_rate": 7.949091228060657e-06, "loss": 0.4145, "step": 7565 }, { "epoch": 1.1577658760520275, "grad_norm": 1.9386706013096928, "learning_rate": 7.946665786910885e-06, "loss": 0.3171, "step": 7566 }, { "epoch": 1.1579188982402449, "grad_norm": 2.2644379442856297, "learning_rate": 7.944240471864398e-06, "loss": 0.4577, "step": 7567 }, { "epoch": 1.1580719204284622, "grad_norm": 2.2324278833799864, "learning_rate": 7.941815283070147e-06, "loss": 0.4197, "step": 7568 }, { "epoch": 1.1582249426166795, "grad_norm": 2.4366820568521512, "learning_rate": 7.939390220677068e-06, "loss": 0.4956, "step": 7569 }, { "epoch": 1.1583779648048966, "grad_norm": 2.274130967656845, "learning_rate": 7.936965284834102e-06, "loss": 0.4817, "step": 7570 }, { "epoch": 1.158530986993114, "grad_norm": 2.214947057393825, "learning_rate": 7.934540475690167e-06, "loss": 0.4042, "step": 7571 }, { "epoch": 1.1586840091813313, "grad_norm": 2.2361223952851823, "learning_rate": 7.932115793394177e-06, "loss": 0.3838, "step": 7572 }, { "epoch": 1.1588370313695486, "grad_norm": 2.4647857683566823, "learning_rate": 7.929691238095053e-06, "loss": 0.4323, "step": 7573 }, { "epoch": 1.158990053557766, "grad_norm": 2.2605032776577287, "learning_rate": 7.927266809941684e-06, "loss": 0.4403, "step": 7574 }, { "epoch": 1.159143075745983, "grad_norm": 2.307062509681339, "learning_rate": 7.924842509082968e-06, "loss": 0.4219, "step": 7575 }, { "epoch": 1.1592960979342004, "grad_norm": 2.2580402550302336, "learning_rate": 7.922418335667796e-06, "loss": 0.4076, "step": 7576 }, { "epoch": 1.1594491201224177, "grad_norm": 1.926770123976774, "learning_rate": 7.919994289845038e-06, "loss": 0.3209, "step": 7577 }, { "epoch": 1.159602142310635, "grad_norm": 1.88033873862793, "learning_rate": 7.917570371763568e-06, "loss": 0.3106, "step": 7578 }, { "epoch": 1.1597551644988524, "grad_norm": 2.3143555284094726, "learning_rate": 7.915146581572253e-06, "loss": 0.4199, "step": 7579 }, { "epoch": 1.1599081866870695, "grad_norm": 2.5111769532452524, "learning_rate": 7.912722919419936e-06, "loss": 0.4582, "step": 7580 }, { "epoch": 1.1600612088752869, "grad_norm": 2.159383285210872, "learning_rate": 7.910299385455472e-06, "loss": 0.3882, "step": 7581 }, { "epoch": 1.1602142310635042, "grad_norm": 2.0463292222580227, "learning_rate": 7.9078759798277e-06, "loss": 0.3195, "step": 7582 }, { "epoch": 1.1603672532517215, "grad_norm": 2.315690123330286, "learning_rate": 7.905452702685446e-06, "loss": 0.4594, "step": 7583 }, { "epoch": 1.1605202754399389, "grad_norm": 2.4911195808536464, "learning_rate": 7.903029554177535e-06, "loss": 0.4797, "step": 7584 }, { "epoch": 1.160673297628156, "grad_norm": 2.0446690195381847, "learning_rate": 7.900606534452782e-06, "loss": 0.4061, "step": 7585 }, { "epoch": 1.1608263198163733, "grad_norm": 2.1561715447958574, "learning_rate": 7.898183643659998e-06, "loss": 0.4537, "step": 7586 }, { "epoch": 1.1609793420045906, "grad_norm": 2.3006046638561366, "learning_rate": 7.895760881947976e-06, "loss": 0.5039, "step": 7587 }, { "epoch": 1.161132364192808, "grad_norm": 2.495860237065185, "learning_rate": 7.89333824946551e-06, "loss": 0.4654, "step": 7588 }, { "epoch": 1.1612853863810253, "grad_norm": 2.1959113314709873, "learning_rate": 7.890915746361388e-06, "loss": 0.3754, "step": 7589 }, { "epoch": 1.1614384085692426, "grad_norm": 2.2157690704589954, "learning_rate": 7.888493372784375e-06, "loss": 0.4643, "step": 7590 }, { "epoch": 1.1615914307574597, "grad_norm": 2.1585183274845083, "learning_rate": 7.886071128883245e-06, "loss": 0.454, "step": 7591 }, { "epoch": 1.161744452945677, "grad_norm": 2.3488517000482165, "learning_rate": 7.883649014806762e-06, "loss": 0.4765, "step": 7592 }, { "epoch": 1.1618974751338944, "grad_norm": 2.3071998807798417, "learning_rate": 7.881227030703666e-06, "loss": 0.4174, "step": 7593 }, { "epoch": 1.1620504973221117, "grad_norm": 2.0712309131234066, "learning_rate": 7.878805176722708e-06, "loss": 0.4144, "step": 7594 }, { "epoch": 1.162203519510329, "grad_norm": 2.09316779209673, "learning_rate": 7.876383453012626e-06, "loss": 0.4189, "step": 7595 }, { "epoch": 1.1623565416985464, "grad_norm": 1.9814309260192482, "learning_rate": 7.873961859722139e-06, "loss": 0.3391, "step": 7596 }, { "epoch": 1.1625095638867635, "grad_norm": 2.1146901984425783, "learning_rate": 7.871540396999974e-06, "loss": 0.3784, "step": 7597 }, { "epoch": 1.1626625860749809, "grad_norm": 2.0404107507386264, "learning_rate": 7.869119064994836e-06, "loss": 0.4149, "step": 7598 }, { "epoch": 1.1628156082631982, "grad_norm": 2.083732346909204, "learning_rate": 7.866697863855439e-06, "loss": 0.4429, "step": 7599 }, { "epoch": 1.1629686304514155, "grad_norm": 2.3775803032947636, "learning_rate": 7.864276793730468e-06, "loss": 0.3781, "step": 7600 }, { "epoch": 1.1631216526396329, "grad_norm": 2.115787174371097, "learning_rate": 7.861855854768611e-06, "loss": 0.3634, "step": 7601 }, { "epoch": 1.16327467482785, "grad_norm": 2.203471890719275, "learning_rate": 7.859435047118558e-06, "loss": 0.407, "step": 7602 }, { "epoch": 1.1634276970160673, "grad_norm": 2.121708542637144, "learning_rate": 7.857014370928968e-06, "loss": 0.4098, "step": 7603 }, { "epoch": 1.1635807192042846, "grad_norm": 2.324513780877982, "learning_rate": 7.854593826348506e-06, "loss": 0.4553, "step": 7604 }, { "epoch": 1.163733741392502, "grad_norm": 2.1593388977675976, "learning_rate": 7.852173413525837e-06, "loss": 0.3437, "step": 7605 }, { "epoch": 1.1638867635807193, "grad_norm": 2.2737818262158362, "learning_rate": 7.849753132609595e-06, "loss": 0.4773, "step": 7606 }, { "epoch": 1.1640397857689364, "grad_norm": 2.4196407421903534, "learning_rate": 7.847332983748427e-06, "loss": 0.4916, "step": 7607 }, { "epoch": 1.1641928079571537, "grad_norm": 2.2265966790800853, "learning_rate": 7.844912967090965e-06, "loss": 0.3524, "step": 7608 }, { "epoch": 1.164345830145371, "grad_norm": 2.300774220942024, "learning_rate": 7.842493082785823e-06, "loss": 0.4525, "step": 7609 }, { "epoch": 1.1644988523335884, "grad_norm": 2.1765958482202246, "learning_rate": 7.840073330981623e-06, "loss": 0.4291, "step": 7610 }, { "epoch": 1.1646518745218057, "grad_norm": 2.545845268184133, "learning_rate": 7.837653711826973e-06, "loss": 0.4652, "step": 7611 }, { "epoch": 1.1648048967100229, "grad_norm": 2.2236738456132437, "learning_rate": 7.835234225470462e-06, "loss": 0.4389, "step": 7612 }, { "epoch": 1.1649579188982402, "grad_norm": 2.161295283860229, "learning_rate": 7.832814872060688e-06, "loss": 0.429, "step": 7613 }, { "epoch": 1.1651109410864575, "grad_norm": 2.267543706470478, "learning_rate": 7.83039565174623e-06, "loss": 0.4238, "step": 7614 }, { "epoch": 1.1652639632746749, "grad_norm": 2.096609865562028, "learning_rate": 7.827976564675666e-06, "loss": 0.4182, "step": 7615 }, { "epoch": 1.1654169854628922, "grad_norm": 1.9474587819076785, "learning_rate": 7.825557610997556e-06, "loss": 0.4151, "step": 7616 }, { "epoch": 1.1655700076511093, "grad_norm": 2.231859405584811, "learning_rate": 7.823138790860457e-06, "loss": 0.4394, "step": 7617 }, { "epoch": 1.1657230298393266, "grad_norm": 2.1358801598651036, "learning_rate": 7.820720104412926e-06, "loss": 0.4202, "step": 7618 }, { "epoch": 1.165876052027544, "grad_norm": 2.273267880308986, "learning_rate": 7.818301551803495e-06, "loss": 0.4307, "step": 7619 }, { "epoch": 1.1660290742157613, "grad_norm": 2.2176807501043183, "learning_rate": 7.8158831331807e-06, "loss": 0.3873, "step": 7620 }, { "epoch": 1.1661820964039786, "grad_norm": 2.164388307223667, "learning_rate": 7.813464848693071e-06, "loss": 0.418, "step": 7621 }, { "epoch": 1.166335118592196, "grad_norm": 2.3247978450960405, "learning_rate": 7.81104669848911e-06, "loss": 0.4521, "step": 7622 }, { "epoch": 1.166488140780413, "grad_norm": 2.1664042575123195, "learning_rate": 7.808628682717344e-06, "loss": 0.4207, "step": 7623 }, { "epoch": 1.1666411629686304, "grad_norm": 2.0898246135348004, "learning_rate": 7.806210801526262e-06, "loss": 0.3832, "step": 7624 }, { "epoch": 1.1667941851568477, "grad_norm": 2.0408662892538523, "learning_rate": 7.80379305506435e-06, "loss": 0.3524, "step": 7625 }, { "epoch": 1.166947207345065, "grad_norm": 2.2001590021513397, "learning_rate": 7.801375443480106e-06, "loss": 0.4377, "step": 7626 }, { "epoch": 1.1671002295332824, "grad_norm": 2.35247171593286, "learning_rate": 7.798957966921992e-06, "loss": 0.4511, "step": 7627 }, { "epoch": 1.1672532517214997, "grad_norm": 2.093124942157242, "learning_rate": 7.796540625538482e-06, "loss": 0.3984, "step": 7628 }, { "epoch": 1.1674062739097169, "grad_norm": 2.3270121913506743, "learning_rate": 7.794123419478034e-06, "loss": 0.4626, "step": 7629 }, { "epoch": 1.1675592960979342, "grad_norm": 2.235583289520111, "learning_rate": 7.791706348889092e-06, "loss": 0.421, "step": 7630 }, { "epoch": 1.1677123182861515, "grad_norm": 2.2800471006152336, "learning_rate": 7.789289413920104e-06, "loss": 0.4149, "step": 7631 }, { "epoch": 1.1678653404743689, "grad_norm": 2.11975063545846, "learning_rate": 7.786872614719504e-06, "loss": 0.4212, "step": 7632 }, { "epoch": 1.1680183626625862, "grad_norm": 1.976637872464587, "learning_rate": 7.78445595143571e-06, "loss": 0.3641, "step": 7633 }, { "epoch": 1.1681713848508033, "grad_norm": 2.5678946494338426, "learning_rate": 7.782039424217145e-06, "loss": 0.3952, "step": 7634 }, { "epoch": 1.1683244070390206, "grad_norm": 2.4311254543526712, "learning_rate": 7.779623033212213e-06, "loss": 0.5068, "step": 7635 }, { "epoch": 1.168477429227238, "grad_norm": 2.125309875951746, "learning_rate": 7.777206778569323e-06, "loss": 0.39, "step": 7636 }, { "epoch": 1.1686304514154553, "grad_norm": 1.9029565175583325, "learning_rate": 7.774790660436857e-06, "loss": 0.3747, "step": 7637 }, { "epoch": 1.1687834736036726, "grad_norm": 1.8974621969583034, "learning_rate": 7.772374678963199e-06, "loss": 0.3954, "step": 7638 }, { "epoch": 1.1689364957918897, "grad_norm": 2.1731954934867397, "learning_rate": 7.769958834296733e-06, "loss": 0.4806, "step": 7639 }, { "epoch": 1.169089517980107, "grad_norm": 2.164959683654725, "learning_rate": 7.767543126585815e-06, "loss": 0.3921, "step": 7640 }, { "epoch": 1.1692425401683244, "grad_norm": 2.0519145883295917, "learning_rate": 7.765127555978805e-06, "loss": 0.3916, "step": 7641 }, { "epoch": 1.1693955623565417, "grad_norm": 2.022149191067335, "learning_rate": 7.762712122624059e-06, "loss": 0.5355, "step": 7642 }, { "epoch": 1.169548584544759, "grad_norm": 2.2553693306957934, "learning_rate": 7.76029682666991e-06, "loss": 0.4656, "step": 7643 }, { "epoch": 1.1697016067329762, "grad_norm": 2.1677346659488395, "learning_rate": 7.757881668264696e-06, "loss": 0.4061, "step": 7644 }, { "epoch": 1.1698546289211935, "grad_norm": 2.2303487915393183, "learning_rate": 7.755466647556742e-06, "loss": 0.4142, "step": 7645 }, { "epoch": 1.1700076511094109, "grad_norm": 2.2087393847961745, "learning_rate": 7.753051764694358e-06, "loss": 0.4007, "step": 7646 }, { "epoch": 1.1701606732976282, "grad_norm": 2.1866091937779326, "learning_rate": 7.750637019825858e-06, "loss": 0.3785, "step": 7647 }, { "epoch": 1.1703136954858455, "grad_norm": 2.333612890088411, "learning_rate": 7.74822241309954e-06, "loss": 0.488, "step": 7648 }, { "epoch": 1.1704667176740626, "grad_norm": 2.152719088963061, "learning_rate": 7.745807944663688e-06, "loss": 0.421, "step": 7649 }, { "epoch": 1.17061973986228, "grad_norm": 2.322270994926128, "learning_rate": 7.74339361466659e-06, "loss": 0.4393, "step": 7650 }, { "epoch": 1.1707727620504973, "grad_norm": 2.195968124463279, "learning_rate": 7.740979423256518e-06, "loss": 0.4031, "step": 7651 }, { "epoch": 1.1709257842387146, "grad_norm": 2.136519735206509, "learning_rate": 7.738565370581739e-06, "loss": 0.3473, "step": 7652 }, { "epoch": 1.171078806426932, "grad_norm": 2.2023901399428305, "learning_rate": 7.736151456790506e-06, "loss": 0.4934, "step": 7653 }, { "epoch": 1.171231828615149, "grad_norm": 2.2416044379867994, "learning_rate": 7.733737682031066e-06, "loss": 0.4688, "step": 7654 }, { "epoch": 1.1713848508033664, "grad_norm": 1.9033203376997059, "learning_rate": 7.731324046451665e-06, "loss": 0.4178, "step": 7655 }, { "epoch": 1.1715378729915837, "grad_norm": 2.007408800033132, "learning_rate": 7.728910550200528e-06, "loss": 0.4644, "step": 7656 }, { "epoch": 1.171690895179801, "grad_norm": 2.187477421883523, "learning_rate": 7.726497193425875e-06, "loss": 0.4285, "step": 7657 }, { "epoch": 1.1718439173680184, "grad_norm": 2.3383291826778363, "learning_rate": 7.72408397627593e-06, "loss": 0.4946, "step": 7658 }, { "epoch": 1.1719969395562357, "grad_norm": 2.097809814469768, "learning_rate": 7.721670898898886e-06, "loss": 0.4198, "step": 7659 }, { "epoch": 1.1721499617444529, "grad_norm": 2.134611181161027, "learning_rate": 7.719257961442946e-06, "loss": 0.3677, "step": 7660 }, { "epoch": 1.1723029839326702, "grad_norm": 2.0635524558988165, "learning_rate": 7.716845164056301e-06, "loss": 0.3861, "step": 7661 }, { "epoch": 1.1724560061208875, "grad_norm": 2.1668260879700285, "learning_rate": 7.714432506887119e-06, "loss": 0.3923, "step": 7662 }, { "epoch": 1.1726090283091049, "grad_norm": 2.165575599965999, "learning_rate": 7.712019990083583e-06, "loss": 0.3705, "step": 7663 }, { "epoch": 1.1727620504973222, "grad_norm": 2.3510883434857717, "learning_rate": 7.709607613793847e-06, "loss": 0.4414, "step": 7664 }, { "epoch": 1.1729150726855395, "grad_norm": 2.4418747152055733, "learning_rate": 7.707195378166071e-06, "loss": 0.4232, "step": 7665 }, { "epoch": 1.1730680948737566, "grad_norm": 1.8774606626370103, "learning_rate": 7.704783283348396e-06, "loss": 0.3496, "step": 7666 }, { "epoch": 1.173221117061974, "grad_norm": 2.4097174796880125, "learning_rate": 7.702371329488954e-06, "loss": 0.402, "step": 7667 }, { "epoch": 1.1733741392501913, "grad_norm": 2.141900723164932, "learning_rate": 7.699959516735884e-06, "loss": 0.4609, "step": 7668 }, { "epoch": 1.1735271614384086, "grad_norm": 1.9648011311206592, "learning_rate": 7.697547845237294e-06, "loss": 0.307, "step": 7669 }, { "epoch": 1.173680183626626, "grad_norm": 2.014416906522851, "learning_rate": 7.695136315141295e-06, "loss": 0.4412, "step": 7670 }, { "epoch": 1.173833205814843, "grad_norm": 1.9892079843533914, "learning_rate": 7.692724926595998e-06, "loss": 0.386, "step": 7671 }, { "epoch": 1.1739862280030604, "grad_norm": 2.123848925579515, "learning_rate": 7.690313679749484e-06, "loss": 0.4088, "step": 7672 }, { "epoch": 1.1741392501912777, "grad_norm": 2.4469979298476146, "learning_rate": 7.687902574749844e-06, "loss": 0.538, "step": 7673 }, { "epoch": 1.174292272379495, "grad_norm": 2.1653847556064414, "learning_rate": 7.685491611745155e-06, "loss": 0.4599, "step": 7674 }, { "epoch": 1.1744452945677124, "grad_norm": 2.1231595403483468, "learning_rate": 7.683080790883477e-06, "loss": 0.4341, "step": 7675 }, { "epoch": 1.1745983167559295, "grad_norm": 2.180437643743477, "learning_rate": 7.680670112312871e-06, "loss": 0.387, "step": 7676 }, { "epoch": 1.1747513389441469, "grad_norm": 2.1797587090293455, "learning_rate": 7.678259576181391e-06, "loss": 0.4162, "step": 7677 }, { "epoch": 1.1749043611323642, "grad_norm": 1.9300874158917527, "learning_rate": 7.675849182637069e-06, "loss": 0.3977, "step": 7678 }, { "epoch": 1.1750573833205815, "grad_norm": 2.176277192004729, "learning_rate": 7.67343893182794e-06, "loss": 0.3371, "step": 7679 }, { "epoch": 1.1752104055087988, "grad_norm": 2.1934070557560537, "learning_rate": 7.671028823902029e-06, "loss": 0.4144, "step": 7680 }, { "epoch": 1.175363427697016, "grad_norm": 2.2349384227823914, "learning_rate": 7.668618859007351e-06, "loss": 0.3794, "step": 7681 }, { "epoch": 1.1755164498852333, "grad_norm": 2.2353031611686416, "learning_rate": 7.666209037291909e-06, "loss": 0.439, "step": 7682 }, { "epoch": 1.1756694720734506, "grad_norm": 2.511919213185073, "learning_rate": 7.663799358903698e-06, "loss": 0.455, "step": 7683 }, { "epoch": 1.175822494261668, "grad_norm": 2.0920595984190626, "learning_rate": 7.661389823990711e-06, "loss": 0.3794, "step": 7684 }, { "epoch": 1.1759755164498853, "grad_norm": 2.247879750828007, "learning_rate": 7.65898043270092e-06, "loss": 0.4846, "step": 7685 }, { "epoch": 1.1761285386381024, "grad_norm": 2.1945571365625653, "learning_rate": 7.656571185182298e-06, "loss": 0.4249, "step": 7686 }, { "epoch": 1.1762815608263197, "grad_norm": 2.043387306270332, "learning_rate": 7.654162081582812e-06, "loss": 0.3923, "step": 7687 }, { "epoch": 1.176434583014537, "grad_norm": 2.1797498439512193, "learning_rate": 7.651753122050404e-06, "loss": 0.4203, "step": 7688 }, { "epoch": 1.1765876052027544, "grad_norm": 1.9302525156159196, "learning_rate": 7.649344306733026e-06, "loss": 0.3811, "step": 7689 }, { "epoch": 1.1767406273909717, "grad_norm": 2.3062243258549735, "learning_rate": 7.646935635778612e-06, "loss": 0.4832, "step": 7690 }, { "epoch": 1.176893649579189, "grad_norm": 2.2193174384523315, "learning_rate": 7.64452710933508e-06, "loss": 0.4152, "step": 7691 }, { "epoch": 1.1770466717674062, "grad_norm": 2.1635717890959754, "learning_rate": 7.642118727550358e-06, "loss": 0.3953, "step": 7692 }, { "epoch": 1.1771996939556235, "grad_norm": 1.9823843125750913, "learning_rate": 7.639710490572348e-06, "loss": 0.3899, "step": 7693 }, { "epoch": 1.1773527161438408, "grad_norm": 2.0966731774522684, "learning_rate": 7.637302398548949e-06, "loss": 0.3648, "step": 7694 }, { "epoch": 1.1775057383320582, "grad_norm": 2.159828350633099, "learning_rate": 7.634894451628053e-06, "loss": 0.4285, "step": 7695 }, { "epoch": 1.1776587605202755, "grad_norm": 2.314588251676099, "learning_rate": 7.632486649957539e-06, "loss": 0.4209, "step": 7696 }, { "epoch": 1.1778117827084928, "grad_norm": 2.234225308705491, "learning_rate": 7.630078993685286e-06, "loss": 0.3981, "step": 7697 }, { "epoch": 1.17796480489671, "grad_norm": 2.360790111872005, "learning_rate": 7.627671482959152e-06, "loss": 0.4557, "step": 7698 }, { "epoch": 1.1781178270849273, "grad_norm": 2.206115497805982, "learning_rate": 7.625264117926989e-06, "loss": 0.4567, "step": 7699 }, { "epoch": 1.1782708492731446, "grad_norm": 2.7009488220205395, "learning_rate": 7.622856898736652e-06, "loss": 0.3969, "step": 7700 }, { "epoch": 1.178423871461362, "grad_norm": 1.9240114837643656, "learning_rate": 7.6204498255359674e-06, "loss": 0.4017, "step": 7701 }, { "epoch": 1.1785768936495793, "grad_norm": 2.0628563892650558, "learning_rate": 7.618042898472771e-06, "loss": 0.4437, "step": 7702 }, { "epoch": 1.1787299158377964, "grad_norm": 1.8012689942102933, "learning_rate": 7.6156361176948804e-06, "loss": 0.3697, "step": 7703 }, { "epoch": 1.1788829380260137, "grad_norm": 2.22070307433343, "learning_rate": 7.613229483350095e-06, "loss": 0.4171, "step": 7704 }, { "epoch": 1.179035960214231, "grad_norm": 2.312836250788219, "learning_rate": 7.610822995586234e-06, "loss": 0.4379, "step": 7705 }, { "epoch": 1.1791889824024484, "grad_norm": 2.2321765546942665, "learning_rate": 7.608416654551077e-06, "loss": 0.3946, "step": 7706 }, { "epoch": 1.1793420045906657, "grad_norm": 2.1449678132243677, "learning_rate": 7.606010460392402e-06, "loss": 0.3959, "step": 7707 }, { "epoch": 1.1794950267788828, "grad_norm": 2.1178482207877836, "learning_rate": 7.603604413257998e-06, "loss": 0.4641, "step": 7708 }, { "epoch": 1.1796480489671002, "grad_norm": 2.277472552097201, "learning_rate": 7.601198513295618e-06, "loss": 0.4133, "step": 7709 }, { "epoch": 1.1798010711553175, "grad_norm": 2.3060438068546674, "learning_rate": 7.598792760653022e-06, "loss": 0.3717, "step": 7710 }, { "epoch": 1.1799540933435348, "grad_norm": 2.212645777195339, "learning_rate": 7.5963871554779586e-06, "loss": 0.4233, "step": 7711 }, { "epoch": 1.1801071155317522, "grad_norm": 2.288782015626369, "learning_rate": 7.593981697918159e-06, "loss": 0.3845, "step": 7712 }, { "epoch": 1.1802601377199693, "grad_norm": 1.9850372928797795, "learning_rate": 7.5915763881213576e-06, "loss": 0.3853, "step": 7713 }, { "epoch": 1.1804131599081866, "grad_norm": 2.1714931935713127, "learning_rate": 7.589171226235276e-06, "loss": 0.3907, "step": 7714 }, { "epoch": 1.180566182096404, "grad_norm": 1.9973204967252072, "learning_rate": 7.586766212407615e-06, "loss": 0.3507, "step": 7715 }, { "epoch": 1.1807192042846213, "grad_norm": 2.041853555787875, "learning_rate": 7.584361346786082e-06, "loss": 0.4383, "step": 7716 }, { "epoch": 1.1808722264728386, "grad_norm": 2.0279848709164843, "learning_rate": 7.581956629518369e-06, "loss": 0.4229, "step": 7717 }, { "epoch": 1.1810252486610557, "grad_norm": 2.06147392563384, "learning_rate": 7.579552060752162e-06, "loss": 0.4688, "step": 7718 }, { "epoch": 1.181178270849273, "grad_norm": 2.257617092359455, "learning_rate": 7.57714764063513e-06, "loss": 0.4485, "step": 7719 }, { "epoch": 1.1813312930374904, "grad_norm": 2.089931676977981, "learning_rate": 7.574743369314937e-06, "loss": 0.4228, "step": 7720 }, { "epoch": 1.1814843152257077, "grad_norm": 2.018731297347733, "learning_rate": 7.572339246939247e-06, "loss": 0.3592, "step": 7721 }, { "epoch": 1.181637337413925, "grad_norm": 1.9133562808684277, "learning_rate": 7.569935273655696e-06, "loss": 0.349, "step": 7722 }, { "epoch": 1.1817903596021424, "grad_norm": 2.3225293651885828, "learning_rate": 7.567531449611927e-06, "loss": 0.3854, "step": 7723 }, { "epoch": 1.1819433817903595, "grad_norm": 2.080329898307183, "learning_rate": 7.565127774955571e-06, "loss": 0.4338, "step": 7724 }, { "epoch": 1.1820964039785768, "grad_norm": 2.127662122852471, "learning_rate": 7.562724249834239e-06, "loss": 0.4683, "step": 7725 }, { "epoch": 1.1822494261667942, "grad_norm": 2.1114355637464133, "learning_rate": 7.560320874395547e-06, "loss": 0.3616, "step": 7726 }, { "epoch": 1.1824024483550115, "grad_norm": 2.3223541842836237, "learning_rate": 7.557917648787097e-06, "loss": 0.4541, "step": 7727 }, { "epoch": 1.1825554705432288, "grad_norm": 2.153187629446042, "learning_rate": 7.555514573156474e-06, "loss": 0.4665, "step": 7728 }, { "epoch": 1.1827084927314462, "grad_norm": 2.115658998412094, "learning_rate": 7.553111647651266e-06, "loss": 0.3996, "step": 7729 }, { "epoch": 1.1828615149196633, "grad_norm": 2.293884325074409, "learning_rate": 7.5507088724190445e-06, "loss": 0.4079, "step": 7730 }, { "epoch": 1.1830145371078806, "grad_norm": 2.3765090288857658, "learning_rate": 7.54830624760737e-06, "loss": 0.4425, "step": 7731 }, { "epoch": 1.183167559296098, "grad_norm": 1.9207125051904854, "learning_rate": 7.5459037733638016e-06, "loss": 0.3427, "step": 7732 }, { "epoch": 1.1833205814843153, "grad_norm": 2.115216638237201, "learning_rate": 7.54350144983588e-06, "loss": 0.4216, "step": 7733 }, { "epoch": 1.1834736036725326, "grad_norm": 2.072322814839418, "learning_rate": 7.54109927717115e-06, "loss": 0.4091, "step": 7734 }, { "epoch": 1.1836266258607497, "grad_norm": 1.972905904780838, "learning_rate": 7.53869725551713e-06, "loss": 0.3766, "step": 7735 }, { "epoch": 1.183779648048967, "grad_norm": 2.169516411743189, "learning_rate": 7.536295385021338e-06, "loss": 0.3662, "step": 7736 }, { "epoch": 1.1839326702371844, "grad_norm": 2.0957775433273538, "learning_rate": 7.5338936658312885e-06, "loss": 0.398, "step": 7737 }, { "epoch": 1.1840856924254017, "grad_norm": 2.1343320274405793, "learning_rate": 7.531492098094473e-06, "loss": 0.4014, "step": 7738 }, { "epoch": 1.184238714613619, "grad_norm": 2.378363664272812, "learning_rate": 7.5290906819583865e-06, "loss": 0.4425, "step": 7739 }, { "epoch": 1.1843917368018362, "grad_norm": 2.2596488259073535, "learning_rate": 7.52668941757051e-06, "loss": 0.4086, "step": 7740 }, { "epoch": 1.1845447589900535, "grad_norm": 2.2518267482599135, "learning_rate": 7.524288305078307e-06, "loss": 0.3888, "step": 7741 }, { "epoch": 1.1846977811782708, "grad_norm": 2.2319015517268386, "learning_rate": 7.521887344629246e-06, "loss": 0.4249, "step": 7742 }, { "epoch": 1.1848508033664882, "grad_norm": 2.099102832105148, "learning_rate": 7.519486536370779e-06, "loss": 0.3981, "step": 7743 }, { "epoch": 1.1850038255547055, "grad_norm": 2.326704059378749, "learning_rate": 7.517085880450345e-06, "loss": 0.4581, "step": 7744 }, { "epoch": 1.1851568477429226, "grad_norm": 2.3914827027493297, "learning_rate": 7.514685377015383e-06, "loss": 0.394, "step": 7745 }, { "epoch": 1.18530986993114, "grad_norm": 1.8983373970935058, "learning_rate": 7.512285026213311e-06, "loss": 0.3382, "step": 7746 }, { "epoch": 1.1854628921193573, "grad_norm": 2.3908709484472523, "learning_rate": 7.509884828191552e-06, "loss": 0.4461, "step": 7747 }, { "epoch": 1.1856159143075746, "grad_norm": 2.483737125826371, "learning_rate": 7.5074847830975054e-06, "loss": 0.4697, "step": 7748 }, { "epoch": 1.185768936495792, "grad_norm": 2.1147090426635224, "learning_rate": 7.505084891078566e-06, "loss": 0.3533, "step": 7749 }, { "epoch": 1.185921958684009, "grad_norm": 2.1135289311255407, "learning_rate": 7.5026851522821286e-06, "loss": 0.4444, "step": 7750 }, { "epoch": 1.1860749808722264, "grad_norm": 2.1675730484291864, "learning_rate": 7.500285566855564e-06, "loss": 0.3763, "step": 7751 }, { "epoch": 1.1862280030604437, "grad_norm": 2.1575555766890706, "learning_rate": 7.497886134946238e-06, "loss": 0.4758, "step": 7752 }, { "epoch": 1.186381025248661, "grad_norm": 1.7799980041205554, "learning_rate": 7.49548685670152e-06, "loss": 0.3235, "step": 7753 }, { "epoch": 1.1865340474368784, "grad_norm": 2.344961863500604, "learning_rate": 7.493087732268744e-06, "loss": 0.4517, "step": 7754 }, { "epoch": 1.1866870696250955, "grad_norm": 2.3828288288807684, "learning_rate": 7.490688761795262e-06, "loss": 0.4251, "step": 7755 }, { "epoch": 1.1868400918133128, "grad_norm": 2.224048831830438, "learning_rate": 7.4882899454284e-06, "loss": 0.4147, "step": 7756 }, { "epoch": 1.1869931140015302, "grad_norm": 2.3063506729017655, "learning_rate": 7.485891283315475e-06, "loss": 0.3875, "step": 7757 }, { "epoch": 1.1871461361897475, "grad_norm": 2.369613366127151, "learning_rate": 7.483492775603803e-06, "loss": 0.4198, "step": 7758 }, { "epoch": 1.1872991583779648, "grad_norm": 2.2857022695312095, "learning_rate": 7.481094422440688e-06, "loss": 0.4651, "step": 7759 }, { "epoch": 1.1874521805661822, "grad_norm": 1.9585673421612289, "learning_rate": 7.478696223973413e-06, "loss": 0.3196, "step": 7760 }, { "epoch": 1.1876052027543993, "grad_norm": 2.103682155279868, "learning_rate": 7.476298180349269e-06, "loss": 0.403, "step": 7761 }, { "epoch": 1.1877582249426166, "grad_norm": 1.8589192664213134, "learning_rate": 7.4739002917155235e-06, "loss": 0.3739, "step": 7762 }, { "epoch": 1.187911247130834, "grad_norm": 2.2039683638286203, "learning_rate": 7.471502558219448e-06, "loss": 0.4164, "step": 7763 }, { "epoch": 1.1880642693190513, "grad_norm": 2.285658684641684, "learning_rate": 7.469104980008291e-06, "loss": 0.4457, "step": 7764 }, { "epoch": 1.1882172915072686, "grad_norm": 2.0870031789407077, "learning_rate": 7.466707557229294e-06, "loss": 0.4712, "step": 7765 }, { "epoch": 1.188370313695486, "grad_norm": 1.981110909098681, "learning_rate": 7.464310290029702e-06, "loss": 0.442, "step": 7766 }, { "epoch": 1.188523335883703, "grad_norm": 2.146376730190204, "learning_rate": 7.461913178556731e-06, "loss": 0.4094, "step": 7767 }, { "epoch": 1.1886763580719204, "grad_norm": 2.1228310670875827, "learning_rate": 7.4595162229576005e-06, "loss": 0.425, "step": 7768 }, { "epoch": 1.1888293802601377, "grad_norm": 2.0450434435134115, "learning_rate": 7.45711942337952e-06, "loss": 0.3992, "step": 7769 }, { "epoch": 1.188982402448355, "grad_norm": 2.072140769338632, "learning_rate": 7.45472277996968e-06, "loss": 0.3567, "step": 7770 }, { "epoch": 1.1891354246365724, "grad_norm": 2.1452549233035163, "learning_rate": 7.452326292875273e-06, "loss": 0.3996, "step": 7771 }, { "epoch": 1.1892884468247895, "grad_norm": 2.0564488471141025, "learning_rate": 7.449929962243478e-06, "loss": 0.4003, "step": 7772 }, { "epoch": 1.1894414690130068, "grad_norm": 2.2765367450207177, "learning_rate": 7.447533788221454e-06, "loss": 0.4523, "step": 7773 }, { "epoch": 1.1895944912012242, "grad_norm": 2.474886870340549, "learning_rate": 7.445137770956368e-06, "loss": 0.4561, "step": 7774 }, { "epoch": 1.1897475133894415, "grad_norm": 2.296110569783041, "learning_rate": 7.442741910595365e-06, "loss": 0.4152, "step": 7775 }, { "epoch": 1.1899005355776588, "grad_norm": 2.3301560185251726, "learning_rate": 7.440346207285589e-06, "loss": 0.4871, "step": 7776 }, { "epoch": 1.190053557765876, "grad_norm": 1.8039552048846907, "learning_rate": 7.437950661174164e-06, "loss": 0.3184, "step": 7777 }, { "epoch": 1.1902065799540933, "grad_norm": 2.1926490325759, "learning_rate": 7.435555272408208e-06, "loss": 0.4312, "step": 7778 }, { "epoch": 1.1903596021423106, "grad_norm": 2.104249637935987, "learning_rate": 7.433160041134841e-06, "loss": 0.3409, "step": 7779 }, { "epoch": 1.190512624330528, "grad_norm": 2.20756794295377, "learning_rate": 7.430764967501155e-06, "loss": 0.3749, "step": 7780 }, { "epoch": 1.1906656465187453, "grad_norm": 2.069229329976173, "learning_rate": 7.428370051654241e-06, "loss": 0.3903, "step": 7781 }, { "epoch": 1.1908186687069624, "grad_norm": 2.2207924833034087, "learning_rate": 7.425975293741186e-06, "loss": 0.4389, "step": 7782 }, { "epoch": 1.1909716908951797, "grad_norm": 2.2331530214251623, "learning_rate": 7.4235806939090515e-06, "loss": 0.4131, "step": 7783 }, { "epoch": 1.191124713083397, "grad_norm": 2.2504109035986026, "learning_rate": 7.421186252304913e-06, "loss": 0.4731, "step": 7784 }, { "epoch": 1.1912777352716144, "grad_norm": 2.371044724991984, "learning_rate": 7.4187919690758145e-06, "loss": 0.4377, "step": 7785 }, { "epoch": 1.1914307574598317, "grad_norm": 2.242523834065211, "learning_rate": 7.416397844368792e-06, "loss": 0.3621, "step": 7786 }, { "epoch": 1.1915837796480488, "grad_norm": 2.2945870940039668, "learning_rate": 7.414003878330893e-06, "loss": 0.3849, "step": 7787 }, { "epoch": 1.1917368018362662, "grad_norm": 2.266870340168214, "learning_rate": 7.411610071109131e-06, "loss": 0.4038, "step": 7788 }, { "epoch": 1.1918898240244835, "grad_norm": 2.1237321269660834, "learning_rate": 7.409216422850514e-06, "loss": 0.3927, "step": 7789 }, { "epoch": 1.1920428462127008, "grad_norm": 2.3142228870616126, "learning_rate": 7.406822933702058e-06, "loss": 0.4434, "step": 7790 }, { "epoch": 1.1921958684009182, "grad_norm": 2.0955604959527068, "learning_rate": 7.404429603810747e-06, "loss": 0.3222, "step": 7791 }, { "epoch": 1.1923488905891355, "grad_norm": 1.9617282423466649, "learning_rate": 7.402036433323569e-06, "loss": 0.3517, "step": 7792 }, { "epoch": 1.1925019127773526, "grad_norm": 1.9869255508557389, "learning_rate": 7.399643422387499e-06, "loss": 0.3596, "step": 7793 }, { "epoch": 1.19265493496557, "grad_norm": 2.2481717873813367, "learning_rate": 7.397250571149496e-06, "loss": 0.4018, "step": 7794 }, { "epoch": 1.1928079571537873, "grad_norm": 2.267844809299893, "learning_rate": 7.3948578797565185e-06, "loss": 0.4399, "step": 7795 }, { "epoch": 1.1929609793420046, "grad_norm": 2.296245229262881, "learning_rate": 7.392465348355512e-06, "loss": 0.4375, "step": 7796 }, { "epoch": 1.193114001530222, "grad_norm": 2.4275607536962585, "learning_rate": 7.390072977093405e-06, "loss": 0.4886, "step": 7797 }, { "epoch": 1.1932670237184393, "grad_norm": 2.1878475452491633, "learning_rate": 7.387680766117129e-06, "loss": 0.4665, "step": 7798 }, { "epoch": 1.1934200459066564, "grad_norm": 2.2456587752248445, "learning_rate": 7.3852887155735955e-06, "loss": 0.4437, "step": 7799 }, { "epoch": 1.1935730680948737, "grad_norm": 2.395071968132589, "learning_rate": 7.382896825609714e-06, "loss": 0.456, "step": 7800 }, { "epoch": 1.193726090283091, "grad_norm": 2.1758010578869738, "learning_rate": 7.380505096372375e-06, "loss": 0.3582, "step": 7801 }, { "epoch": 1.1938791124713084, "grad_norm": 2.457008422878569, "learning_rate": 7.3781135280084615e-06, "loss": 0.4464, "step": 7802 }, { "epoch": 1.1940321346595257, "grad_norm": 2.0253508595174394, "learning_rate": 7.375722120664859e-06, "loss": 0.3641, "step": 7803 }, { "epoch": 1.1941851568477428, "grad_norm": 2.207183732033762, "learning_rate": 7.373330874488422e-06, "loss": 0.4679, "step": 7804 }, { "epoch": 1.1943381790359602, "grad_norm": 1.9874369076730363, "learning_rate": 7.370939789626016e-06, "loss": 0.3426, "step": 7805 }, { "epoch": 1.1944912012241775, "grad_norm": 2.2001806026700153, "learning_rate": 7.368548866224483e-06, "loss": 0.3841, "step": 7806 }, { "epoch": 1.1946442234123948, "grad_norm": 1.9436191856311789, "learning_rate": 7.366158104430654e-06, "loss": 0.3105, "step": 7807 }, { "epoch": 1.1947972456006122, "grad_norm": 2.0716591900676145, "learning_rate": 7.363767504391362e-06, "loss": 0.3567, "step": 7808 }, { "epoch": 1.1949502677888293, "grad_norm": 2.3756768448765504, "learning_rate": 7.361377066253424e-06, "loss": 0.4912, "step": 7809 }, { "epoch": 1.1951032899770466, "grad_norm": 2.044315279391537, "learning_rate": 7.358986790163636e-06, "loss": 0.3538, "step": 7810 }, { "epoch": 1.195256312165264, "grad_norm": 1.9855034404666652, "learning_rate": 7.356596676268804e-06, "loss": 0.3895, "step": 7811 }, { "epoch": 1.1954093343534813, "grad_norm": 2.315701745879983, "learning_rate": 7.354206724715709e-06, "loss": 0.4436, "step": 7812 }, { "epoch": 1.1955623565416986, "grad_norm": 2.134487707383376, "learning_rate": 7.3518169356511335e-06, "loss": 0.3905, "step": 7813 }, { "epoch": 1.1957153787299157, "grad_norm": 2.0187548298884543, "learning_rate": 7.349427309221838e-06, "loss": 0.3984, "step": 7814 }, { "epoch": 1.195868400918133, "grad_norm": 2.271455573960681, "learning_rate": 7.347037845574578e-06, "loss": 0.3748, "step": 7815 }, { "epoch": 1.1960214231063504, "grad_norm": 2.5486034536590987, "learning_rate": 7.344648544856108e-06, "loss": 0.5062, "step": 7816 }, { "epoch": 1.1961744452945677, "grad_norm": 2.222378931229507, "learning_rate": 7.342259407213155e-06, "loss": 0.3106, "step": 7817 }, { "epoch": 1.196327467482785, "grad_norm": 2.384115619572824, "learning_rate": 7.339870432792448e-06, "loss": 0.4188, "step": 7818 }, { "epoch": 1.1964804896710022, "grad_norm": 2.4136145453264652, "learning_rate": 7.337481621740707e-06, "loss": 0.4704, "step": 7819 }, { "epoch": 1.1966335118592195, "grad_norm": 2.081016144614866, "learning_rate": 7.335092974204632e-06, "loss": 0.409, "step": 7820 }, { "epoch": 1.1967865340474368, "grad_norm": 2.0694517272109563, "learning_rate": 7.332704490330924e-06, "loss": 0.3886, "step": 7821 }, { "epoch": 1.1969395562356542, "grad_norm": 2.015225672566741, "learning_rate": 7.3303161702662715e-06, "loss": 0.3692, "step": 7822 }, { "epoch": 1.1970925784238715, "grad_norm": 2.503758938970734, "learning_rate": 7.327928014157341e-06, "loss": 0.4168, "step": 7823 }, { "epoch": 1.1972456006120888, "grad_norm": 2.1923521871806626, "learning_rate": 7.3255400221508076e-06, "loss": 0.4147, "step": 7824 }, { "epoch": 1.197398622800306, "grad_norm": 1.9996996439024073, "learning_rate": 7.323152194393326e-06, "loss": 0.424, "step": 7825 }, { "epoch": 1.1975516449885233, "grad_norm": 2.3176090940404004, "learning_rate": 7.320764531031535e-06, "loss": 0.4398, "step": 7826 }, { "epoch": 1.1977046671767406, "grad_norm": 2.0145284165200503, "learning_rate": 7.318377032212078e-06, "loss": 0.361, "step": 7827 }, { "epoch": 1.197857689364958, "grad_norm": 1.818639838520028, "learning_rate": 7.315989698081576e-06, "loss": 0.3775, "step": 7828 }, { "epoch": 1.1980107115531753, "grad_norm": 2.1098549855251676, "learning_rate": 7.313602528786649e-06, "loss": 0.3689, "step": 7829 }, { "epoch": 1.1981637337413926, "grad_norm": 2.128136439868593, "learning_rate": 7.3112155244739e-06, "loss": 0.4344, "step": 7830 }, { "epoch": 1.1983167559296097, "grad_norm": 2.4135696579714314, "learning_rate": 7.3088286852899224e-06, "loss": 0.4493, "step": 7831 }, { "epoch": 1.198469778117827, "grad_norm": 2.0006879555739894, "learning_rate": 7.306442011381307e-06, "loss": 0.4525, "step": 7832 }, { "epoch": 1.1986228003060444, "grad_norm": 2.051025939461601, "learning_rate": 7.3040555028946225e-06, "loss": 0.4063, "step": 7833 }, { "epoch": 1.1987758224942617, "grad_norm": 2.034625467920058, "learning_rate": 7.301669159976434e-06, "loss": 0.4244, "step": 7834 }, { "epoch": 1.198928844682479, "grad_norm": 2.0888212266302326, "learning_rate": 7.299282982773301e-06, "loss": 0.3443, "step": 7835 }, { "epoch": 1.1990818668706962, "grad_norm": 1.9813426666478027, "learning_rate": 7.296896971431764e-06, "loss": 0.3808, "step": 7836 }, { "epoch": 1.1992348890589135, "grad_norm": 2.1295621512831446, "learning_rate": 7.2945111260983584e-06, "loss": 0.3701, "step": 7837 }, { "epoch": 1.1993879112471308, "grad_norm": 2.0436205396614646, "learning_rate": 7.292125446919611e-06, "loss": 0.3835, "step": 7838 }, { "epoch": 1.1995409334353482, "grad_norm": 2.119557743892499, "learning_rate": 7.289739934042031e-06, "loss": 0.3901, "step": 7839 }, { "epoch": 1.1996939556235655, "grad_norm": 2.300357163359858, "learning_rate": 7.287354587612123e-06, "loss": 0.424, "step": 7840 }, { "epoch": 1.1998469778117826, "grad_norm": 2.085956552889289, "learning_rate": 7.284969407776382e-06, "loss": 0.3756, "step": 7841 }, { "epoch": 1.2, "grad_norm": 2.3081369285656725, "learning_rate": 7.282584394681294e-06, "loss": 0.4268, "step": 7842 }, { "epoch": 1.2001530221882173, "grad_norm": 2.353996381043388, "learning_rate": 7.280199548473328e-06, "loss": 0.4335, "step": 7843 }, { "epoch": 1.2003060443764346, "grad_norm": 2.414828978611619, "learning_rate": 7.277814869298945e-06, "loss": 0.4901, "step": 7844 }, { "epoch": 1.200459066564652, "grad_norm": 2.3718399813628452, "learning_rate": 7.275430357304604e-06, "loss": 0.4031, "step": 7845 }, { "epoch": 1.200612088752869, "grad_norm": 2.2121027430213163, "learning_rate": 7.273046012636742e-06, "loss": 0.4049, "step": 7846 }, { "epoch": 1.2007651109410864, "grad_norm": 2.0996973379693897, "learning_rate": 7.270661835441789e-06, "loss": 0.3518, "step": 7847 }, { "epoch": 1.2009181331293037, "grad_norm": 2.3792243056952844, "learning_rate": 7.268277825866175e-06, "loss": 0.4506, "step": 7848 }, { "epoch": 1.201071155317521, "grad_norm": 2.2470123496158294, "learning_rate": 7.265893984056302e-06, "loss": 0.4264, "step": 7849 }, { "epoch": 1.2012241775057384, "grad_norm": 2.6193999301938273, "learning_rate": 7.263510310158577e-06, "loss": 0.4304, "step": 7850 }, { "epoch": 1.2013771996939555, "grad_norm": 1.9781200787965287, "learning_rate": 7.261126804319391e-06, "loss": 0.3284, "step": 7851 }, { "epoch": 1.2015302218821728, "grad_norm": 2.0903308853535227, "learning_rate": 7.258743466685119e-06, "loss": 0.3898, "step": 7852 }, { "epoch": 1.2016832440703902, "grad_norm": 2.0779995520765464, "learning_rate": 7.2563602974021365e-06, "loss": 0.3994, "step": 7853 }, { "epoch": 1.2018362662586075, "grad_norm": 2.185208271217259, "learning_rate": 7.253977296616802e-06, "loss": 0.4303, "step": 7854 }, { "epoch": 1.2019892884468248, "grad_norm": 2.093158167390441, "learning_rate": 7.251594464475462e-06, "loss": 0.4102, "step": 7855 }, { "epoch": 1.2021423106350422, "grad_norm": 2.474345761675588, "learning_rate": 7.249211801124459e-06, "loss": 0.3867, "step": 7856 }, { "epoch": 1.2022953328232593, "grad_norm": 2.354161944422409, "learning_rate": 7.246829306710118e-06, "loss": 0.4842, "step": 7857 }, { "epoch": 1.2024483550114766, "grad_norm": 2.066474315946716, "learning_rate": 7.244446981378764e-06, "loss": 0.3929, "step": 7858 }, { "epoch": 1.202601377199694, "grad_norm": 2.2841546405429742, "learning_rate": 7.242064825276699e-06, "loss": 0.3933, "step": 7859 }, { "epoch": 1.2027543993879113, "grad_norm": 2.421878768693007, "learning_rate": 7.239682838550219e-06, "loss": 0.4667, "step": 7860 }, { "epoch": 1.2029074215761286, "grad_norm": 2.121335990190176, "learning_rate": 7.237301021345621e-06, "loss": 0.4164, "step": 7861 }, { "epoch": 1.203060443764346, "grad_norm": 2.2477816426607347, "learning_rate": 7.23491937380917e-06, "loss": 0.4141, "step": 7862 }, { "epoch": 1.203213465952563, "grad_norm": 2.0842774961175965, "learning_rate": 7.232537896087138e-06, "loss": 0.4256, "step": 7863 }, { "epoch": 1.2033664881407804, "grad_norm": 2.3760441894624034, "learning_rate": 7.230156588325783e-06, "loss": 0.4749, "step": 7864 }, { "epoch": 1.2035195103289977, "grad_norm": 2.8127024874878117, "learning_rate": 7.2277754506713415e-06, "loss": 0.3896, "step": 7865 }, { "epoch": 1.203672532517215, "grad_norm": 2.3056681652953546, "learning_rate": 7.22539448327006e-06, "loss": 0.4713, "step": 7866 }, { "epoch": 1.2038255547054324, "grad_norm": 2.038914674694339, "learning_rate": 7.223013686268159e-06, "loss": 0.3701, "step": 7867 }, { "epoch": 1.2039785768936495, "grad_norm": 2.279184568859362, "learning_rate": 7.2206330598118435e-06, "loss": 0.5052, "step": 7868 }, { "epoch": 1.2041315990818668, "grad_norm": 2.362094871311031, "learning_rate": 7.218252604047331e-06, "loss": 0.4475, "step": 7869 }, { "epoch": 1.2042846212700842, "grad_norm": 2.2299992118588188, "learning_rate": 7.215872319120809e-06, "loss": 0.4853, "step": 7870 }, { "epoch": 1.2044376434583015, "grad_norm": 1.8836964001194594, "learning_rate": 7.213492205178454e-06, "loss": 0.2751, "step": 7871 }, { "epoch": 1.2045906656465188, "grad_norm": 2.1731065858170515, "learning_rate": 7.2111122623664486e-06, "loss": 0.4221, "step": 7872 }, { "epoch": 1.204743687834736, "grad_norm": 2.042405359275383, "learning_rate": 7.208732490830945e-06, "loss": 0.3539, "step": 7873 }, { "epoch": 1.2048967100229533, "grad_norm": 1.916086959905358, "learning_rate": 7.206352890718102e-06, "loss": 0.3526, "step": 7874 }, { "epoch": 1.2050497322111706, "grad_norm": 2.08298656195251, "learning_rate": 7.203973462174059e-06, "loss": 0.3444, "step": 7875 }, { "epoch": 1.205202754399388, "grad_norm": 2.127087834921326, "learning_rate": 7.201594205344937e-06, "loss": 0.3774, "step": 7876 }, { "epoch": 1.2053557765876053, "grad_norm": 1.8822819513462488, "learning_rate": 7.199215120376866e-06, "loss": 0.4222, "step": 7877 }, { "epoch": 1.2055087987758224, "grad_norm": 2.0069738268178043, "learning_rate": 7.19683620741595e-06, "loss": 0.3617, "step": 7878 }, { "epoch": 1.2056618209640397, "grad_norm": 2.347032617268608, "learning_rate": 7.1944574666082925e-06, "loss": 0.4167, "step": 7879 }, { "epoch": 1.205814843152257, "grad_norm": 1.9841895774201872, "learning_rate": 7.192078898099975e-06, "loss": 0.3959, "step": 7880 }, { "epoch": 1.2059678653404744, "grad_norm": 2.1545952125519254, "learning_rate": 7.189700502037073e-06, "loss": 0.3581, "step": 7881 }, { "epoch": 1.2061208875286917, "grad_norm": 2.2942907440257345, "learning_rate": 7.187322278565665e-06, "loss": 0.4123, "step": 7882 }, { "epoch": 1.2062739097169088, "grad_norm": 2.3337214188107076, "learning_rate": 7.184944227831794e-06, "loss": 0.4186, "step": 7883 }, { "epoch": 1.2064269319051262, "grad_norm": 2.1105765004274546, "learning_rate": 7.18256634998151e-06, "loss": 0.3696, "step": 7884 }, { "epoch": 1.2065799540933435, "grad_norm": 1.8495228363779856, "learning_rate": 7.180188645160851e-06, "loss": 0.3263, "step": 7885 }, { "epoch": 1.2067329762815608, "grad_norm": 2.178466457135846, "learning_rate": 7.1778111135158355e-06, "loss": 0.4567, "step": 7886 }, { "epoch": 1.2068859984697782, "grad_norm": 2.329996121904899, "learning_rate": 7.175433755192483e-06, "loss": 0.3929, "step": 7887 }, { "epoch": 1.2070390206579953, "grad_norm": 2.7057684229341143, "learning_rate": 7.173056570336794e-06, "loss": 0.4107, "step": 7888 }, { "epoch": 1.2071920428462126, "grad_norm": 2.2813650165867045, "learning_rate": 7.170679559094755e-06, "loss": 0.3918, "step": 7889 }, { "epoch": 1.20734506503443, "grad_norm": 2.2649980429048617, "learning_rate": 7.168302721612357e-06, "loss": 0.3625, "step": 7890 }, { "epoch": 1.2074980872226473, "grad_norm": 1.9690419186555863, "learning_rate": 7.165926058035566e-06, "loss": 0.3964, "step": 7891 }, { "epoch": 1.2076511094108646, "grad_norm": 2.402097955904389, "learning_rate": 7.163549568510341e-06, "loss": 0.4267, "step": 7892 }, { "epoch": 1.207804131599082, "grad_norm": 2.241109535031182, "learning_rate": 7.161173253182634e-06, "loss": 0.3604, "step": 7893 }, { "epoch": 1.207957153787299, "grad_norm": 2.3475139013875213, "learning_rate": 7.1587971121983815e-06, "loss": 0.4277, "step": 7894 }, { "epoch": 1.2081101759755164, "grad_norm": 2.4465913526801844, "learning_rate": 7.156421145703517e-06, "loss": 0.3985, "step": 7895 }, { "epoch": 1.2082631981637337, "grad_norm": 2.3863337251537318, "learning_rate": 7.154045353843952e-06, "loss": 0.3679, "step": 7896 }, { "epoch": 1.208416220351951, "grad_norm": 2.126924455000003, "learning_rate": 7.151669736765594e-06, "loss": 0.3994, "step": 7897 }, { "epoch": 1.2085692425401684, "grad_norm": 2.304629645464992, "learning_rate": 7.149294294614344e-06, "loss": 0.3881, "step": 7898 }, { "epoch": 1.2087222647283857, "grad_norm": 2.10307375531413, "learning_rate": 7.146919027536081e-06, "loss": 0.407, "step": 7899 }, { "epoch": 1.2088752869166028, "grad_norm": 2.1356508233173357, "learning_rate": 7.14454393567668e-06, "loss": 0.3948, "step": 7900 }, { "epoch": 1.2090283091048202, "grad_norm": 2.1661367713580146, "learning_rate": 7.14216901918201e-06, "loss": 0.369, "step": 7901 }, { "epoch": 1.2091813312930375, "grad_norm": 2.073359287919676, "learning_rate": 7.1397942781979165e-06, "loss": 0.4001, "step": 7902 }, { "epoch": 1.2093343534812548, "grad_norm": 2.285306774993513, "learning_rate": 7.137419712870248e-06, "loss": 0.4791, "step": 7903 }, { "epoch": 1.2094873756694722, "grad_norm": 2.2565147922670272, "learning_rate": 7.135045323344836e-06, "loss": 0.4129, "step": 7904 }, { "epoch": 1.2096403978576893, "grad_norm": 2.205771600598996, "learning_rate": 7.132671109767493e-06, "loss": 0.4151, "step": 7905 }, { "epoch": 1.2097934200459066, "grad_norm": 2.3554198855954804, "learning_rate": 7.130297072284037e-06, "loss": 0.3735, "step": 7906 }, { "epoch": 1.209946442234124, "grad_norm": 2.460816659992818, "learning_rate": 7.127923211040267e-06, "loss": 0.4421, "step": 7907 }, { "epoch": 1.2100994644223413, "grad_norm": 2.2894306166581697, "learning_rate": 7.125549526181963e-06, "loss": 0.4287, "step": 7908 }, { "epoch": 1.2102524866105586, "grad_norm": 2.2431820015839734, "learning_rate": 7.123176017854909e-06, "loss": 0.3583, "step": 7909 }, { "epoch": 1.2104055087987757, "grad_norm": 2.3312818039925576, "learning_rate": 7.120802686204869e-06, "loss": 0.4208, "step": 7910 }, { "epoch": 1.210558530986993, "grad_norm": 2.3032859188438737, "learning_rate": 7.118429531377604e-06, "loss": 0.4121, "step": 7911 }, { "epoch": 1.2107115531752104, "grad_norm": 2.251670699371594, "learning_rate": 7.116056553518851e-06, "loss": 0.4033, "step": 7912 }, { "epoch": 1.2108645753634277, "grad_norm": 2.0986050604327278, "learning_rate": 7.113683752774345e-06, "loss": 0.3579, "step": 7913 }, { "epoch": 1.211017597551645, "grad_norm": 2.2418784167236545, "learning_rate": 7.1113111292898174e-06, "loss": 0.407, "step": 7914 }, { "epoch": 1.2111706197398622, "grad_norm": 2.163620185778059, "learning_rate": 7.108938683210968e-06, "loss": 0.4055, "step": 7915 }, { "epoch": 1.2113236419280795, "grad_norm": 2.355202682771081, "learning_rate": 7.106566414683506e-06, "loss": 0.4669, "step": 7916 }, { "epoch": 1.2114766641162968, "grad_norm": 2.1271629737901625, "learning_rate": 7.104194323853122e-06, "loss": 0.3561, "step": 7917 }, { "epoch": 1.2116296863045142, "grad_norm": 2.0575868876645096, "learning_rate": 7.101822410865489e-06, "loss": 0.3592, "step": 7918 }, { "epoch": 1.2117827084927315, "grad_norm": 2.0538971546012696, "learning_rate": 7.099450675866282e-06, "loss": 0.4481, "step": 7919 }, { "epoch": 1.2119357306809486, "grad_norm": 1.919676635657094, "learning_rate": 7.0970791190011566e-06, "loss": 0.3355, "step": 7920 }, { "epoch": 1.212088752869166, "grad_norm": 2.204903650709108, "learning_rate": 7.094707740415756e-06, "loss": 0.4253, "step": 7921 }, { "epoch": 1.2122417750573833, "grad_norm": 2.3272233967248335, "learning_rate": 7.09233654025572e-06, "loss": 0.3645, "step": 7922 }, { "epoch": 1.2123947972456006, "grad_norm": 2.116304203277378, "learning_rate": 7.08996551866667e-06, "loss": 0.4008, "step": 7923 }, { "epoch": 1.212547819433818, "grad_norm": 2.12004466283652, "learning_rate": 7.087594675794226e-06, "loss": 0.3908, "step": 7924 }, { "epoch": 1.2127008416220353, "grad_norm": 2.3012142929784414, "learning_rate": 7.085224011783984e-06, "loss": 0.4094, "step": 7925 }, { "epoch": 1.2128538638102524, "grad_norm": 2.225388844694236, "learning_rate": 7.082853526781537e-06, "loss": 0.3643, "step": 7926 }, { "epoch": 1.2130068859984697, "grad_norm": 2.076324760976028, "learning_rate": 7.080483220932469e-06, "loss": 0.5358, "step": 7927 }, { "epoch": 1.213159908186687, "grad_norm": 2.108445837070922, "learning_rate": 7.078113094382348e-06, "loss": 0.3903, "step": 7928 }, { "epoch": 1.2133129303749044, "grad_norm": 2.541491652948375, "learning_rate": 7.075743147276728e-06, "loss": 0.4761, "step": 7929 }, { "epoch": 1.2134659525631217, "grad_norm": 2.1264106715257003, "learning_rate": 7.073373379761166e-06, "loss": 0.4052, "step": 7930 }, { "epoch": 1.213618974751339, "grad_norm": 2.500809227928582, "learning_rate": 7.07100379198119e-06, "loss": 0.4861, "step": 7931 }, { "epoch": 1.2137719969395562, "grad_norm": 2.0731214372985245, "learning_rate": 7.068634384082331e-06, "loss": 0.3271, "step": 7932 }, { "epoch": 1.2139250191277735, "grad_norm": 2.2473665302879544, "learning_rate": 7.066265156210105e-06, "loss": 0.3925, "step": 7933 }, { "epoch": 1.2140780413159908, "grad_norm": 2.023127298800279, "learning_rate": 7.063896108510008e-06, "loss": 0.374, "step": 7934 }, { "epoch": 1.2142310635042082, "grad_norm": 2.1057736936995712, "learning_rate": 7.0615272411275395e-06, "loss": 0.4494, "step": 7935 }, { "epoch": 1.2143840856924255, "grad_norm": 2.1942854844397583, "learning_rate": 7.05915855420818e-06, "loss": 0.4072, "step": 7936 }, { "epoch": 1.2145371078806426, "grad_norm": 2.16395490685311, "learning_rate": 7.056790047897394e-06, "loss": 0.3736, "step": 7937 }, { "epoch": 1.21469013006886, "grad_norm": 2.2678730333273793, "learning_rate": 7.054421722340647e-06, "loss": 0.444, "step": 7938 }, { "epoch": 1.2148431522570773, "grad_norm": 2.200405364638579, "learning_rate": 7.052053577683384e-06, "loss": 0.3841, "step": 7939 }, { "epoch": 1.2149961744452946, "grad_norm": 2.277747587318131, "learning_rate": 7.049685614071047e-06, "loss": 0.476, "step": 7940 }, { "epoch": 1.215149196633512, "grad_norm": 2.010732008758074, "learning_rate": 7.047317831649056e-06, "loss": 0.3571, "step": 7941 }, { "epoch": 1.215302218821729, "grad_norm": 2.252647658079372, "learning_rate": 7.044950230562826e-06, "loss": 0.4632, "step": 7942 }, { "epoch": 1.2154552410099464, "grad_norm": 2.3309241879798486, "learning_rate": 7.042582810957767e-06, "loss": 0.407, "step": 7943 }, { "epoch": 1.2156082631981637, "grad_norm": 1.9771222565408002, "learning_rate": 7.040215572979262e-06, "loss": 0.3727, "step": 7944 }, { "epoch": 1.215761285386381, "grad_norm": 2.2011196818146517, "learning_rate": 7.0378485167727e-06, "loss": 0.3456, "step": 7945 }, { "epoch": 1.2159143075745984, "grad_norm": 2.0983866502398154, "learning_rate": 7.035481642483451e-06, "loss": 0.3567, "step": 7946 }, { "epoch": 1.2160673297628155, "grad_norm": 2.391056276231594, "learning_rate": 7.033114950256865e-06, "loss": 0.4006, "step": 7947 }, { "epoch": 1.2162203519510328, "grad_norm": 2.3512508079427223, "learning_rate": 7.0307484402383015e-06, "loss": 0.4517, "step": 7948 }, { "epoch": 1.2163733741392502, "grad_norm": 2.1246473090309577, "learning_rate": 7.028382112573093e-06, "loss": 0.3766, "step": 7949 }, { "epoch": 1.2165263963274675, "grad_norm": 2.075108528173804, "learning_rate": 7.026015967406559e-06, "loss": 0.3935, "step": 7950 }, { "epoch": 1.2166794185156848, "grad_norm": 2.121208516187529, "learning_rate": 7.023650004884024e-06, "loss": 0.3355, "step": 7951 }, { "epoch": 1.216832440703902, "grad_norm": 2.3563143997973084, "learning_rate": 7.021284225150782e-06, "loss": 0.398, "step": 7952 }, { "epoch": 1.2169854628921193, "grad_norm": 2.073709554473588, "learning_rate": 7.0189186283521295e-06, "loss": 0.3789, "step": 7953 }, { "epoch": 1.2171384850803366, "grad_norm": 2.528122947279484, "learning_rate": 7.016553214633348e-06, "loss": 0.4205, "step": 7954 }, { "epoch": 1.217291507268554, "grad_norm": 2.2205808799791393, "learning_rate": 7.014187984139702e-06, "loss": 0.3803, "step": 7955 }, { "epoch": 1.2174445294567713, "grad_norm": 2.0751870027824526, "learning_rate": 7.0118229370164545e-06, "loss": 0.3547, "step": 7956 }, { "epoch": 1.2175975516449886, "grad_norm": 2.2315342903135393, "learning_rate": 7.009458073408852e-06, "loss": 0.4626, "step": 7957 }, { "epoch": 1.2177505738332057, "grad_norm": 2.1810279357679, "learning_rate": 7.007093393462124e-06, "loss": 0.4244, "step": 7958 }, { "epoch": 1.217903596021423, "grad_norm": 2.0422908944115594, "learning_rate": 7.004728897321501e-06, "loss": 0.5326, "step": 7959 }, { "epoch": 1.2180566182096404, "grad_norm": 2.20462359397819, "learning_rate": 7.002364585132192e-06, "loss": 0.3102, "step": 7960 }, { "epoch": 1.2182096403978577, "grad_norm": 2.3536974554981125, "learning_rate": 7.000000457039404e-06, "loss": 0.4173, "step": 7961 }, { "epoch": 1.218362662586075, "grad_norm": 2.038361961156172, "learning_rate": 6.997636513188324e-06, "loss": 0.39, "step": 7962 }, { "epoch": 1.2185156847742924, "grad_norm": 2.210987365831342, "learning_rate": 6.995272753724127e-06, "loss": 0.4057, "step": 7963 }, { "epoch": 1.2186687069625095, "grad_norm": 2.1544425173767907, "learning_rate": 6.99290917879199e-06, "loss": 0.3487, "step": 7964 }, { "epoch": 1.2188217291507268, "grad_norm": 2.345572013946747, "learning_rate": 6.990545788537062e-06, "loss": 0.4078, "step": 7965 }, { "epoch": 1.2189747513389442, "grad_norm": 2.0550826810477854, "learning_rate": 6.988182583104488e-06, "loss": 0.3286, "step": 7966 }, { "epoch": 1.2191277735271615, "grad_norm": 2.474145020752616, "learning_rate": 6.985819562639406e-06, "loss": 0.3959, "step": 7967 }, { "epoch": 1.2192807957153788, "grad_norm": 2.1499005174076764, "learning_rate": 6.9834567272869345e-06, "loss": 0.4703, "step": 7968 }, { "epoch": 1.219433817903596, "grad_norm": 1.9949459289594664, "learning_rate": 6.981094077192188e-06, "loss": 0.4444, "step": 7969 }, { "epoch": 1.2195868400918133, "grad_norm": 2.159287629056209, "learning_rate": 6.978731612500266e-06, "loss": 0.4424, "step": 7970 }, { "epoch": 1.2197398622800306, "grad_norm": 2.319018078003906, "learning_rate": 6.97636933335625e-06, "loss": 0.456, "step": 7971 }, { "epoch": 1.219892884468248, "grad_norm": 2.2501379725026194, "learning_rate": 6.974007239905225e-06, "loss": 0.3941, "step": 7972 }, { "epoch": 1.2200459066564653, "grad_norm": 2.376289275218683, "learning_rate": 6.971645332292255e-06, "loss": 0.479, "step": 7973 }, { "epoch": 1.2201989288446824, "grad_norm": 2.4438939561516597, "learning_rate": 6.969283610662387e-06, "loss": 0.4189, "step": 7974 }, { "epoch": 1.2203519510328997, "grad_norm": 2.0860769225758347, "learning_rate": 6.966922075160671e-06, "loss": 0.3817, "step": 7975 }, { "epoch": 1.220504973221117, "grad_norm": 2.2571172531938952, "learning_rate": 6.964560725932136e-06, "loss": 0.3865, "step": 7976 }, { "epoch": 1.2206579954093344, "grad_norm": 2.0870809930095686, "learning_rate": 6.962199563121803e-06, "loss": 0.4098, "step": 7977 }, { "epoch": 1.2208110175975517, "grad_norm": 2.3337013901306065, "learning_rate": 6.959838586874679e-06, "loss": 0.4347, "step": 7978 }, { "epoch": 1.2209640397857688, "grad_norm": 1.6879922077725298, "learning_rate": 6.957477797335757e-06, "loss": 0.3284, "step": 7979 }, { "epoch": 1.2211170619739862, "grad_norm": 2.21500011103774, "learning_rate": 6.955117194650029e-06, "loss": 0.4315, "step": 7980 }, { "epoch": 1.2212700841622035, "grad_norm": 2.365885263450686, "learning_rate": 6.952756778962463e-06, "loss": 0.4451, "step": 7981 }, { "epoch": 1.2214231063504208, "grad_norm": 2.1970439077941983, "learning_rate": 6.950396550418026e-06, "loss": 0.3978, "step": 7982 }, { "epoch": 1.2215761285386382, "grad_norm": 2.00539883100848, "learning_rate": 6.9480365091616685e-06, "loss": 0.3948, "step": 7983 }, { "epoch": 1.2217291507268553, "grad_norm": 2.4169358456913175, "learning_rate": 6.945676655338324e-06, "loss": 0.4454, "step": 7984 }, { "epoch": 1.2218821729150726, "grad_norm": 1.9858033554942487, "learning_rate": 6.943316989092928e-06, "loss": 0.3527, "step": 7985 }, { "epoch": 1.22203519510329, "grad_norm": 2.432567266913346, "learning_rate": 6.940957510570395e-06, "loss": 0.4424, "step": 7986 }, { "epoch": 1.2221882172915073, "grad_norm": 2.0487590024042253, "learning_rate": 6.938598219915624e-06, "loss": 0.4262, "step": 7987 }, { "epoch": 1.2223412394797246, "grad_norm": 2.1278806090330153, "learning_rate": 6.9362391172735155e-06, "loss": 0.4153, "step": 7988 }, { "epoch": 1.2224942616679417, "grad_norm": 1.9392705667390682, "learning_rate": 6.933880202788945e-06, "loss": 0.2756, "step": 7989 }, { "epoch": 1.222647283856159, "grad_norm": 2.0982874308568746, "learning_rate": 6.931521476606791e-06, "loss": 0.3839, "step": 7990 }, { "epoch": 1.2228003060443764, "grad_norm": 1.9905106427152186, "learning_rate": 6.929162938871905e-06, "loss": 0.3439, "step": 7991 }, { "epoch": 1.2229533282325937, "grad_norm": 2.137110660745122, "learning_rate": 6.926804589729133e-06, "loss": 0.3376, "step": 7992 }, { "epoch": 1.223106350420811, "grad_norm": 2.0719186341939326, "learning_rate": 6.924446429323318e-06, "loss": 0.3714, "step": 7993 }, { "epoch": 1.2232593726090284, "grad_norm": 1.9949333583357212, "learning_rate": 6.922088457799278e-06, "loss": 0.3984, "step": 7994 }, { "epoch": 1.2234123947972455, "grad_norm": 2.0901002764905763, "learning_rate": 6.919730675301824e-06, "loss": 0.3392, "step": 7995 }, { "epoch": 1.2235654169854628, "grad_norm": 2.0467946793168212, "learning_rate": 6.917373081975764e-06, "loss": 0.4335, "step": 7996 }, { "epoch": 1.2237184391736802, "grad_norm": 2.099938498749769, "learning_rate": 6.9150156779658775e-06, "loss": 0.3334, "step": 7997 }, { "epoch": 1.2238714613618975, "grad_norm": 2.315397070406997, "learning_rate": 6.91265846341695e-06, "loss": 0.4228, "step": 7998 }, { "epoch": 1.2240244835501148, "grad_norm": 2.3280652559278465, "learning_rate": 6.910301438473746e-06, "loss": 0.4021, "step": 7999 }, { "epoch": 1.2241775057383322, "grad_norm": 2.3985582799227076, "learning_rate": 6.907944603281011e-06, "loss": 0.3723, "step": 8000 }, { "epoch": 1.2243305279265493, "grad_norm": 2.130601512927696, "learning_rate": 6.905587957983499e-06, "loss": 0.3039, "step": 8001 }, { "epoch": 1.2244835501147666, "grad_norm": 2.1247620729874748, "learning_rate": 6.903231502725936e-06, "loss": 0.4024, "step": 8002 }, { "epoch": 1.224636572302984, "grad_norm": 2.497906614459289, "learning_rate": 6.900875237653039e-06, "loss": 0.4527, "step": 8003 }, { "epoch": 1.2247895944912013, "grad_norm": 2.234719038902452, "learning_rate": 6.8985191629095185e-06, "loss": 0.3417, "step": 8004 }, { "epoch": 1.2249426166794186, "grad_norm": 2.5449686391944337, "learning_rate": 6.8961632786400665e-06, "loss": 0.479, "step": 8005 }, { "epoch": 1.2250956388676357, "grad_norm": 2.0912533506618227, "learning_rate": 6.893807584989375e-06, "loss": 0.3885, "step": 8006 }, { "epoch": 1.225248661055853, "grad_norm": 2.061740186429149, "learning_rate": 6.891452082102108e-06, "loss": 0.3069, "step": 8007 }, { "epoch": 1.2254016832440704, "grad_norm": 2.4195881426112176, "learning_rate": 6.889096770122928e-06, "loss": 0.3886, "step": 8008 }, { "epoch": 1.2255547054322877, "grad_norm": 2.257160486240451, "learning_rate": 6.88674164919649e-06, "loss": 0.442, "step": 8009 }, { "epoch": 1.225707727620505, "grad_norm": 1.9940025514019444, "learning_rate": 6.8843867194674244e-06, "loss": 0.3627, "step": 8010 }, { "epoch": 1.2258607498087222, "grad_norm": 2.0383222857719385, "learning_rate": 6.882031981080355e-06, "loss": 0.4049, "step": 8011 }, { "epoch": 1.2260137719969395, "grad_norm": 2.0190329535880545, "learning_rate": 6.879677434179904e-06, "loss": 0.2951, "step": 8012 }, { "epoch": 1.2261667941851568, "grad_norm": 2.055375099684583, "learning_rate": 6.877323078910665e-06, "loss": 0.3451, "step": 8013 }, { "epoch": 1.2263198163733742, "grad_norm": 2.4333399428901563, "learning_rate": 6.874968915417234e-06, "loss": 0.3886, "step": 8014 }, { "epoch": 1.2264728385615915, "grad_norm": 2.526027909918158, "learning_rate": 6.872614943844189e-06, "loss": 0.4255, "step": 8015 }, { "epoch": 1.2266258607498086, "grad_norm": 2.1961298260330318, "learning_rate": 6.870261164336089e-06, "loss": 0.3917, "step": 8016 }, { "epoch": 1.226778882938026, "grad_norm": 1.9173991175517395, "learning_rate": 6.867907577037498e-06, "loss": 0.3306, "step": 8017 }, { "epoch": 1.2269319051262433, "grad_norm": 1.9365177251844081, "learning_rate": 6.865554182092954e-06, "loss": 0.3061, "step": 8018 }, { "epoch": 1.2270849273144606, "grad_norm": 1.8093240085090097, "learning_rate": 6.8632009796469935e-06, "loss": 0.4069, "step": 8019 }, { "epoch": 1.227237949502678, "grad_norm": 2.149860287286806, "learning_rate": 6.860847969844129e-06, "loss": 0.4024, "step": 8020 }, { "epoch": 1.227390971690895, "grad_norm": 2.2115737004229428, "learning_rate": 6.85849515282887e-06, "loss": 0.3812, "step": 8021 }, { "epoch": 1.2275439938791124, "grad_norm": 1.9495383178771113, "learning_rate": 6.856142528745717e-06, "loss": 0.3352, "step": 8022 }, { "epoch": 1.2276970160673297, "grad_norm": 2.271878221084577, "learning_rate": 6.853790097739148e-06, "loss": 0.392, "step": 8023 }, { "epoch": 1.227850038255547, "grad_norm": 2.0960449848601144, "learning_rate": 6.851437859953636e-06, "loss": 0.4094, "step": 8024 }, { "epoch": 1.2280030604437644, "grad_norm": 2.1636547073328654, "learning_rate": 6.849085815533644e-06, "loss": 0.4111, "step": 8025 }, { "epoch": 1.2281560826319817, "grad_norm": 2.4084541084097606, "learning_rate": 6.846733964623618e-06, "loss": 0.3485, "step": 8026 }, { "epoch": 1.2283091048201988, "grad_norm": 1.9854369939079013, "learning_rate": 6.844382307367994e-06, "loss": 0.42, "step": 8027 }, { "epoch": 1.2284621270084162, "grad_norm": 2.1105981109720138, "learning_rate": 6.842030843911201e-06, "loss": 0.3964, "step": 8028 }, { "epoch": 1.2286151491966335, "grad_norm": 2.218758898126614, "learning_rate": 6.839679574397641e-06, "loss": 0.3978, "step": 8029 }, { "epoch": 1.2287681713848508, "grad_norm": 2.2227044245916154, "learning_rate": 6.837328498971727e-06, "loss": 0.4151, "step": 8030 }, { "epoch": 1.2289211935730682, "grad_norm": 2.140680430133614, "learning_rate": 6.834977617777844e-06, "loss": 0.3219, "step": 8031 }, { "epoch": 1.2290742157612855, "grad_norm": 1.9854371782404445, "learning_rate": 6.83262693096036e-06, "loss": 0.3803, "step": 8032 }, { "epoch": 1.2292272379495026, "grad_norm": 2.205663362151382, "learning_rate": 6.830276438663654e-06, "loss": 0.3955, "step": 8033 }, { "epoch": 1.22938026013772, "grad_norm": 2.10792051555875, "learning_rate": 6.827926141032066e-06, "loss": 0.3967, "step": 8034 }, { "epoch": 1.2295332823259373, "grad_norm": 2.0734884900888386, "learning_rate": 6.8255760382099465e-06, "loss": 0.4093, "step": 8035 }, { "epoch": 1.2296863045141546, "grad_norm": 2.2376128890445077, "learning_rate": 6.823226130341623e-06, "loss": 0.4023, "step": 8036 }, { "epoch": 1.229839326702372, "grad_norm": 2.401436260509043, "learning_rate": 6.820876417571405e-06, "loss": 0.461, "step": 8037 }, { "epoch": 1.229992348890589, "grad_norm": 2.0884246258149406, "learning_rate": 6.818526900043606e-06, "loss": 0.3659, "step": 8038 }, { "epoch": 1.2301453710788064, "grad_norm": 2.0483232952456545, "learning_rate": 6.816177577902518e-06, "loss": 0.3641, "step": 8039 }, { "epoch": 1.2302983932670237, "grad_norm": 2.382850648132974, "learning_rate": 6.813828451292417e-06, "loss": 0.4182, "step": 8040 }, { "epoch": 1.230451415455241, "grad_norm": 2.2418447789270948, "learning_rate": 6.811479520357576e-06, "loss": 0.4607, "step": 8041 }, { "epoch": 1.2306044376434584, "grad_norm": 1.9382911340909974, "learning_rate": 6.80913078524225e-06, "loss": 0.3438, "step": 8042 }, { "epoch": 1.2307574598316755, "grad_norm": 1.8422515197448954, "learning_rate": 6.806782246090688e-06, "loss": 0.3331, "step": 8043 }, { "epoch": 1.2309104820198928, "grad_norm": 2.225159881582689, "learning_rate": 6.804433903047118e-06, "loss": 0.3753, "step": 8044 }, { "epoch": 1.2310635042081102, "grad_norm": 2.1226629248453763, "learning_rate": 6.802085756255763e-06, "loss": 0.357, "step": 8045 }, { "epoch": 1.2312165263963275, "grad_norm": 2.281272129316277, "learning_rate": 6.7997378058608355e-06, "loss": 0.4267, "step": 8046 }, { "epoch": 1.2313695485845448, "grad_norm": 2.409255082789223, "learning_rate": 6.797390052006526e-06, "loss": 0.4512, "step": 8047 }, { "epoch": 1.231522570772762, "grad_norm": 2.022442772507196, "learning_rate": 6.7950424948370205e-06, "loss": 0.3694, "step": 8048 }, { "epoch": 1.2316755929609793, "grad_norm": 2.048689723681348, "learning_rate": 6.792695134496497e-06, "loss": 0.3168, "step": 8049 }, { "epoch": 1.2318286151491966, "grad_norm": 2.1639453889819347, "learning_rate": 6.7903479711291095e-06, "loss": 0.373, "step": 8050 }, { "epoch": 1.231981637337414, "grad_norm": 2.306482688733974, "learning_rate": 6.78800100487901e-06, "loss": 0.41, "step": 8051 }, { "epoch": 1.2321346595256313, "grad_norm": 2.1460361773704255, "learning_rate": 6.785654235890338e-06, "loss": 0.3688, "step": 8052 }, { "epoch": 1.2322876817138484, "grad_norm": 2.5735775872854028, "learning_rate": 6.7833076643072085e-06, "loss": 0.4729, "step": 8053 }, { "epoch": 1.2324407039020657, "grad_norm": 2.332483797336673, "learning_rate": 6.78096129027374e-06, "loss": 0.4682, "step": 8054 }, { "epoch": 1.232593726090283, "grad_norm": 2.271105481517045, "learning_rate": 6.778615113934031e-06, "loss": 0.4461, "step": 8055 }, { "epoch": 1.2327467482785004, "grad_norm": 2.1420714053603236, "learning_rate": 6.776269135432174e-06, "loss": 0.3699, "step": 8056 }, { "epoch": 1.2328997704667177, "grad_norm": 2.333313250766813, "learning_rate": 6.773923354912239e-06, "loss": 0.349, "step": 8057 }, { "epoch": 1.233052792654935, "grad_norm": 2.3201571423666594, "learning_rate": 6.7715777725182875e-06, "loss": 0.4587, "step": 8058 }, { "epoch": 1.2332058148431522, "grad_norm": 2.350700286167741, "learning_rate": 6.76923238839438e-06, "loss": 0.4382, "step": 8059 }, { "epoch": 1.2333588370313695, "grad_norm": 2.4481880445812005, "learning_rate": 6.766887202684549e-06, "loss": 0.4408, "step": 8060 }, { "epoch": 1.2335118592195868, "grad_norm": 2.1630515616270882, "learning_rate": 6.76454221553282e-06, "loss": 0.3706, "step": 8061 }, { "epoch": 1.2336648814078042, "grad_norm": 2.0092456944627766, "learning_rate": 6.7621974270832145e-06, "loss": 0.3611, "step": 8062 }, { "epoch": 1.2338179035960215, "grad_norm": 2.1523204344513838, "learning_rate": 6.7598528374797275e-06, "loss": 0.386, "step": 8063 }, { "epoch": 1.2339709257842388, "grad_norm": 2.3086250501937777, "learning_rate": 6.757508446866357e-06, "loss": 0.4011, "step": 8064 }, { "epoch": 1.234123947972456, "grad_norm": 2.4251052079549575, "learning_rate": 6.755164255387078e-06, "loss": 0.3798, "step": 8065 }, { "epoch": 1.2342769701606733, "grad_norm": 2.24168987184976, "learning_rate": 6.752820263185851e-06, "loss": 0.3947, "step": 8066 }, { "epoch": 1.2344299923488906, "grad_norm": 2.097126542099647, "learning_rate": 6.750476470406638e-06, "loss": 0.4268, "step": 8067 }, { "epoch": 1.234583014537108, "grad_norm": 2.289927186551288, "learning_rate": 6.748132877193379e-06, "loss": 0.3988, "step": 8068 }, { "epoch": 1.2347360367253253, "grad_norm": 2.122315650716571, "learning_rate": 6.745789483689997e-06, "loss": 0.4257, "step": 8069 }, { "epoch": 1.2348890589135424, "grad_norm": 1.9968670376809796, "learning_rate": 6.743446290040417e-06, "loss": 0.4458, "step": 8070 }, { "epoch": 1.2350420811017597, "grad_norm": 1.8989966989084186, "learning_rate": 6.7411032963885355e-06, "loss": 0.411, "step": 8071 }, { "epoch": 1.235195103289977, "grad_norm": 2.1196167992227637, "learning_rate": 6.738760502878254e-06, "loss": 0.3371, "step": 8072 }, { "epoch": 1.2353481254781944, "grad_norm": 2.27319423233866, "learning_rate": 6.736417909653448e-06, "loss": 0.4286, "step": 8073 }, { "epoch": 1.2355011476664117, "grad_norm": 2.1531922370964884, "learning_rate": 6.734075516857981e-06, "loss": 0.3935, "step": 8074 }, { "epoch": 1.2356541698546288, "grad_norm": 2.169345191566362, "learning_rate": 6.731733324635717e-06, "loss": 0.3872, "step": 8075 }, { "epoch": 1.2358071920428462, "grad_norm": 2.3456728853488804, "learning_rate": 6.729391333130495e-06, "loss": 0.4262, "step": 8076 }, { "epoch": 1.2359602142310635, "grad_norm": 2.116438016922821, "learning_rate": 6.727049542486142e-06, "loss": 0.3507, "step": 8077 }, { "epoch": 1.2361132364192808, "grad_norm": 2.2905850835193675, "learning_rate": 6.724707952846485e-06, "loss": 0.474, "step": 8078 }, { "epoch": 1.2362662586074982, "grad_norm": 2.126345341606041, "learning_rate": 6.722366564355322e-06, "loss": 0.4478, "step": 8079 }, { "epoch": 1.2364192807957153, "grad_norm": 2.2306359092945516, "learning_rate": 6.720025377156453e-06, "loss": 0.4082, "step": 8080 }, { "epoch": 1.2365723029839326, "grad_norm": 2.4936280034611946, "learning_rate": 6.7176843913936584e-06, "loss": 0.3708, "step": 8081 }, { "epoch": 1.23672532517215, "grad_norm": 2.060763607982872, "learning_rate": 6.715343607210701e-06, "loss": 0.4245, "step": 8082 }, { "epoch": 1.2368783473603673, "grad_norm": 2.4140780179422334, "learning_rate": 6.713003024751345e-06, "loss": 0.4348, "step": 8083 }, { "epoch": 1.2370313695485846, "grad_norm": 2.0437392400835552, "learning_rate": 6.7106626441593315e-06, "loss": 0.4317, "step": 8084 }, { "epoch": 1.2371843917368017, "grad_norm": 2.166562578027147, "learning_rate": 6.708322465578394e-06, "loss": 0.412, "step": 8085 }, { "epoch": 1.237337413925019, "grad_norm": 2.1138549681897807, "learning_rate": 6.705982489152252e-06, "loss": 0.4011, "step": 8086 }, { "epoch": 1.2374904361132364, "grad_norm": 2.031131307063908, "learning_rate": 6.703642715024608e-06, "loss": 0.3462, "step": 8087 }, { "epoch": 1.2376434583014537, "grad_norm": 2.1949317305374674, "learning_rate": 6.701303143339164e-06, "loss": 0.3736, "step": 8088 }, { "epoch": 1.237796480489671, "grad_norm": 2.1993897401590603, "learning_rate": 6.6989637742395975e-06, "loss": 0.3788, "step": 8089 }, { "epoch": 1.2379495026778882, "grad_norm": 2.5296632495849334, "learning_rate": 6.696624607869576e-06, "loss": 0.4256, "step": 8090 }, { "epoch": 1.2381025248661055, "grad_norm": 2.2823327820853794, "learning_rate": 6.694285644372765e-06, "loss": 0.3772, "step": 8091 }, { "epoch": 1.2382555470543228, "grad_norm": 2.0040278866366528, "learning_rate": 6.691946883892802e-06, "loss": 0.3082, "step": 8092 }, { "epoch": 1.2384085692425402, "grad_norm": 1.928916255534059, "learning_rate": 6.689608326573322e-06, "loss": 0.4043, "step": 8093 }, { "epoch": 1.2385615914307575, "grad_norm": 1.92290953923648, "learning_rate": 6.6872699725579485e-06, "loss": 0.3487, "step": 8094 }, { "epoch": 1.2387146136189748, "grad_norm": 2.3340519002998414, "learning_rate": 6.684931821990279e-06, "loss": 0.4364, "step": 8095 }, { "epoch": 1.238867635807192, "grad_norm": 2.212440455612955, "learning_rate": 6.6825938750139185e-06, "loss": 0.4273, "step": 8096 }, { "epoch": 1.2390206579954093, "grad_norm": 2.111364367290438, "learning_rate": 6.680256131772449e-06, "loss": 0.3948, "step": 8097 }, { "epoch": 1.2391736801836266, "grad_norm": 2.2003773037744505, "learning_rate": 6.677918592409432e-06, "loss": 0.4448, "step": 8098 }, { "epoch": 1.239326702371844, "grad_norm": 1.8536781014593984, "learning_rate": 6.675581257068433e-06, "loss": 0.3131, "step": 8099 }, { "epoch": 1.2394797245600613, "grad_norm": 2.0146176424945805, "learning_rate": 6.673244125892991e-06, "loss": 0.3832, "step": 8100 }, { "epoch": 1.2396327467482786, "grad_norm": 2.346769756302333, "learning_rate": 6.670907199026647e-06, "loss": 0.4161, "step": 8101 }, { "epoch": 1.2397857689364957, "grad_norm": 2.1642485454000058, "learning_rate": 6.668570476612912e-06, "loss": 0.344, "step": 8102 }, { "epoch": 1.239938791124713, "grad_norm": 2.0988507243659997, "learning_rate": 6.6662339587952966e-06, "loss": 0.3651, "step": 8103 }, { "epoch": 1.2400918133129304, "grad_norm": 1.9921147928691025, "learning_rate": 6.6638976457172985e-06, "loss": 0.3652, "step": 8104 }, { "epoch": 1.2402448355011477, "grad_norm": 2.0682253549190404, "learning_rate": 6.6615615375223945e-06, "loss": 0.3633, "step": 8105 }, { "epoch": 1.240397857689365, "grad_norm": 2.0394982635306493, "learning_rate": 6.659225634354055e-06, "loss": 0.3469, "step": 8106 }, { "epoch": 1.2405508798775822, "grad_norm": 2.017362842165284, "learning_rate": 6.656889936355742e-06, "loss": 0.3196, "step": 8107 }, { "epoch": 1.2407039020657995, "grad_norm": 2.1998384971210565, "learning_rate": 6.654554443670893e-06, "loss": 0.4183, "step": 8108 }, { "epoch": 1.2408569242540168, "grad_norm": 2.194051345196769, "learning_rate": 6.652219156442945e-06, "loss": 0.3716, "step": 8109 }, { "epoch": 1.2410099464422342, "grad_norm": 1.8924912286597517, "learning_rate": 6.649884074815316e-06, "loss": 0.3739, "step": 8110 }, { "epoch": 1.2411629686304515, "grad_norm": 2.105527117813446, "learning_rate": 6.647549198931407e-06, "loss": 0.358, "step": 8111 }, { "epoch": 1.2413159908186686, "grad_norm": 2.2597521686833333, "learning_rate": 6.645214528934623e-06, "loss": 0.3437, "step": 8112 }, { "epoch": 1.241469013006886, "grad_norm": 2.200249191211589, "learning_rate": 6.6428800649683374e-06, "loss": 0.3841, "step": 8113 }, { "epoch": 1.2416220351951033, "grad_norm": 2.3942357392351474, "learning_rate": 6.640545807175914e-06, "loss": 0.4409, "step": 8114 }, { "epoch": 1.2417750573833206, "grad_norm": 2.2397158209349715, "learning_rate": 6.638211755700722e-06, "loss": 0.4461, "step": 8115 }, { "epoch": 1.241928079571538, "grad_norm": 2.348508538360569, "learning_rate": 6.6358779106860924e-06, "loss": 0.4139, "step": 8116 }, { "epoch": 1.242081101759755, "grad_norm": 2.189259772021628, "learning_rate": 6.633544272275364e-06, "loss": 0.4399, "step": 8117 }, { "epoch": 1.2422341239479724, "grad_norm": 2.0453409050667197, "learning_rate": 6.631210840611852e-06, "loss": 0.4082, "step": 8118 }, { "epoch": 1.2423871461361897, "grad_norm": 2.639988084591451, "learning_rate": 6.628877615838859e-06, "loss": 0.4131, "step": 8119 }, { "epoch": 1.242540168324407, "grad_norm": 2.4274196063214677, "learning_rate": 6.62654459809968e-06, "loss": 0.4368, "step": 8120 }, { "epoch": 1.2426931905126244, "grad_norm": 2.14359024994293, "learning_rate": 6.624211787537594e-06, "loss": 0.4156, "step": 8121 }, { "epoch": 1.2428462127008415, "grad_norm": 2.292958397908944, "learning_rate": 6.621879184295869e-06, "loss": 0.4314, "step": 8122 }, { "epoch": 1.2429992348890588, "grad_norm": 1.9339362064576864, "learning_rate": 6.619546788517759e-06, "loss": 0.3408, "step": 8123 }, { "epoch": 1.2431522570772762, "grad_norm": 2.3692855461901927, "learning_rate": 6.617214600346503e-06, "loss": 0.4277, "step": 8124 }, { "epoch": 1.2433052792654935, "grad_norm": 2.3900418782194226, "learning_rate": 6.614882619925336e-06, "loss": 0.4539, "step": 8125 }, { "epoch": 1.2434583014537108, "grad_norm": 2.1281143454275444, "learning_rate": 6.612550847397468e-06, "loss": 0.3837, "step": 8126 }, { "epoch": 1.2436113236419282, "grad_norm": 1.9287068128994407, "learning_rate": 6.610219282906101e-06, "loss": 0.391, "step": 8127 }, { "epoch": 1.2437643458301453, "grad_norm": 2.2439153783777033, "learning_rate": 6.607887926594434e-06, "loss": 0.3952, "step": 8128 }, { "epoch": 1.2439173680183626, "grad_norm": 2.4364819100764668, "learning_rate": 6.605556778605635e-06, "loss": 0.4225, "step": 8129 }, { "epoch": 1.24407039020658, "grad_norm": 1.950810491447188, "learning_rate": 6.603225839082875e-06, "loss": 0.3582, "step": 8130 }, { "epoch": 1.2442234123947973, "grad_norm": 2.1343110248130905, "learning_rate": 6.600895108169308e-06, "loss": 0.3882, "step": 8131 }, { "epoch": 1.2443764345830146, "grad_norm": 2.133074845103394, "learning_rate": 6.598564586008064e-06, "loss": 0.3467, "step": 8132 }, { "epoch": 1.244529456771232, "grad_norm": 2.4865499659321606, "learning_rate": 6.596234272742278e-06, "loss": 0.3426, "step": 8133 }, { "epoch": 1.244682478959449, "grad_norm": 2.2551479642677403, "learning_rate": 6.593904168515062e-06, "loss": 0.3678, "step": 8134 }, { "epoch": 1.2448355011476664, "grad_norm": 2.4241006823885494, "learning_rate": 6.591574273469513e-06, "loss": 0.3655, "step": 8135 }, { "epoch": 1.2449885233358837, "grad_norm": 1.9992035540833695, "learning_rate": 6.589244587748723e-06, "loss": 0.3564, "step": 8136 }, { "epoch": 1.245141545524101, "grad_norm": 2.3241941093092335, "learning_rate": 6.586915111495764e-06, "loss": 0.4287, "step": 8137 }, { "epoch": 1.2452945677123184, "grad_norm": 2.287427281357684, "learning_rate": 6.5845858448537045e-06, "loss": 0.4589, "step": 8138 }, { "epoch": 1.2454475899005355, "grad_norm": 1.857212005781437, "learning_rate": 6.582256787965586e-06, "loss": 0.3074, "step": 8139 }, { "epoch": 1.2456006120887528, "grad_norm": 2.195369821049141, "learning_rate": 6.579927940974449e-06, "loss": 0.3611, "step": 8140 }, { "epoch": 1.2457536342769702, "grad_norm": 2.199397634076349, "learning_rate": 6.577599304023319e-06, "loss": 0.3836, "step": 8141 }, { "epoch": 1.2459066564651875, "grad_norm": 2.3932894708522023, "learning_rate": 6.575270877255202e-06, "loss": 0.4379, "step": 8142 }, { "epoch": 1.2460596786534048, "grad_norm": 2.4586397278695524, "learning_rate": 6.572942660813096e-06, "loss": 0.4097, "step": 8143 }, { "epoch": 1.246212700841622, "grad_norm": 2.2564384469631396, "learning_rate": 6.570614654839993e-06, "loss": 0.616, "step": 8144 }, { "epoch": 1.2463657230298393, "grad_norm": 2.027426896315064, "learning_rate": 6.568286859478854e-06, "loss": 0.2986, "step": 8145 }, { "epoch": 1.2465187452180566, "grad_norm": 1.9998258578563708, "learning_rate": 6.565959274872648e-06, "loss": 0.3829, "step": 8146 }, { "epoch": 1.246671767406274, "grad_norm": 2.294962618851666, "learning_rate": 6.563631901164318e-06, "loss": 0.3712, "step": 8147 }, { "epoch": 1.2468247895944913, "grad_norm": 2.2436925088267703, "learning_rate": 6.561304738496792e-06, "loss": 0.3324, "step": 8148 }, { "epoch": 1.2469778117827084, "grad_norm": 2.4639587946632284, "learning_rate": 6.558977787012997e-06, "loss": 0.4681, "step": 8149 }, { "epoch": 1.2471308339709257, "grad_norm": 2.381414016939421, "learning_rate": 6.556651046855838e-06, "loss": 0.3656, "step": 8150 }, { "epoch": 1.247283856159143, "grad_norm": 1.8783872615246844, "learning_rate": 6.554324518168206e-06, "loss": 0.2745, "step": 8151 }, { "epoch": 1.2474368783473604, "grad_norm": 2.259284316104315, "learning_rate": 6.551998201092986e-06, "loss": 0.44, "step": 8152 }, { "epoch": 1.2475899005355777, "grad_norm": 2.0064902849502424, "learning_rate": 6.549672095773043e-06, "loss": 0.3091, "step": 8153 }, { "epoch": 1.2477429227237948, "grad_norm": 2.0524252981887967, "learning_rate": 6.547346202351239e-06, "loss": 0.3426, "step": 8154 }, { "epoch": 1.2478959449120122, "grad_norm": 2.0469596722487564, "learning_rate": 6.54502052097041e-06, "loss": 0.3511, "step": 8155 }, { "epoch": 1.2480489671002295, "grad_norm": 2.2105301690125487, "learning_rate": 6.542695051773384e-06, "loss": 0.3652, "step": 8156 }, { "epoch": 1.2482019892884468, "grad_norm": 2.0530242393989413, "learning_rate": 6.540369794902984e-06, "loss": 0.3917, "step": 8157 }, { "epoch": 1.2483550114766642, "grad_norm": 1.936389285927461, "learning_rate": 6.538044750502005e-06, "loss": 0.3831, "step": 8158 }, { "epoch": 1.2485080336648815, "grad_norm": 2.1162540345200056, "learning_rate": 6.535719918713244e-06, "loss": 0.3493, "step": 8159 }, { "epoch": 1.2486610558530986, "grad_norm": 2.275333849051231, "learning_rate": 6.533395299679475e-06, "loss": 0.4457, "step": 8160 }, { "epoch": 1.248814078041316, "grad_norm": 2.150509646064332, "learning_rate": 6.531070893543461e-06, "loss": 0.3997, "step": 8161 }, { "epoch": 1.2489671002295333, "grad_norm": 2.0081462458065764, "learning_rate": 6.5287467004479545e-06, "loss": 0.3871, "step": 8162 }, { "epoch": 1.2491201224177506, "grad_norm": 2.5256472370868592, "learning_rate": 6.526422720535694e-06, "loss": 0.4369, "step": 8163 }, { "epoch": 1.249273144605968, "grad_norm": 2.264997586366189, "learning_rate": 6.5240989539494e-06, "loss": 0.3933, "step": 8164 }, { "epoch": 1.2494261667941853, "grad_norm": 2.0935312426949495, "learning_rate": 6.521775400831789e-06, "loss": 0.4138, "step": 8165 }, { "epoch": 1.2495791889824024, "grad_norm": 1.993012753242544, "learning_rate": 6.519452061325555e-06, "loss": 0.3732, "step": 8166 }, { "epoch": 1.2497322111706197, "grad_norm": 2.0981709407547404, "learning_rate": 6.517128935573389e-06, "loss": 0.3587, "step": 8167 }, { "epoch": 1.249885233358837, "grad_norm": 2.126075996875917, "learning_rate": 6.51480602371796e-06, "loss": 0.4134, "step": 8168 }, { "epoch": 1.2500382555470544, "grad_norm": 2.2515728182518178, "learning_rate": 6.5124833259019235e-06, "loss": 0.4175, "step": 8169 }, { "epoch": 1.2501912777352717, "grad_norm": 2.1486259337174594, "learning_rate": 6.510160842267935e-06, "loss": 0.3988, "step": 8170 }, { "epoch": 1.2503442999234888, "grad_norm": 2.1867889880458597, "learning_rate": 6.507838572958619e-06, "loss": 0.3892, "step": 8171 }, { "epoch": 1.2504973221117062, "grad_norm": 2.2552520050421427, "learning_rate": 6.505516518116595e-06, "loss": 0.3685, "step": 8172 }, { "epoch": 1.2506503442999235, "grad_norm": 2.1887592404297695, "learning_rate": 6.503194677884477e-06, "loss": 0.4032, "step": 8173 }, { "epoch": 1.2508033664881408, "grad_norm": 2.124074218625281, "learning_rate": 6.500873052404848e-06, "loss": 0.364, "step": 8174 }, { "epoch": 1.2509563886763582, "grad_norm": 2.333056411610224, "learning_rate": 6.4985516418202965e-06, "loss": 0.3767, "step": 8175 }, { "epoch": 1.2511094108645753, "grad_norm": 2.213655759545077, "learning_rate": 6.496230446273389e-06, "loss": 0.3687, "step": 8176 }, { "epoch": 1.2512624330527926, "grad_norm": 2.192929258564723, "learning_rate": 6.493909465906671e-06, "loss": 0.3897, "step": 8177 }, { "epoch": 1.25141545524101, "grad_norm": 1.9952666186839858, "learning_rate": 6.491588700862692e-06, "loss": 0.3551, "step": 8178 }, { "epoch": 1.2515684774292273, "grad_norm": 2.2618334986409523, "learning_rate": 6.489268151283978e-06, "loss": 0.3391, "step": 8179 }, { "epoch": 1.2517214996174446, "grad_norm": 2.0576909482129793, "learning_rate": 6.486947817313036e-06, "loss": 0.3226, "step": 8180 }, { "epoch": 1.2518745218056617, "grad_norm": 2.7975000133563808, "learning_rate": 6.484627699092374e-06, "loss": 0.4377, "step": 8181 }, { "epoch": 1.252027543993879, "grad_norm": 2.2709472936432453, "learning_rate": 6.482307796764476e-06, "loss": 0.3881, "step": 8182 }, { "epoch": 1.2521805661820964, "grad_norm": 2.331279784622257, "learning_rate": 6.4799881104718194e-06, "loss": 0.4555, "step": 8183 }, { "epoch": 1.2523335883703137, "grad_norm": 1.9791757619354962, "learning_rate": 6.477668640356862e-06, "loss": 0.3695, "step": 8184 }, { "epoch": 1.252486610558531, "grad_norm": 2.2578902118790785, "learning_rate": 6.475349386562052e-06, "loss": 0.3684, "step": 8185 }, { "epoch": 1.2526396327467482, "grad_norm": 2.0440767072571435, "learning_rate": 6.4730303492298276e-06, "loss": 0.3873, "step": 8186 }, { "epoch": 1.2527926549349655, "grad_norm": 2.2598153800347975, "learning_rate": 6.4707115285026065e-06, "loss": 0.4313, "step": 8187 }, { "epoch": 1.2529456771231828, "grad_norm": 2.2472633970388163, "learning_rate": 6.468392924522793e-06, "loss": 0.3577, "step": 8188 }, { "epoch": 1.2530986993114002, "grad_norm": 2.000066195188573, "learning_rate": 6.4660745374327916e-06, "loss": 0.3561, "step": 8189 }, { "epoch": 1.2532517214996175, "grad_norm": 1.998668284331149, "learning_rate": 6.46375636737497e-06, "loss": 0.3622, "step": 8190 }, { "epoch": 1.2534047436878346, "grad_norm": 2.2063495642394155, "learning_rate": 6.461438414491712e-06, "loss": 0.4527, "step": 8191 }, { "epoch": 1.2535577658760522, "grad_norm": 2.246557833214377, "learning_rate": 6.459120678925361e-06, "loss": 0.4203, "step": 8192 }, { "epoch": 1.2537107880642693, "grad_norm": 2.2063828436285715, "learning_rate": 6.456803160818257e-06, "loss": 0.3825, "step": 8193 }, { "epoch": 1.2538638102524866, "grad_norm": 1.9092101174408787, "learning_rate": 6.454485860312738e-06, "loss": 0.3348, "step": 8194 }, { "epoch": 1.254016832440704, "grad_norm": 2.0977756057796055, "learning_rate": 6.452168777551108e-06, "loss": 0.4035, "step": 8195 }, { "epoch": 1.2541698546289213, "grad_norm": 2.1005447185578676, "learning_rate": 6.449851912675674e-06, "loss": 0.3674, "step": 8196 }, { "epoch": 1.2543228768171386, "grad_norm": 2.558298193490183, "learning_rate": 6.447535265828726e-06, "loss": 0.4957, "step": 8197 }, { "epoch": 1.2544758990053557, "grad_norm": 2.024092000979385, "learning_rate": 6.44521883715253e-06, "loss": 0.3693, "step": 8198 }, { "epoch": 1.254628921193573, "grad_norm": 2.2401108910393646, "learning_rate": 6.4429026267893515e-06, "loss": 0.3757, "step": 8199 }, { "epoch": 1.2547819433817904, "grad_norm": 2.2399359164291486, "learning_rate": 6.440586634881443e-06, "loss": 0.3903, "step": 8200 }, { "epoch": 1.2549349655700077, "grad_norm": 2.231638113223591, "learning_rate": 6.438270861571028e-06, "loss": 0.3898, "step": 8201 }, { "epoch": 1.255087987758225, "grad_norm": 2.3003887351119485, "learning_rate": 6.435955307000334e-06, "loss": 0.4559, "step": 8202 }, { "epoch": 1.2552410099464422, "grad_norm": 2.342065540275981, "learning_rate": 6.4336399713115675e-06, "loss": 0.4731, "step": 8203 }, { "epoch": 1.2553940321346595, "grad_norm": 2.159649433148729, "learning_rate": 6.431324854646925e-06, "loss": 0.4246, "step": 8204 }, { "epoch": 1.2555470543228768, "grad_norm": 2.1847647079301815, "learning_rate": 6.429009957148582e-06, "loss": 0.4069, "step": 8205 }, { "epoch": 1.2557000765110942, "grad_norm": 2.1882504529272904, "learning_rate": 6.426695278958706e-06, "loss": 0.3894, "step": 8206 }, { "epoch": 1.2558530986993115, "grad_norm": 2.09420529387032, "learning_rate": 6.424380820219455e-06, "loss": 0.3681, "step": 8207 }, { "epoch": 1.2560061208875286, "grad_norm": 2.428772300102065, "learning_rate": 6.422066581072964e-06, "loss": 0.4136, "step": 8208 }, { "epoch": 1.256159143075746, "grad_norm": 2.213077828037295, "learning_rate": 6.419752561661358e-06, "loss": 0.3422, "step": 8209 }, { "epoch": 1.2563121652639633, "grad_norm": 2.0985586609316913, "learning_rate": 6.41743876212676e-06, "loss": 0.4081, "step": 8210 }, { "epoch": 1.2564651874521806, "grad_norm": 2.2509752229595543, "learning_rate": 6.415125182611257e-06, "loss": 0.4167, "step": 8211 }, { "epoch": 1.256618209640398, "grad_norm": 1.9046730532077025, "learning_rate": 6.412811823256942e-06, "loss": 0.3838, "step": 8212 }, { "epoch": 1.256771231828615, "grad_norm": 2.2792001975357454, "learning_rate": 6.410498684205889e-06, "loss": 0.4713, "step": 8213 }, { "epoch": 1.2569242540168324, "grad_norm": 2.3477391524234297, "learning_rate": 6.40818576560015e-06, "loss": 0.4213, "step": 8214 }, { "epoch": 1.2570772762050497, "grad_norm": 2.1809286043256364, "learning_rate": 6.405873067581776e-06, "loss": 0.3181, "step": 8215 }, { "epoch": 1.257230298393267, "grad_norm": 2.2438204832034225, "learning_rate": 6.4035605902927985e-06, "loss": 0.3751, "step": 8216 }, { "epoch": 1.2573833205814844, "grad_norm": 2.2567114261781756, "learning_rate": 6.401248333875232e-06, "loss": 0.3622, "step": 8217 }, { "epoch": 1.2575363427697015, "grad_norm": 1.9982564455204277, "learning_rate": 6.398936298471086e-06, "loss": 0.3942, "step": 8218 }, { "epoch": 1.2576893649579188, "grad_norm": 2.2711651757471576, "learning_rate": 6.3966244842223455e-06, "loss": 0.4798, "step": 8219 }, { "epoch": 1.2578423871461362, "grad_norm": 2.6053006150189892, "learning_rate": 6.394312891270997e-06, "loss": 0.4886, "step": 8220 }, { "epoch": 1.2579954093343535, "grad_norm": 2.1850007261040543, "learning_rate": 6.392001519758997e-06, "loss": 0.3153, "step": 8221 }, { "epoch": 1.2581484315225708, "grad_norm": 1.9624789585008473, "learning_rate": 6.389690369828297e-06, "loss": 0.3293, "step": 8222 }, { "epoch": 1.258301453710788, "grad_norm": 1.8446939371406101, "learning_rate": 6.387379441620838e-06, "loss": 0.3596, "step": 8223 }, { "epoch": 1.2584544758990053, "grad_norm": 1.9329320390958946, "learning_rate": 6.3850687352785365e-06, "loss": 0.3903, "step": 8224 }, { "epoch": 1.2586074980872226, "grad_norm": 2.18800037719509, "learning_rate": 6.3827582509433085e-06, "loss": 0.4227, "step": 8225 }, { "epoch": 1.25876052027544, "grad_norm": 1.9241949681406423, "learning_rate": 6.380447988757048e-06, "loss": 0.3641, "step": 8226 }, { "epoch": 1.2589135424636573, "grad_norm": 2.172606220056416, "learning_rate": 6.378137948861633e-06, "loss": 0.3112, "step": 8227 }, { "epoch": 1.2590665646518744, "grad_norm": 2.5231403921304647, "learning_rate": 6.375828131398937e-06, "loss": 0.4162, "step": 8228 }, { "epoch": 1.259219586840092, "grad_norm": 2.196984189818374, "learning_rate": 6.373518536510815e-06, "loss": 0.4081, "step": 8229 }, { "epoch": 1.259372609028309, "grad_norm": 2.262628444670184, "learning_rate": 6.371209164339103e-06, "loss": 0.3758, "step": 8230 }, { "epoch": 1.2595256312165264, "grad_norm": 2.1348336472126883, "learning_rate": 6.3689000150256345e-06, "loss": 0.4025, "step": 8231 }, { "epoch": 1.2596786534047437, "grad_norm": 2.2223035828990434, "learning_rate": 6.3665910887122175e-06, "loss": 0.3868, "step": 8232 }, { "epoch": 1.259831675592961, "grad_norm": 2.3435045332318207, "learning_rate": 6.364282385540661e-06, "loss": 0.3535, "step": 8233 }, { "epoch": 1.2599846977811784, "grad_norm": 2.110334468447516, "learning_rate": 6.361973905652743e-06, "loss": 0.3774, "step": 8234 }, { "epoch": 1.2601377199693955, "grad_norm": 2.3897983562604272, "learning_rate": 6.359665649190239e-06, "loss": 0.4594, "step": 8235 }, { "epoch": 1.2602907421576128, "grad_norm": 2.402575115221835, "learning_rate": 6.357357616294912e-06, "loss": 0.4075, "step": 8236 }, { "epoch": 1.2604437643458302, "grad_norm": 2.263508452347503, "learning_rate": 6.3550498071085025e-06, "loss": 0.3954, "step": 8237 }, { "epoch": 1.2605967865340475, "grad_norm": 2.2037969854889, "learning_rate": 6.3527422217727405e-06, "loss": 0.427, "step": 8238 }, { "epoch": 1.2607498087222648, "grad_norm": 2.2239380934976687, "learning_rate": 6.350434860429351e-06, "loss": 0.4106, "step": 8239 }, { "epoch": 1.260902830910482, "grad_norm": 2.468996984247626, "learning_rate": 6.34812772322003e-06, "loss": 0.4408, "step": 8240 }, { "epoch": 1.2610558530986993, "grad_norm": 2.0541356098332915, "learning_rate": 6.3458208102864735e-06, "loss": 0.3487, "step": 8241 }, { "epoch": 1.2612088752869166, "grad_norm": 2.30173111222071, "learning_rate": 6.343514121770358e-06, "loss": 0.4123, "step": 8242 }, { "epoch": 1.261361897475134, "grad_norm": 2.4270570534290963, "learning_rate": 6.341207657813342e-06, "loss": 0.4424, "step": 8243 }, { "epoch": 1.2615149196633513, "grad_norm": 2.399051121363175, "learning_rate": 6.338901418557078e-06, "loss": 0.4351, "step": 8244 }, { "epoch": 1.2616679418515684, "grad_norm": 2.0292806752648724, "learning_rate": 6.336595404143202e-06, "loss": 0.3252, "step": 8245 }, { "epoch": 1.2618209640397857, "grad_norm": 2.3769670869429755, "learning_rate": 6.334289614713329e-06, "loss": 0.4465, "step": 8246 }, { "epoch": 1.261973986228003, "grad_norm": 2.217733053161381, "learning_rate": 6.331984050409075e-06, "loss": 0.3758, "step": 8247 }, { "epoch": 1.2621270084162204, "grad_norm": 2.142845566249004, "learning_rate": 6.329678711372025e-06, "loss": 0.3559, "step": 8248 }, { "epoch": 1.2622800306044377, "grad_norm": 2.2208892699743004, "learning_rate": 6.32737359774377e-06, "loss": 0.4055, "step": 8249 }, { "epoch": 1.2624330527926548, "grad_norm": 2.1323922539303015, "learning_rate": 6.325068709665867e-06, "loss": 0.3995, "step": 8250 }, { "epoch": 1.2625860749808722, "grad_norm": 1.8538505473912326, "learning_rate": 6.322764047279869e-06, "loss": 0.3345, "step": 8251 }, { "epoch": 1.2627390971690895, "grad_norm": 2.086198855742557, "learning_rate": 6.320459610727321e-06, "loss": 0.3561, "step": 8252 }, { "epoch": 1.2628921193573068, "grad_norm": 2.293317461350589, "learning_rate": 6.318155400149739e-06, "loss": 0.4263, "step": 8253 }, { "epoch": 1.2630451415455242, "grad_norm": 2.057507239735757, "learning_rate": 6.315851415688637e-06, "loss": 0.3783, "step": 8254 }, { "epoch": 1.2631981637337413, "grad_norm": 2.0939084818628655, "learning_rate": 6.313547657485516e-06, "loss": 0.4935, "step": 8255 }, { "epoch": 1.2633511859219586, "grad_norm": 2.0975012438371197, "learning_rate": 6.31124412568185e-06, "loss": 0.4144, "step": 8256 }, { "epoch": 1.263504208110176, "grad_norm": 2.212720946192638, "learning_rate": 6.3089408204191146e-06, "loss": 0.4442, "step": 8257 }, { "epoch": 1.2636572302983933, "grad_norm": 2.1433392764607047, "learning_rate": 6.306637741838766e-06, "loss": 0.4074, "step": 8258 }, { "epoch": 1.2638102524866106, "grad_norm": 1.9780751620760415, "learning_rate": 6.304334890082238e-06, "loss": 0.347, "step": 8259 }, { "epoch": 1.2639632746748277, "grad_norm": 1.8717079822783924, "learning_rate": 6.302032265290963e-06, "loss": 0.3133, "step": 8260 }, { "epoch": 1.2641162968630453, "grad_norm": 1.991064644715254, "learning_rate": 6.299729867606352e-06, "loss": 0.4103, "step": 8261 }, { "epoch": 1.2642693190512624, "grad_norm": 2.0474793196900722, "learning_rate": 6.297427697169809e-06, "loss": 0.3109, "step": 8262 }, { "epoch": 1.2644223412394797, "grad_norm": 2.0837694344823885, "learning_rate": 6.295125754122713e-06, "loss": 0.3487, "step": 8263 }, { "epoch": 1.264575363427697, "grad_norm": 2.0638674344607497, "learning_rate": 6.292824038606437e-06, "loss": 0.3289, "step": 8264 }, { "epoch": 1.2647283856159144, "grad_norm": 2.322091446527354, "learning_rate": 6.290522550762343e-06, "loss": 0.4061, "step": 8265 }, { "epoch": 1.2648814078041317, "grad_norm": 2.4768178687633124, "learning_rate": 6.288221290731769e-06, "loss": 0.4325, "step": 8266 }, { "epoch": 1.2650344299923488, "grad_norm": 2.193177524623937, "learning_rate": 6.285920258656043e-06, "loss": 0.4204, "step": 8267 }, { "epoch": 1.2651874521805662, "grad_norm": 2.428941265384786, "learning_rate": 6.283619454676489e-06, "loss": 0.4369, "step": 8268 }, { "epoch": 1.2653404743687835, "grad_norm": 2.3254280713571265, "learning_rate": 6.281318878934397e-06, "loss": 0.3982, "step": 8269 }, { "epoch": 1.2654934965570008, "grad_norm": 2.272008199503931, "learning_rate": 6.279018531571064e-06, "loss": 0.4191, "step": 8270 }, { "epoch": 1.2656465187452182, "grad_norm": 2.305652085924085, "learning_rate": 6.27671841272776e-06, "loss": 0.3955, "step": 8271 }, { "epoch": 1.2657995409334353, "grad_norm": 1.9353755654140936, "learning_rate": 6.27441852254574e-06, "loss": 0.3637, "step": 8272 }, { "epoch": 1.2659525631216526, "grad_norm": 1.8828491506877458, "learning_rate": 6.2721188611662575e-06, "loss": 0.2892, "step": 8273 }, { "epoch": 1.26610558530987, "grad_norm": 2.1776025532535623, "learning_rate": 6.269819428730539e-06, "loss": 0.4541, "step": 8274 }, { "epoch": 1.2662586074980873, "grad_norm": 1.999490178432348, "learning_rate": 6.267520225379797e-06, "loss": 0.3681, "step": 8275 }, { "epoch": 1.2664116296863046, "grad_norm": 2.253935070804977, "learning_rate": 6.265221251255245e-06, "loss": 0.3812, "step": 8276 }, { "epoch": 1.2665646518745217, "grad_norm": 2.002987514155831, "learning_rate": 6.262922506498063e-06, "loss": 0.3807, "step": 8277 }, { "epoch": 1.266717674062739, "grad_norm": 2.0505934288097167, "learning_rate": 6.26062399124943e-06, "loss": 0.3383, "step": 8278 }, { "epoch": 1.2668706962509564, "grad_norm": 2.0558883228802864, "learning_rate": 6.25832570565051e-06, "loss": 0.3349, "step": 8279 }, { "epoch": 1.2670237184391737, "grad_norm": 2.1637451051227834, "learning_rate": 6.25602764984244e-06, "loss": 0.3656, "step": 8280 }, { "epoch": 1.267176740627391, "grad_norm": 2.101792748438887, "learning_rate": 6.2537298239663615e-06, "loss": 0.3983, "step": 8281 }, { "epoch": 1.2673297628156082, "grad_norm": 1.9893459953979338, "learning_rate": 6.251432228163392e-06, "loss": 0.3411, "step": 8282 }, { "epoch": 1.2674827850038255, "grad_norm": 2.2123349417488734, "learning_rate": 6.249134862574631e-06, "loss": 0.4035, "step": 8283 }, { "epoch": 1.2676358071920428, "grad_norm": 1.9567029487218803, "learning_rate": 6.2468377273411715e-06, "loss": 0.3217, "step": 8284 }, { "epoch": 1.2677888293802602, "grad_norm": 2.25711180116385, "learning_rate": 6.244540822604088e-06, "loss": 0.3533, "step": 8285 }, { "epoch": 1.2679418515684775, "grad_norm": 2.1290431639737477, "learning_rate": 6.24224414850445e-06, "loss": 0.4025, "step": 8286 }, { "epoch": 1.2680948737566946, "grad_norm": 2.4206966590733434, "learning_rate": 6.239947705183296e-06, "loss": 0.4232, "step": 8287 }, { "epoch": 1.268247895944912, "grad_norm": 2.2201454182364677, "learning_rate": 6.23765149278166e-06, "loss": 0.3474, "step": 8288 }, { "epoch": 1.2684009181331293, "grad_norm": 2.0819815759252935, "learning_rate": 6.2353555114405695e-06, "loss": 0.3625, "step": 8289 }, { "epoch": 1.2685539403213466, "grad_norm": 2.6327659066634244, "learning_rate": 6.233059761301022e-06, "loss": 0.3831, "step": 8290 }, { "epoch": 1.268706962509564, "grad_norm": 2.358386566791711, "learning_rate": 6.2307642425040085e-06, "loss": 0.4258, "step": 8291 }, { "epoch": 1.268859984697781, "grad_norm": 2.406902397025713, "learning_rate": 6.228468955190512e-06, "loss": 0.4622, "step": 8292 }, { "epoch": 1.2690130068859986, "grad_norm": 2.1879106838578375, "learning_rate": 6.2261738995014885e-06, "loss": 0.4255, "step": 8293 }, { "epoch": 1.2691660290742157, "grad_norm": 2.0277613512524058, "learning_rate": 6.22387907557789e-06, "loss": 0.4056, "step": 8294 }, { "epoch": 1.269319051262433, "grad_norm": 2.172511714493054, "learning_rate": 6.221584483560652e-06, "loss": 0.3701, "step": 8295 }, { "epoch": 1.2694720734506504, "grad_norm": 1.9385020661538073, "learning_rate": 6.219290123590689e-06, "loss": 0.3819, "step": 8296 }, { "epoch": 1.2696250956388677, "grad_norm": 2.0184288428167183, "learning_rate": 6.21699599580891e-06, "loss": 0.3895, "step": 8297 }, { "epoch": 1.269778117827085, "grad_norm": 2.275096189344347, "learning_rate": 6.214702100356205e-06, "loss": 0.3442, "step": 8298 }, { "epoch": 1.2699311400153022, "grad_norm": 2.6762797079326064, "learning_rate": 6.212408437373456e-06, "loss": 0.3961, "step": 8299 }, { "epoch": 1.2700841622035195, "grad_norm": 2.2741365286187025, "learning_rate": 6.21011500700152e-06, "loss": 0.4131, "step": 8300 }, { "epoch": 1.2702371843917368, "grad_norm": 1.9190830243481853, "learning_rate": 6.207821809381247e-06, "loss": 0.2969, "step": 8301 }, { "epoch": 1.2703902065799542, "grad_norm": 2.2401792695423497, "learning_rate": 6.205528844653474e-06, "loss": 0.3656, "step": 8302 }, { "epoch": 1.2705432287681715, "grad_norm": 1.9741605060912868, "learning_rate": 6.2032361129590165e-06, "loss": 0.3255, "step": 8303 }, { "epoch": 1.2706962509563886, "grad_norm": 1.9776819900089568, "learning_rate": 6.200943614438681e-06, "loss": 0.3396, "step": 8304 }, { "epoch": 1.270849273144606, "grad_norm": 2.4727351802146096, "learning_rate": 6.1986513492332645e-06, "loss": 0.4541, "step": 8305 }, { "epoch": 1.2710022953328233, "grad_norm": 1.762131739082361, "learning_rate": 6.196359317483536e-06, "loss": 0.2889, "step": 8306 }, { "epoch": 1.2711553175210406, "grad_norm": 2.077140126012181, "learning_rate": 6.1940675193302625e-06, "loss": 0.3449, "step": 8307 }, { "epoch": 1.271308339709258, "grad_norm": 1.966671711976856, "learning_rate": 6.191775954914195e-06, "loss": 0.3855, "step": 8308 }, { "epoch": 1.271461361897475, "grad_norm": 1.9752626408364398, "learning_rate": 6.189484624376059e-06, "loss": 0.321, "step": 8309 }, { "epoch": 1.2716143840856924, "grad_norm": 2.1434276190951, "learning_rate": 6.187193527856581e-06, "loss": 0.4265, "step": 8310 }, { "epoch": 1.2717674062739097, "grad_norm": 2.145563444426424, "learning_rate": 6.184902665496466e-06, "loss": 0.34, "step": 8311 }, { "epoch": 1.271920428462127, "grad_norm": 1.9283577771018996, "learning_rate": 6.182612037436399e-06, "loss": 0.3911, "step": 8312 }, { "epoch": 1.2720734506503444, "grad_norm": 2.3122756860692784, "learning_rate": 6.180321643817062e-06, "loss": 0.3537, "step": 8313 }, { "epoch": 1.2722264728385615, "grad_norm": 2.164697408819195, "learning_rate": 6.178031484779115e-06, "loss": 0.353, "step": 8314 }, { "epoch": 1.2723794950267788, "grad_norm": 2.0017443136895654, "learning_rate": 6.1757415604632084e-06, "loss": 0.3517, "step": 8315 }, { "epoch": 1.2725325172149962, "grad_norm": 2.4725242851848463, "learning_rate": 6.173451871009974e-06, "loss": 0.3811, "step": 8316 }, { "epoch": 1.2726855394032135, "grad_norm": 1.9614402340989665, "learning_rate": 6.1711624165600255e-06, "loss": 0.3094, "step": 8317 }, { "epoch": 1.2728385615914308, "grad_norm": 2.4254988450390393, "learning_rate": 6.168873197253975e-06, "loss": 0.3652, "step": 8318 }, { "epoch": 1.272991583779648, "grad_norm": 2.3981313138203686, "learning_rate": 6.1665842132324095e-06, "loss": 0.4186, "step": 8319 }, { "epoch": 1.2731446059678653, "grad_norm": 2.1409247382783057, "learning_rate": 6.164295464635899e-06, "loss": 0.3752, "step": 8320 }, { "epoch": 1.2732976281560826, "grad_norm": 2.1396974459941616, "learning_rate": 6.162006951605015e-06, "loss": 0.3566, "step": 8321 }, { "epoch": 1.2734506503443, "grad_norm": 1.9810680981670772, "learning_rate": 6.159718674280294e-06, "loss": 0.3446, "step": 8322 }, { "epoch": 1.2736036725325173, "grad_norm": 2.049489820508499, "learning_rate": 6.157430632802275e-06, "loss": 0.3551, "step": 8323 }, { "epoch": 1.2737566947207344, "grad_norm": 2.1250524285136674, "learning_rate": 6.1551428273114745e-06, "loss": 0.3756, "step": 8324 }, { "epoch": 1.2739097169089517, "grad_norm": 2.0211598009609637, "learning_rate": 6.15285525794839e-06, "loss": 0.3541, "step": 8325 }, { "epoch": 1.274062739097169, "grad_norm": 2.1857253921429525, "learning_rate": 6.150567924853516e-06, "loss": 0.4467, "step": 8326 }, { "epoch": 1.2742157612853864, "grad_norm": 2.4665073319611563, "learning_rate": 6.1482808281673275e-06, "loss": 0.4236, "step": 8327 }, { "epoch": 1.2743687834736037, "grad_norm": 2.0913011820808998, "learning_rate": 6.1459939680302774e-06, "loss": 0.3339, "step": 8328 }, { "epoch": 1.2745218056618208, "grad_norm": 1.952019493038919, "learning_rate": 6.143707344582817e-06, "loss": 0.3398, "step": 8329 }, { "epoch": 1.2746748278500384, "grad_norm": 2.2720660090661906, "learning_rate": 6.141420957965372e-06, "loss": 0.4193, "step": 8330 }, { "epoch": 1.2748278500382555, "grad_norm": 1.9846585102425593, "learning_rate": 6.1391348083183635e-06, "loss": 0.2835, "step": 8331 }, { "epoch": 1.2749808722264728, "grad_norm": 2.2036433802674287, "learning_rate": 6.13684889578219e-06, "loss": 0.3277, "step": 8332 }, { "epoch": 1.2751338944146902, "grad_norm": 1.9806487568109303, "learning_rate": 6.134563220497236e-06, "loss": 0.2871, "step": 8333 }, { "epoch": 1.2752869166029075, "grad_norm": 1.9513522399246421, "learning_rate": 6.132277782603879e-06, "loss": 0.3365, "step": 8334 }, { "epoch": 1.2754399387911248, "grad_norm": 2.165274496595406, "learning_rate": 6.129992582242472e-06, "loss": 0.3678, "step": 8335 }, { "epoch": 1.275592960979342, "grad_norm": 1.9969987826577444, "learning_rate": 6.127707619553361e-06, "loss": 0.4157, "step": 8336 }, { "epoch": 1.2757459831675593, "grad_norm": 2.265249391828096, "learning_rate": 6.125422894676876e-06, "loss": 0.394, "step": 8337 }, { "epoch": 1.2758990053557766, "grad_norm": 2.116122460738184, "learning_rate": 6.123138407753324e-06, "loss": 0.3288, "step": 8338 }, { "epoch": 1.276052027543994, "grad_norm": 1.8821468642653791, "learning_rate": 6.120854158923011e-06, "loss": 0.3307, "step": 8339 }, { "epoch": 1.2762050497322113, "grad_norm": 1.9513607257134291, "learning_rate": 6.118570148326222e-06, "loss": 0.2738, "step": 8340 }, { "epoch": 1.2763580719204284, "grad_norm": 1.9936208113589038, "learning_rate": 6.11628637610322e-06, "loss": 0.3224, "step": 8341 }, { "epoch": 1.2765110941086457, "grad_norm": 2.3496749593098785, "learning_rate": 6.114002842394267e-06, "loss": 0.5041, "step": 8342 }, { "epoch": 1.276664116296863, "grad_norm": 2.0713929832468616, "learning_rate": 6.1117195473395985e-06, "loss": 0.3629, "step": 8343 }, { "epoch": 1.2768171384850804, "grad_norm": 2.2931273716137355, "learning_rate": 6.109436491079449e-06, "loss": 0.401, "step": 8344 }, { "epoch": 1.2769701606732977, "grad_norm": 1.9863701102849063, "learning_rate": 6.1071536737540225e-06, "loss": 0.3444, "step": 8345 }, { "epoch": 1.2771231828615148, "grad_norm": 1.7369545129822166, "learning_rate": 6.104871095503515e-06, "loss": 0.3197, "step": 8346 }, { "epoch": 1.2772762050497322, "grad_norm": 2.077382461800277, "learning_rate": 6.102588756468115e-06, "loss": 0.382, "step": 8347 }, { "epoch": 1.2774292272379495, "grad_norm": 2.494916291650205, "learning_rate": 6.100306656787986e-06, "loss": 0.3721, "step": 8348 }, { "epoch": 1.2775822494261668, "grad_norm": 1.9543398821885236, "learning_rate": 6.098024796603277e-06, "loss": 0.374, "step": 8349 }, { "epoch": 1.2777352716143842, "grad_norm": 2.0822556782993784, "learning_rate": 6.095743176054134e-06, "loss": 0.3969, "step": 8350 }, { "epoch": 1.2778882938026013, "grad_norm": 2.0266313057284635, "learning_rate": 6.093461795280673e-06, "loss": 0.3629, "step": 8351 }, { "epoch": 1.2780413159908186, "grad_norm": 2.2597584533701247, "learning_rate": 6.091180654423008e-06, "loss": 0.3956, "step": 8352 }, { "epoch": 1.278194338179036, "grad_norm": 2.156520273496613, "learning_rate": 6.0888997536212314e-06, "loss": 0.3296, "step": 8353 }, { "epoch": 1.2783473603672533, "grad_norm": 2.0934949462261416, "learning_rate": 6.086619093015416e-06, "loss": 0.3407, "step": 8354 }, { "epoch": 1.2785003825554706, "grad_norm": 2.169281922759286, "learning_rate": 6.0843386727456355e-06, "loss": 0.3667, "step": 8355 }, { "epoch": 1.2786534047436877, "grad_norm": 2.22918601294946, "learning_rate": 6.0820584929519375e-06, "loss": 0.3654, "step": 8356 }, { "epoch": 1.278806426931905, "grad_norm": 2.6905374498040087, "learning_rate": 6.079778553774347e-06, "loss": 0.4124, "step": 8357 }, { "epoch": 1.2789594491201224, "grad_norm": 2.2477897591567015, "learning_rate": 6.077498855352899e-06, "loss": 0.4542, "step": 8358 }, { "epoch": 1.2791124713083397, "grad_norm": 2.2985892832882864, "learning_rate": 6.075219397827585e-06, "loss": 0.3577, "step": 8359 }, { "epoch": 1.279265493496557, "grad_norm": 2.1801673874314336, "learning_rate": 6.072940181338406e-06, "loss": 0.4154, "step": 8360 }, { "epoch": 1.2794185156847742, "grad_norm": 2.069130573415728, "learning_rate": 6.070661206025334e-06, "loss": 0.3956, "step": 8361 }, { "epoch": 1.2795715378729917, "grad_norm": 2.0048130565920155, "learning_rate": 6.068382472028325e-06, "loss": 0.3135, "step": 8362 }, { "epoch": 1.2797245600612088, "grad_norm": 1.88601860046552, "learning_rate": 6.066103979487332e-06, "loss": 0.3595, "step": 8363 }, { "epoch": 1.2798775822494262, "grad_norm": 2.308966911833846, "learning_rate": 6.0638257285422845e-06, "loss": 0.3559, "step": 8364 }, { "epoch": 1.2800306044376435, "grad_norm": 1.9954425438063566, "learning_rate": 6.061547719333092e-06, "loss": 0.4064, "step": 8365 }, { "epoch": 1.2801836266258608, "grad_norm": 2.2874589176202775, "learning_rate": 6.059269951999665e-06, "loss": 0.3284, "step": 8366 }, { "epoch": 1.2803366488140782, "grad_norm": 2.281622447058958, "learning_rate": 6.0569924266818844e-06, "loss": 0.4108, "step": 8367 }, { "epoch": 1.2804896710022953, "grad_norm": 2.206075216573966, "learning_rate": 6.054715143519627e-06, "loss": 0.366, "step": 8368 }, { "epoch": 1.2806426931905126, "grad_norm": 2.2776572125639842, "learning_rate": 6.052438102652746e-06, "loss": 0.4196, "step": 8369 }, { "epoch": 1.28079571537873, "grad_norm": 1.7869163264626549, "learning_rate": 6.05016130422108e-06, "loss": 0.3237, "step": 8370 }, { "epoch": 1.2809487375669473, "grad_norm": 2.1341648244510423, "learning_rate": 6.047884748364467e-06, "loss": 0.3737, "step": 8371 }, { "epoch": 1.2811017597551646, "grad_norm": 2.18049711268148, "learning_rate": 6.0456084352227065e-06, "loss": 0.409, "step": 8372 }, { "epoch": 1.2812547819433817, "grad_norm": 2.1175669065339093, "learning_rate": 6.043332364935603e-06, "loss": 0.3558, "step": 8373 }, { "epoch": 1.281407804131599, "grad_norm": 2.173177324049422, "learning_rate": 6.041056537642942e-06, "loss": 0.336, "step": 8374 }, { "epoch": 1.2815608263198164, "grad_norm": 2.0800240094069697, "learning_rate": 6.0387809534844795e-06, "loss": 0.3442, "step": 8375 }, { "epoch": 1.2817138485080337, "grad_norm": 1.906890683579724, "learning_rate": 6.03650561259998e-06, "loss": 0.3571, "step": 8376 }, { "epoch": 1.281866870696251, "grad_norm": 1.989073502947194, "learning_rate": 6.0342305151291755e-06, "loss": 0.3221, "step": 8377 }, { "epoch": 1.2820198928844682, "grad_norm": 1.816191193612736, "learning_rate": 6.031955661211788e-06, "loss": 0.3345, "step": 8378 }, { "epoch": 1.2821729150726855, "grad_norm": 2.0046077691510362, "learning_rate": 6.029681050987526e-06, "loss": 0.3253, "step": 8379 }, { "epoch": 1.2823259372609028, "grad_norm": 1.998886441065847, "learning_rate": 6.027406684596082e-06, "loss": 0.358, "step": 8380 }, { "epoch": 1.2824789594491202, "grad_norm": 2.301187711651734, "learning_rate": 6.025132562177139e-06, "loss": 0.3304, "step": 8381 }, { "epoch": 1.2826319816373375, "grad_norm": 2.1476577961675627, "learning_rate": 6.022858683870351e-06, "loss": 0.3894, "step": 8382 }, { "epoch": 1.2827850038255546, "grad_norm": 2.245599970159706, "learning_rate": 6.020585049815368e-06, "loss": 0.402, "step": 8383 }, { "epoch": 1.282938026013772, "grad_norm": 1.8652376913870043, "learning_rate": 6.01831166015183e-06, "loss": 0.3539, "step": 8384 }, { "epoch": 1.2830910482019893, "grad_norm": 2.091989916493354, "learning_rate": 6.016038515019347e-06, "loss": 0.3117, "step": 8385 }, { "epoch": 1.2832440703902066, "grad_norm": 2.2039189929629255, "learning_rate": 6.013765614557522e-06, "loss": 0.4243, "step": 8386 }, { "epoch": 1.283397092578424, "grad_norm": 2.2239495888325886, "learning_rate": 6.011492958905949e-06, "loss": 0.3114, "step": 8387 }, { "epoch": 1.283550114766641, "grad_norm": 2.1535282910124747, "learning_rate": 6.009220548204193e-06, "loss": 0.383, "step": 8388 }, { "epoch": 1.2837031369548584, "grad_norm": 1.8290585796153886, "learning_rate": 6.006948382591816e-06, "loss": 0.2985, "step": 8389 }, { "epoch": 1.2838561591430757, "grad_norm": 2.023290060036757, "learning_rate": 6.004676462208363e-06, "loss": 0.3981, "step": 8390 }, { "epoch": 1.284009181331293, "grad_norm": 1.914516702960148, "learning_rate": 6.002404787193352e-06, "loss": 0.2963, "step": 8391 }, { "epoch": 1.2841622035195104, "grad_norm": 2.1996232859634914, "learning_rate": 6.000133357686306e-06, "loss": 0.3343, "step": 8392 }, { "epoch": 1.2843152257077275, "grad_norm": 2.190271452100412, "learning_rate": 5.99786217382672e-06, "loss": 0.3885, "step": 8393 }, { "epoch": 1.284468247895945, "grad_norm": 2.3640070841460195, "learning_rate": 5.995591235754069e-06, "loss": 0.4181, "step": 8394 }, { "epoch": 1.2846212700841622, "grad_norm": 2.0388498245605, "learning_rate": 5.993320543607828e-06, "loss": 0.3608, "step": 8395 }, { "epoch": 1.2847742922723795, "grad_norm": 2.2357999558982247, "learning_rate": 5.9910500975274456e-06, "loss": 0.3957, "step": 8396 }, { "epoch": 1.2849273144605968, "grad_norm": 2.041737865466858, "learning_rate": 5.988779897652364e-06, "loss": 0.4002, "step": 8397 }, { "epoch": 1.2850803366488142, "grad_norm": 2.239729576172599, "learning_rate": 5.986509944121996e-06, "loss": 0.3372, "step": 8398 }, { "epoch": 1.2852333588370315, "grad_norm": 2.2708251624696763, "learning_rate": 5.984240237075754e-06, "loss": 0.3929, "step": 8399 }, { "epoch": 1.2853863810252486, "grad_norm": 1.9576468807793188, "learning_rate": 5.981970776653032e-06, "loss": 0.3, "step": 8400 }, { "epoch": 1.285539403213466, "grad_norm": 2.348142708232122, "learning_rate": 5.979701562993199e-06, "loss": 0.4128, "step": 8401 }, { "epoch": 1.2856924254016833, "grad_norm": 2.225493045946411, "learning_rate": 5.977432596235623e-06, "loss": 0.4226, "step": 8402 }, { "epoch": 1.2858454475899006, "grad_norm": 2.0902073882347283, "learning_rate": 5.9751638765196494e-06, "loss": 0.3488, "step": 8403 }, { "epoch": 1.285998469778118, "grad_norm": 2.0223018514828786, "learning_rate": 5.972895403984603e-06, "loss": 0.3408, "step": 8404 }, { "epoch": 1.286151491966335, "grad_norm": 2.171673961595489, "learning_rate": 5.970627178769806e-06, "loss": 0.4067, "step": 8405 }, { "epoch": 1.2863045141545524, "grad_norm": 2.216297180724609, "learning_rate": 5.96835920101456e-06, "loss": 0.3865, "step": 8406 }, { "epoch": 1.2864575363427697, "grad_norm": 2.224480053286395, "learning_rate": 5.966091470858142e-06, "loss": 0.3895, "step": 8407 }, { "epoch": 1.286610558530987, "grad_norm": 2.0159160285134994, "learning_rate": 5.96382398843983e-06, "loss": 0.3411, "step": 8408 }, { "epoch": 1.2867635807192044, "grad_norm": 1.9803037406134543, "learning_rate": 5.961556753898874e-06, "loss": 0.3131, "step": 8409 }, { "epoch": 1.2869166029074215, "grad_norm": 1.9246640134321054, "learning_rate": 5.959289767374521e-06, "loss": 0.2991, "step": 8410 }, { "epoch": 1.2870696250956388, "grad_norm": 2.29223275943616, "learning_rate": 5.957023029005988e-06, "loss": 0.3997, "step": 8411 }, { "epoch": 1.2872226472838562, "grad_norm": 2.0759714800081297, "learning_rate": 5.9547565389324865e-06, "loss": 0.3184, "step": 8412 }, { "epoch": 1.2873756694720735, "grad_norm": 2.345956476444365, "learning_rate": 5.952490297293214e-06, "loss": 0.406, "step": 8413 }, { "epoch": 1.2875286916602908, "grad_norm": 2.1234583404676997, "learning_rate": 5.950224304227345e-06, "loss": 0.3872, "step": 8414 }, { "epoch": 1.287681713848508, "grad_norm": 2.3341118330349926, "learning_rate": 5.947958559874041e-06, "loss": 0.3964, "step": 8415 }, { "epoch": 1.2878347360367253, "grad_norm": 2.346952170027564, "learning_rate": 5.945693064372457e-06, "loss": 0.3331, "step": 8416 }, { "epoch": 1.2879877582249426, "grad_norm": 2.0502283032629234, "learning_rate": 5.94342781786172e-06, "loss": 0.3677, "step": 8417 }, { "epoch": 1.28814078041316, "grad_norm": 2.3096833762404616, "learning_rate": 5.941162820480952e-06, "loss": 0.3422, "step": 8418 }, { "epoch": 1.2882938026013773, "grad_norm": 2.138229007243028, "learning_rate": 5.9388980723692545e-06, "loss": 0.3992, "step": 8419 }, { "epoch": 1.2884468247895944, "grad_norm": 2.262793774066718, "learning_rate": 5.936633573665711e-06, "loss": 0.3245, "step": 8420 }, { "epoch": 1.2885998469778117, "grad_norm": 1.9477409894816895, "learning_rate": 5.934369324509397e-06, "loss": 0.2831, "step": 8421 }, { "epoch": 1.288752869166029, "grad_norm": 2.2281573276173394, "learning_rate": 5.93210532503937e-06, "loss": 0.3967, "step": 8422 }, { "epoch": 1.2889058913542464, "grad_norm": 2.22880743175084, "learning_rate": 5.929841575394663e-06, "loss": 0.3822, "step": 8423 }, { "epoch": 1.2890589135424637, "grad_norm": 1.965550070449436, "learning_rate": 5.9275780757143105e-06, "loss": 0.3124, "step": 8424 }, { "epoch": 1.2892119357306808, "grad_norm": 2.027513364688824, "learning_rate": 5.9253148261373185e-06, "loss": 0.3242, "step": 8425 }, { "epoch": 1.2893649579188982, "grad_norm": 2.2444218795153486, "learning_rate": 5.923051826802686e-06, "loss": 0.3393, "step": 8426 }, { "epoch": 1.2895179801071155, "grad_norm": 2.4148438662955822, "learning_rate": 5.92078907784939e-06, "loss": 0.3669, "step": 8427 }, { "epoch": 1.2896710022953328, "grad_norm": 2.2021012890604483, "learning_rate": 5.9185265794163905e-06, "loss": 0.3546, "step": 8428 }, { "epoch": 1.2898240244835502, "grad_norm": 2.1678300062988622, "learning_rate": 5.916264331642645e-06, "loss": 0.3763, "step": 8429 }, { "epoch": 1.2899770466717673, "grad_norm": 2.070445823425188, "learning_rate": 5.9140023346670815e-06, "loss": 0.3678, "step": 8430 }, { "epoch": 1.2901300688599848, "grad_norm": 2.2804935664834383, "learning_rate": 5.911740588628616e-06, "loss": 0.379, "step": 8431 }, { "epoch": 1.290283091048202, "grad_norm": 2.0648217974475305, "learning_rate": 5.9094790936661594e-06, "loss": 0.3711, "step": 8432 }, { "epoch": 1.2904361132364193, "grad_norm": 2.133264133751307, "learning_rate": 5.907217849918588e-06, "loss": 0.4078, "step": 8433 }, { "epoch": 1.2905891354246366, "grad_norm": 2.0061436737092135, "learning_rate": 5.904956857524784e-06, "loss": 0.4, "step": 8434 }, { "epoch": 1.290742157612854, "grad_norm": 1.9897296088214493, "learning_rate": 5.902696116623599e-06, "loss": 0.3381, "step": 8435 }, { "epoch": 1.2908951798010713, "grad_norm": 1.9672303748095752, "learning_rate": 5.900435627353868e-06, "loss": 0.3526, "step": 8436 }, { "epoch": 1.2910482019892884, "grad_norm": 2.3082399010119192, "learning_rate": 5.898175389854431e-06, "loss": 0.4296, "step": 8437 }, { "epoch": 1.2912012241775057, "grad_norm": 2.043754552160689, "learning_rate": 5.8959154042640855e-06, "loss": 0.3501, "step": 8438 }, { "epoch": 1.291354246365723, "grad_norm": 2.0111371417155053, "learning_rate": 5.893655670721632e-06, "loss": 0.3295, "step": 8439 }, { "epoch": 1.2915072685539404, "grad_norm": 2.1845344064148677, "learning_rate": 5.891396189365849e-06, "loss": 0.3611, "step": 8440 }, { "epoch": 1.2916602907421577, "grad_norm": 2.3597698023948865, "learning_rate": 5.889136960335496e-06, "loss": 0.3903, "step": 8441 }, { "epoch": 1.2918133129303748, "grad_norm": 2.2960601544175567, "learning_rate": 5.886877983769324e-06, "loss": 0.3677, "step": 8442 }, { "epoch": 1.2919663351185922, "grad_norm": 2.4107761408220028, "learning_rate": 5.884619259806069e-06, "loss": 0.357, "step": 8443 }, { "epoch": 1.2921193573068095, "grad_norm": 2.211778674971672, "learning_rate": 5.882360788584441e-06, "loss": 0.3633, "step": 8444 }, { "epoch": 1.2922723794950268, "grad_norm": 2.210377762764795, "learning_rate": 5.880102570243147e-06, "loss": 0.4351, "step": 8445 }, { "epoch": 1.2924254016832442, "grad_norm": 2.0353077772274246, "learning_rate": 5.877844604920869e-06, "loss": 0.3138, "step": 8446 }, { "epoch": 1.2925784238714613, "grad_norm": 2.1796447244885417, "learning_rate": 5.875586892756284e-06, "loss": 0.4046, "step": 8447 }, { "epoch": 1.2927314460596786, "grad_norm": 2.1991555410920003, "learning_rate": 5.873329433888042e-06, "loss": 0.3785, "step": 8448 }, { "epoch": 1.292884468247896, "grad_norm": 2.214367249698276, "learning_rate": 5.87107222845478e-06, "loss": 0.3908, "step": 8449 }, { "epoch": 1.2930374904361133, "grad_norm": 2.261557704663619, "learning_rate": 5.8688152765951274e-06, "loss": 0.4244, "step": 8450 }, { "epoch": 1.2931905126243306, "grad_norm": 2.099925902912094, "learning_rate": 5.86655857844769e-06, "loss": 0.359, "step": 8451 }, { "epoch": 1.2933435348125477, "grad_norm": 2.5580065217380406, "learning_rate": 5.8643021341510576e-06, "loss": 0.3966, "step": 8452 }, { "epoch": 1.293496557000765, "grad_norm": 2.334103559025696, "learning_rate": 5.8620459438438145e-06, "loss": 0.3454, "step": 8453 }, { "epoch": 1.2936495791889824, "grad_norm": 2.208197675578791, "learning_rate": 5.8597900076645135e-06, "loss": 0.4068, "step": 8454 }, { "epoch": 1.2938026013771997, "grad_norm": 2.158637856805305, "learning_rate": 5.857534325751706e-06, "loss": 0.4008, "step": 8455 }, { "epoch": 1.293955623565417, "grad_norm": 2.076159021580232, "learning_rate": 5.8552788982439234e-06, "loss": 0.3684, "step": 8456 }, { "epoch": 1.2941086457536342, "grad_norm": 2.2118198370758497, "learning_rate": 5.853023725279675e-06, "loss": 0.4319, "step": 8457 }, { "epoch": 1.2942616679418515, "grad_norm": 2.0053899150292107, "learning_rate": 5.850768806997465e-06, "loss": 0.3374, "step": 8458 }, { "epoch": 1.2944146901300688, "grad_norm": 2.1800900300923005, "learning_rate": 5.848514143535774e-06, "loss": 0.4282, "step": 8459 }, { "epoch": 1.2945677123182862, "grad_norm": 2.2040001586963505, "learning_rate": 5.846259735033068e-06, "loss": 0.3899, "step": 8460 }, { "epoch": 1.2947207345065035, "grad_norm": 2.14628589047775, "learning_rate": 5.844005581627803e-06, "loss": 0.3827, "step": 8461 }, { "epoch": 1.2948737566947206, "grad_norm": 1.955815144083365, "learning_rate": 5.84175168345841e-06, "loss": 0.2907, "step": 8462 }, { "epoch": 1.2950267788829382, "grad_norm": 2.2990986634509496, "learning_rate": 5.8394980406633185e-06, "loss": 0.3976, "step": 8463 }, { "epoch": 1.2951798010711553, "grad_norm": 2.3551470293805488, "learning_rate": 5.837244653380929e-06, "loss": 0.4031, "step": 8464 }, { "epoch": 1.2953328232593726, "grad_norm": 2.0455913786984032, "learning_rate": 5.834991521749623e-06, "loss": 0.3854, "step": 8465 }, { "epoch": 1.29548584544759, "grad_norm": 2.295691500876195, "learning_rate": 5.832738645907786e-06, "loss": 0.4205, "step": 8466 }, { "epoch": 1.2956388676358073, "grad_norm": 2.0712979508795573, "learning_rate": 5.830486025993775e-06, "loss": 0.3819, "step": 8467 }, { "epoch": 1.2957918898240246, "grad_norm": 2.1216828024914287, "learning_rate": 5.82823366214592e-06, "loss": 0.3732, "step": 8468 }, { "epoch": 1.2959449120122417, "grad_norm": 1.850364780767729, "learning_rate": 5.82598155450256e-06, "loss": 0.3449, "step": 8469 }, { "epoch": 1.296097934200459, "grad_norm": 2.0162996634036245, "learning_rate": 5.823729703202001e-06, "loss": 0.3521, "step": 8470 }, { "epoch": 1.2962509563886764, "grad_norm": 1.8674820045225895, "learning_rate": 5.82147810838254e-06, "loss": 0.3005, "step": 8471 }, { "epoch": 1.2964039785768937, "grad_norm": 2.190393768245424, "learning_rate": 5.819226770182453e-06, "loss": 0.4191, "step": 8472 }, { "epoch": 1.296557000765111, "grad_norm": 2.494076811857044, "learning_rate": 5.816975688740007e-06, "loss": 0.4447, "step": 8473 }, { "epoch": 1.2967100229533282, "grad_norm": 2.113249050025513, "learning_rate": 5.814724864193449e-06, "loss": 0.3346, "step": 8474 }, { "epoch": 1.2968630451415455, "grad_norm": 2.3310835110243144, "learning_rate": 5.8124742966810075e-06, "loss": 0.5078, "step": 8475 }, { "epoch": 1.2970160673297628, "grad_norm": 2.083303137693278, "learning_rate": 5.810223986340901e-06, "loss": 0.3528, "step": 8476 }, { "epoch": 1.2971690895179802, "grad_norm": 2.081771653631222, "learning_rate": 5.807973933311332e-06, "loss": 0.3542, "step": 8477 }, { "epoch": 1.2973221117061975, "grad_norm": 2.2464382632521436, "learning_rate": 5.805724137730482e-06, "loss": 0.3513, "step": 8478 }, { "epoch": 1.2974751338944146, "grad_norm": 2.0902690420170305, "learning_rate": 5.803474599736521e-06, "loss": 0.334, "step": 8479 }, { "epoch": 1.297628156082632, "grad_norm": 1.977981981638016, "learning_rate": 5.801225319467602e-06, "loss": 0.35, "step": 8480 }, { "epoch": 1.2977811782708493, "grad_norm": 2.6637236988962694, "learning_rate": 5.798976297061861e-06, "loss": 0.4043, "step": 8481 }, { "epoch": 1.2979342004590666, "grad_norm": 2.2270672736281747, "learning_rate": 5.79672753265742e-06, "loss": 0.3494, "step": 8482 }, { "epoch": 1.298087222647284, "grad_norm": 2.0626813380311, "learning_rate": 5.794479026392381e-06, "loss": 0.3174, "step": 8483 }, { "epoch": 1.298240244835501, "grad_norm": 2.392483617852613, "learning_rate": 5.792230778404846e-06, "loss": 0.3716, "step": 8484 }, { "epoch": 1.2983932670237184, "grad_norm": 1.9548439126657664, "learning_rate": 5.789982788832875e-06, "loss": 0.3328, "step": 8485 }, { "epoch": 1.2985462892119357, "grad_norm": 2.2712771203487097, "learning_rate": 5.7877350578145265e-06, "loss": 0.4683, "step": 8486 }, { "epoch": 1.298699311400153, "grad_norm": 2.1349081647901396, "learning_rate": 5.785487585487855e-06, "loss": 0.3355, "step": 8487 }, { "epoch": 1.2988523335883704, "grad_norm": 2.0144446923386585, "learning_rate": 5.7832403719908735e-06, "loss": 0.3535, "step": 8488 }, { "epoch": 1.2990053557765875, "grad_norm": 2.323913427366677, "learning_rate": 5.7809934174615935e-06, "loss": 0.3846, "step": 8489 }, { "epoch": 1.2991583779648048, "grad_norm": 2.2065277565739647, "learning_rate": 5.77874672203802e-06, "loss": 0.405, "step": 8490 }, { "epoch": 1.2993114001530222, "grad_norm": 2.0701072367879694, "learning_rate": 5.776500285858115e-06, "loss": 0.3591, "step": 8491 }, { "epoch": 1.2994644223412395, "grad_norm": 2.026467391159173, "learning_rate": 5.774254109059856e-06, "loss": 0.3801, "step": 8492 }, { "epoch": 1.2996174445294568, "grad_norm": 2.03636713492077, "learning_rate": 5.772008191781187e-06, "loss": 0.4068, "step": 8493 }, { "epoch": 1.299770466717674, "grad_norm": 2.235728964125411, "learning_rate": 5.769762534160026e-06, "loss": 0.4005, "step": 8494 }, { "epoch": 1.2999234889058915, "grad_norm": 2.2226179990971597, "learning_rate": 5.7675171363343e-06, "loss": 0.3848, "step": 8495 }, { "epoch": 1.3000765110941086, "grad_norm": 1.8548276674323887, "learning_rate": 5.765271998441909e-06, "loss": 0.2832, "step": 8496 }, { "epoch": 1.300229533282326, "grad_norm": 2.058654357219985, "learning_rate": 5.763027120620723e-06, "loss": 0.3759, "step": 8497 }, { "epoch": 1.3003825554705433, "grad_norm": 2.1669192089657128, "learning_rate": 5.760782503008622e-06, "loss": 0.3865, "step": 8498 }, { "epoch": 1.3005355776587606, "grad_norm": 2.2800499466539854, "learning_rate": 5.758538145743448e-06, "loss": 0.4721, "step": 8499 }, { "epoch": 1.300688599846978, "grad_norm": 1.8692911785721809, "learning_rate": 5.7562940489630424e-06, "loss": 0.3094, "step": 8500 }, { "epoch": 1.300841622035195, "grad_norm": 2.3481643066715803, "learning_rate": 5.75405021280522e-06, "loss": 0.3394, "step": 8501 }, { "epoch": 1.3009946442234124, "grad_norm": 2.3364862272013154, "learning_rate": 5.751806637407783e-06, "loss": 0.4305, "step": 8502 }, { "epoch": 1.3011476664116297, "grad_norm": 2.106806897595338, "learning_rate": 5.7495633229085205e-06, "loss": 0.3885, "step": 8503 }, { "epoch": 1.301300688599847, "grad_norm": 2.131244287327404, "learning_rate": 5.747320269445203e-06, "loss": 0.3757, "step": 8504 }, { "epoch": 1.3014537107880644, "grad_norm": 1.9915545641442498, "learning_rate": 5.745077477155582e-06, "loss": 0.3304, "step": 8505 }, { "epoch": 1.3016067329762815, "grad_norm": 1.932676633650167, "learning_rate": 5.7428349461774e-06, "loss": 0.3865, "step": 8506 }, { "epoch": 1.3017597551644988, "grad_norm": 2.3799151262160496, "learning_rate": 5.740592676648377e-06, "loss": 0.366, "step": 8507 }, { "epoch": 1.3019127773527162, "grad_norm": 2.394625708639643, "learning_rate": 5.7383506687062215e-06, "loss": 0.4086, "step": 8508 }, { "epoch": 1.3020657995409335, "grad_norm": 1.9055085009689208, "learning_rate": 5.736108922488624e-06, "loss": 0.3348, "step": 8509 }, { "epoch": 1.3022188217291508, "grad_norm": 2.053303277029518, "learning_rate": 5.733867438133256e-06, "loss": 0.3045, "step": 8510 }, { "epoch": 1.302371843917368, "grad_norm": 2.169386364248647, "learning_rate": 5.731626215777779e-06, "loss": 0.3458, "step": 8511 }, { "epoch": 1.3025248661055853, "grad_norm": 2.127208555931068, "learning_rate": 5.72938525555983e-06, "loss": 0.3355, "step": 8512 }, { "epoch": 1.3026778882938026, "grad_norm": 2.0541456236284463, "learning_rate": 5.727144557617047e-06, "loss": 0.4235, "step": 8513 }, { "epoch": 1.30283091048202, "grad_norm": 2.1306174022427053, "learning_rate": 5.724904122087028e-06, "loss": 0.3497, "step": 8514 }, { "epoch": 1.3029839326702373, "grad_norm": 1.9520284290277785, "learning_rate": 5.722663949107368e-06, "loss": 0.4199, "step": 8515 }, { "epoch": 1.3031369548584544, "grad_norm": 2.319002474956079, "learning_rate": 5.720424038815655e-06, "loss": 0.3666, "step": 8516 }, { "epoch": 1.3032899770466717, "grad_norm": 2.193066933790192, "learning_rate": 5.71818439134944e-06, "loss": 0.3682, "step": 8517 }, { "epoch": 1.303442999234889, "grad_norm": 2.064281725208092, "learning_rate": 5.715945006846269e-06, "loss": 0.3089, "step": 8518 }, { "epoch": 1.3035960214231064, "grad_norm": 2.165584682981705, "learning_rate": 5.713705885443681e-06, "loss": 0.4026, "step": 8519 }, { "epoch": 1.3037490436113237, "grad_norm": 2.1493436478964374, "learning_rate": 5.711467027279174e-06, "loss": 0.342, "step": 8520 }, { "epoch": 1.3039020657995408, "grad_norm": 1.7198630036611893, "learning_rate": 5.709228432490259e-06, "loss": 0.3119, "step": 8521 }, { "epoch": 1.3040550879877582, "grad_norm": 2.053429899418811, "learning_rate": 5.7069901012144155e-06, "loss": 0.3461, "step": 8522 }, { "epoch": 1.3042081101759755, "grad_norm": 2.191895371424377, "learning_rate": 5.704752033589096e-06, "loss": 0.3762, "step": 8523 }, { "epoch": 1.3043611323641928, "grad_norm": 2.176273855972572, "learning_rate": 5.702514229751761e-06, "loss": 0.3912, "step": 8524 }, { "epoch": 1.3045141545524102, "grad_norm": 2.1364346441788316, "learning_rate": 5.70027668983984e-06, "loss": 0.3702, "step": 8525 }, { "epoch": 1.3046671767406273, "grad_norm": 2.1394636414100354, "learning_rate": 5.698039413990748e-06, "loss": 0.3959, "step": 8526 }, { "epoch": 1.3048201989288448, "grad_norm": 2.0727055307523643, "learning_rate": 5.6958024023418834e-06, "loss": 0.3547, "step": 8527 }, { "epoch": 1.304973221117062, "grad_norm": 2.044965766447203, "learning_rate": 5.6935656550306326e-06, "loss": 0.3701, "step": 8528 }, { "epoch": 1.3051262433052793, "grad_norm": 2.259607204648534, "learning_rate": 5.691329172194362e-06, "loss": 0.3761, "step": 8529 }, { "epoch": 1.3052792654934966, "grad_norm": 2.1227844121777326, "learning_rate": 5.689092953970421e-06, "loss": 0.4002, "step": 8530 }, { "epoch": 1.3054322876817137, "grad_norm": 2.2885565622270008, "learning_rate": 5.6868570004961474e-06, "loss": 0.3782, "step": 8531 }, { "epoch": 1.3055853098699313, "grad_norm": 2.2684685389350303, "learning_rate": 5.684621311908859e-06, "loss": 0.3547, "step": 8532 }, { "epoch": 1.3057383320581484, "grad_norm": 2.2468869954835675, "learning_rate": 5.682385888345856e-06, "loss": 0.315, "step": 8533 }, { "epoch": 1.3058913542463657, "grad_norm": 2.2885821903709296, "learning_rate": 5.680150729944427e-06, "loss": 0.3492, "step": 8534 }, { "epoch": 1.306044376434583, "grad_norm": 2.2283745517249582, "learning_rate": 5.677915836841842e-06, "loss": 0.4227, "step": 8535 }, { "epoch": 1.3061973986228004, "grad_norm": 2.10231696253404, "learning_rate": 5.675681209175349e-06, "loss": 0.3635, "step": 8536 }, { "epoch": 1.3063504208110177, "grad_norm": 2.233717020395182, "learning_rate": 5.673446847082197e-06, "loss": 0.4036, "step": 8537 }, { "epoch": 1.3065034429992348, "grad_norm": 1.9310872009083184, "learning_rate": 5.671212750699597e-06, "loss": 0.2954, "step": 8538 }, { "epoch": 1.3066564651874522, "grad_norm": 2.030845760878608, "learning_rate": 5.6689789201647505e-06, "loss": 0.3745, "step": 8539 }, { "epoch": 1.3068094873756695, "grad_norm": 1.8189427337342248, "learning_rate": 5.66674535561486e-06, "loss": 0.2948, "step": 8540 }, { "epoch": 1.3069625095638868, "grad_norm": 2.0469441672632027, "learning_rate": 5.664512057187081e-06, "loss": 0.3323, "step": 8541 }, { "epoch": 1.3071155317521042, "grad_norm": 2.0924553585486403, "learning_rate": 5.662279025018582e-06, "loss": 0.407, "step": 8542 }, { "epoch": 1.3072685539403213, "grad_norm": 2.3896033125336005, "learning_rate": 5.660046259246501e-06, "loss": 0.4156, "step": 8543 }, { "epoch": 1.3074215761285386, "grad_norm": 2.1963052778718253, "learning_rate": 5.65781376000795e-06, "loss": 0.3667, "step": 8544 }, { "epoch": 1.307574598316756, "grad_norm": 2.2404897511730404, "learning_rate": 5.6555815274400464e-06, "loss": 0.3412, "step": 8545 }, { "epoch": 1.3077276205049733, "grad_norm": 1.976985343614817, "learning_rate": 5.65334956167988e-06, "loss": 0.3942, "step": 8546 }, { "epoch": 1.3078806426931906, "grad_norm": 2.260212423328555, "learning_rate": 5.651117862864512e-06, "loss": 0.3259, "step": 8547 }, { "epoch": 1.3080336648814077, "grad_norm": 2.164028887318763, "learning_rate": 5.6488864311310155e-06, "loss": 0.3614, "step": 8548 }, { "epoch": 1.308186687069625, "grad_norm": 2.30626799094876, "learning_rate": 5.646655266616423e-06, "loss": 0.4431, "step": 8549 }, { "epoch": 1.3083397092578424, "grad_norm": 1.9692214406859057, "learning_rate": 5.6444243694577615e-06, "loss": 0.4161, "step": 8550 }, { "epoch": 1.3084927314460597, "grad_norm": 2.1853722471880594, "learning_rate": 5.642193739792039e-06, "loss": 0.3402, "step": 8551 }, { "epoch": 1.308645753634277, "grad_norm": 2.0970827282400992, "learning_rate": 5.639963377756247e-06, "loss": 0.4279, "step": 8552 }, { "epoch": 1.3087987758224942, "grad_norm": 2.177870812573683, "learning_rate": 5.63773328348736e-06, "loss": 0.3626, "step": 8553 }, { "epoch": 1.3089517980107115, "grad_norm": 2.591283101525571, "learning_rate": 5.635503457122338e-06, "loss": 0.4168, "step": 8554 }, { "epoch": 1.3091048201989288, "grad_norm": 1.9694916240067648, "learning_rate": 5.633273898798122e-06, "loss": 0.3234, "step": 8555 }, { "epoch": 1.3092578423871462, "grad_norm": 2.290182102724067, "learning_rate": 5.631044608651639e-06, "loss": 0.3949, "step": 8556 }, { "epoch": 1.3094108645753635, "grad_norm": 2.276801072822145, "learning_rate": 5.628815586819797e-06, "loss": 0.3722, "step": 8557 }, { "epoch": 1.3095638867635806, "grad_norm": 2.1922935542275392, "learning_rate": 5.626586833439491e-06, "loss": 0.2876, "step": 8558 }, { "epoch": 1.309716908951798, "grad_norm": 2.125655685633201, "learning_rate": 5.6243583486475965e-06, "loss": 0.3184, "step": 8559 }, { "epoch": 1.3098699311400153, "grad_norm": 2.290520817699174, "learning_rate": 5.622130132580973e-06, "loss": 0.4141, "step": 8560 }, { "epoch": 1.3100229533282326, "grad_norm": 2.162194717394431, "learning_rate": 5.619902185376465e-06, "loss": 0.4075, "step": 8561 }, { "epoch": 1.31017597551645, "grad_norm": 2.300845062414474, "learning_rate": 5.617674507170899e-06, "loss": 0.3332, "step": 8562 }, { "epoch": 1.310328997704667, "grad_norm": 2.2539854405846516, "learning_rate": 5.615447098101083e-06, "loss": 0.3666, "step": 8563 }, { "epoch": 1.3104820198928846, "grad_norm": 2.0871045714190015, "learning_rate": 5.613219958303815e-06, "loss": 0.3525, "step": 8564 }, { "epoch": 1.3106350420811017, "grad_norm": 2.2476746690566896, "learning_rate": 5.6109930879158656e-06, "loss": 0.3941, "step": 8565 }, { "epoch": 1.310788064269319, "grad_norm": 2.111502301320883, "learning_rate": 5.60876648707401e-06, "loss": 0.3222, "step": 8566 }, { "epoch": 1.3109410864575364, "grad_norm": 2.217927916834475, "learning_rate": 5.606540155914978e-06, "loss": 0.3817, "step": 8567 }, { "epoch": 1.3110941086457537, "grad_norm": 2.1948887182227312, "learning_rate": 5.604314094575498e-06, "loss": 0.3711, "step": 8568 }, { "epoch": 1.311247130833971, "grad_norm": 2.376746522845046, "learning_rate": 5.602088303192295e-06, "loss": 0.4375, "step": 8569 }, { "epoch": 1.3114001530221882, "grad_norm": 2.0035516123023993, "learning_rate": 5.599862781902049e-06, "loss": 0.3363, "step": 8570 }, { "epoch": 1.3115531752104055, "grad_norm": 2.0738010508442755, "learning_rate": 5.59763753084144e-06, "loss": 0.3137, "step": 8571 }, { "epoch": 1.3117061973986228, "grad_norm": 2.0745210501379217, "learning_rate": 5.595412550147139e-06, "loss": 0.3249, "step": 8572 }, { "epoch": 1.3118592195868402, "grad_norm": 2.267526336468913, "learning_rate": 5.593187839955776e-06, "loss": 0.3783, "step": 8573 }, { "epoch": 1.3120122417750575, "grad_norm": 2.3694203084709935, "learning_rate": 5.590963400403993e-06, "loss": 0.3513, "step": 8574 }, { "epoch": 1.3121652639632746, "grad_norm": 2.012157483197845, "learning_rate": 5.588739231628397e-06, "loss": 0.2914, "step": 8575 }, { "epoch": 1.312318286151492, "grad_norm": 2.0659451699838502, "learning_rate": 5.586515333765577e-06, "loss": 0.3701, "step": 8576 }, { "epoch": 1.3124713083397093, "grad_norm": 1.921731703819784, "learning_rate": 5.584291706952119e-06, "loss": 0.2673, "step": 8577 }, { "epoch": 1.3126243305279266, "grad_norm": 2.1435100630609893, "learning_rate": 5.58206835132458e-06, "loss": 0.3065, "step": 8578 }, { "epoch": 1.312777352716144, "grad_norm": 2.178374984629396, "learning_rate": 5.579845267019508e-06, "loss": 0.3687, "step": 8579 }, { "epoch": 1.312930374904361, "grad_norm": 2.4781271076321207, "learning_rate": 5.577622454173429e-06, "loss": 0.4722, "step": 8580 }, { "epoch": 1.3130833970925784, "grad_norm": 2.085509746046495, "learning_rate": 5.5753999129228565e-06, "loss": 0.3132, "step": 8581 }, { "epoch": 1.3132364192807957, "grad_norm": 1.9262893935758099, "learning_rate": 5.5731776434042835e-06, "loss": 0.3986, "step": 8582 }, { "epoch": 1.313389441469013, "grad_norm": 1.9838375146009155, "learning_rate": 5.57095564575419e-06, "loss": 0.2953, "step": 8583 }, { "epoch": 1.3135424636572304, "grad_norm": 2.10472370597914, "learning_rate": 5.568733920109038e-06, "loss": 0.3327, "step": 8584 }, { "epoch": 1.3136954858454475, "grad_norm": 1.9445119541733116, "learning_rate": 5.5665124666052695e-06, "loss": 0.3065, "step": 8585 }, { "epoch": 1.3138485080336648, "grad_norm": 2.603442112632636, "learning_rate": 5.564291285379314e-06, "loss": 0.3505, "step": 8586 }, { "epoch": 1.3140015302218822, "grad_norm": 2.0445384664789885, "learning_rate": 5.562070376567585e-06, "loss": 0.3704, "step": 8587 }, { "epoch": 1.3141545524100995, "grad_norm": 2.1157344605778827, "learning_rate": 5.559849740306474e-06, "loss": 0.3614, "step": 8588 }, { "epoch": 1.3143075745983168, "grad_norm": 2.13504060924164, "learning_rate": 5.5576293767323606e-06, "loss": 0.3657, "step": 8589 }, { "epoch": 1.314460596786534, "grad_norm": 2.289321737350508, "learning_rate": 5.555409285981605e-06, "loss": 0.3704, "step": 8590 }, { "epoch": 1.3146136189747513, "grad_norm": 2.1213736767351055, "learning_rate": 5.553189468190554e-06, "loss": 0.3293, "step": 8591 }, { "epoch": 1.3147666411629686, "grad_norm": 2.2923070600542075, "learning_rate": 5.550969923495533e-06, "loss": 0.3757, "step": 8592 }, { "epoch": 1.314919663351186, "grad_norm": 1.9832544875286664, "learning_rate": 5.548750652032855e-06, "loss": 0.2989, "step": 8593 }, { "epoch": 1.3150726855394033, "grad_norm": 2.1527441947882116, "learning_rate": 5.546531653938807e-06, "loss": 0.3381, "step": 8594 }, { "epoch": 1.3152257077276204, "grad_norm": 2.212591146712386, "learning_rate": 5.5443129293496825e-06, "loss": 0.3609, "step": 8595 }, { "epoch": 1.315378729915838, "grad_norm": 2.2950267556502757, "learning_rate": 5.542094478401725e-06, "loss": 0.4125, "step": 8596 }, { "epoch": 1.315531752104055, "grad_norm": 2.0985948379512593, "learning_rate": 5.539876301231182e-06, "loss": 0.3706, "step": 8597 }, { "epoch": 1.3156847742922724, "grad_norm": 2.167223206670299, "learning_rate": 5.537658397974293e-06, "loss": 0.3895, "step": 8598 }, { "epoch": 1.3158377964804897, "grad_norm": 2.4820509629216074, "learning_rate": 5.535440768767253e-06, "loss": 0.3807, "step": 8599 }, { "epoch": 1.315990818668707, "grad_norm": 2.2159112092687896, "learning_rate": 5.533223413746256e-06, "loss": 0.3477, "step": 8600 }, { "epoch": 1.3161438408569244, "grad_norm": 2.2364380030208326, "learning_rate": 5.531006333047493e-06, "loss": 0.3696, "step": 8601 }, { "epoch": 1.3162968630451415, "grad_norm": 1.829042244844607, "learning_rate": 5.528789526807105e-06, "loss": 0.2988, "step": 8602 }, { "epoch": 1.3164498852333588, "grad_norm": 2.061590745235742, "learning_rate": 5.526572995161248e-06, "loss": 0.3311, "step": 8603 }, { "epoch": 1.3166029074215762, "grad_norm": 2.133150535391964, "learning_rate": 5.524356738246048e-06, "loss": 0.3052, "step": 8604 }, { "epoch": 1.3167559296097935, "grad_norm": 2.0168388184124106, "learning_rate": 5.5221407561976e-06, "loss": 0.3656, "step": 8605 }, { "epoch": 1.3169089517980108, "grad_norm": 2.2695540004967096, "learning_rate": 5.519925049152011e-06, "loss": 0.3959, "step": 8606 }, { "epoch": 1.317061973986228, "grad_norm": 2.305770870691185, "learning_rate": 5.51770961724535e-06, "loss": 0.4459, "step": 8607 }, { "epoch": 1.3172149961744453, "grad_norm": 2.2371126516169797, "learning_rate": 5.515494460613678e-06, "loss": 0.3856, "step": 8608 }, { "epoch": 1.3173680183626626, "grad_norm": 2.2476766777179322, "learning_rate": 5.513279579393034e-06, "loss": 0.3237, "step": 8609 }, { "epoch": 1.31752104055088, "grad_norm": 2.010718411909031, "learning_rate": 5.511064973719442e-06, "loss": 0.4544, "step": 8610 }, { "epoch": 1.3176740627390973, "grad_norm": 1.8265150494873308, "learning_rate": 5.508850643728911e-06, "loss": 0.2927, "step": 8611 }, { "epoch": 1.3178270849273144, "grad_norm": 2.1922469791566797, "learning_rate": 5.506636589557433e-06, "loss": 0.3636, "step": 8612 }, { "epoch": 1.3179801071155317, "grad_norm": 1.9522370911195468, "learning_rate": 5.504422811340977e-06, "loss": 0.345, "step": 8613 }, { "epoch": 1.318133129303749, "grad_norm": 1.8985775896312533, "learning_rate": 5.502209309215505e-06, "loss": 0.3437, "step": 8614 }, { "epoch": 1.3182861514919664, "grad_norm": 2.531908097820924, "learning_rate": 5.499996083316952e-06, "loss": 0.3961, "step": 8615 }, { "epoch": 1.3184391736801837, "grad_norm": 2.300599284977995, "learning_rate": 5.49778313378125e-06, "loss": 0.3706, "step": 8616 }, { "epoch": 1.3185921958684008, "grad_norm": 1.9660351647994214, "learning_rate": 5.495570460744295e-06, "loss": 0.3349, "step": 8617 }, { "epoch": 1.3187452180566182, "grad_norm": 2.320828623988891, "learning_rate": 5.4933580643419744e-06, "loss": 0.3809, "step": 8618 }, { "epoch": 1.3188982402448355, "grad_norm": 2.2384342613355557, "learning_rate": 5.491145944710174e-06, "loss": 0.3401, "step": 8619 }, { "epoch": 1.3190512624330528, "grad_norm": 1.8784193029515464, "learning_rate": 5.488934101984737e-06, "loss": 0.3359, "step": 8620 }, { "epoch": 1.3192042846212702, "grad_norm": 2.22885101285974, "learning_rate": 5.486722536301499e-06, "loss": 0.3814, "step": 8621 }, { "epoch": 1.3193573068094873, "grad_norm": 2.518679488884114, "learning_rate": 5.4845112477962936e-06, "loss": 0.4485, "step": 8622 }, { "epoch": 1.3195103289977046, "grad_norm": 2.2918568209225616, "learning_rate": 5.482300236604909e-06, "loss": 0.3204, "step": 8623 }, { "epoch": 1.319663351185922, "grad_norm": 2.483464519209941, "learning_rate": 5.4800895028631466e-06, "loss": 0.3592, "step": 8624 }, { "epoch": 1.3198163733741393, "grad_norm": 2.2598966067125104, "learning_rate": 5.477879046706771e-06, "loss": 0.373, "step": 8625 }, { "epoch": 1.3199693955623566, "grad_norm": 2.162251025832532, "learning_rate": 5.475668868271527e-06, "loss": 0.3023, "step": 8626 }, { "epoch": 1.3201224177505737, "grad_norm": 2.055830955710422, "learning_rate": 5.473458967693162e-06, "loss": 0.3665, "step": 8627 }, { "epoch": 1.3202754399387913, "grad_norm": 2.3676870900519495, "learning_rate": 5.471249345107392e-06, "loss": 0.3866, "step": 8628 }, { "epoch": 1.3204284621270084, "grad_norm": 2.2301256287820377, "learning_rate": 5.469040000649909e-06, "loss": 0.45, "step": 8629 }, { "epoch": 1.3205814843152257, "grad_norm": 2.250482208887682, "learning_rate": 5.466830934456409e-06, "loss": 0.2956, "step": 8630 }, { "epoch": 1.320734506503443, "grad_norm": 1.9602055867205517, "learning_rate": 5.464622146662555e-06, "loss": 0.3221, "step": 8631 }, { "epoch": 1.3208875286916604, "grad_norm": 1.991138809691683, "learning_rate": 5.462413637403997e-06, "loss": 0.3425, "step": 8632 }, { "epoch": 1.3210405508798777, "grad_norm": 2.265706180431464, "learning_rate": 5.4602054068163705e-06, "loss": 0.3718, "step": 8633 }, { "epoch": 1.3211935730680948, "grad_norm": 2.437289055001441, "learning_rate": 5.457997455035286e-06, "loss": 0.3938, "step": 8634 }, { "epoch": 1.3213465952563122, "grad_norm": 2.093157026239112, "learning_rate": 5.45578978219635e-06, "loss": 0.3366, "step": 8635 }, { "epoch": 1.3214996174445295, "grad_norm": 2.113182376884977, "learning_rate": 5.453582388435139e-06, "loss": 0.3187, "step": 8636 }, { "epoch": 1.3216526396327468, "grad_norm": 2.3073790380831802, "learning_rate": 5.451375273887219e-06, "loss": 0.3507, "step": 8637 }, { "epoch": 1.3218056618209642, "grad_norm": 2.0134841418393075, "learning_rate": 5.449168438688138e-06, "loss": 0.3791, "step": 8638 }, { "epoch": 1.3219586840091813, "grad_norm": 2.1274435392661935, "learning_rate": 5.446961882973427e-06, "loss": 0.3967, "step": 8639 }, { "epoch": 1.3221117061973986, "grad_norm": 2.2861859533824056, "learning_rate": 5.444755606878598e-06, "loss": 0.3898, "step": 8640 }, { "epoch": 1.322264728385616, "grad_norm": 2.4796974235976386, "learning_rate": 5.442549610539146e-06, "loss": 0.428, "step": 8641 }, { "epoch": 1.3224177505738333, "grad_norm": 2.338041372087975, "learning_rate": 5.440343894090552e-06, "loss": 0.3597, "step": 8642 }, { "epoch": 1.3225707727620506, "grad_norm": 2.2580701341379497, "learning_rate": 5.438138457668277e-06, "loss": 0.4203, "step": 8643 }, { "epoch": 1.3227237949502677, "grad_norm": 2.1502655499210204, "learning_rate": 5.435933301407765e-06, "loss": 0.2846, "step": 8644 }, { "epoch": 1.322876817138485, "grad_norm": 2.3728101853670616, "learning_rate": 5.433728425444444e-06, "loss": 0.3727, "step": 8645 }, { "epoch": 1.3230298393267024, "grad_norm": 2.45813606253095, "learning_rate": 5.431523829913725e-06, "loss": 0.4305, "step": 8646 }, { "epoch": 1.3231828615149197, "grad_norm": 1.9190440302219542, "learning_rate": 5.4293195149509945e-06, "loss": 0.3219, "step": 8647 }, { "epoch": 1.323335883703137, "grad_norm": 2.0409914653263064, "learning_rate": 5.427115480691642e-06, "loss": 0.33, "step": 8648 }, { "epoch": 1.3234889058913542, "grad_norm": 2.264838190331384, "learning_rate": 5.424911727271012e-06, "loss": 0.3573, "step": 8649 }, { "epoch": 1.3236419280795715, "grad_norm": 2.1410706263548462, "learning_rate": 5.422708254824447e-06, "loss": 0.3522, "step": 8650 }, { "epoch": 1.3237949502677888, "grad_norm": 2.4540891731226884, "learning_rate": 5.420505063487282e-06, "loss": 0.4584, "step": 8651 }, { "epoch": 1.3239479724560062, "grad_norm": 2.248809644345026, "learning_rate": 5.418302153394809e-06, "loss": 0.2998, "step": 8652 }, { "epoch": 1.3241009946442235, "grad_norm": 2.0834229656833516, "learning_rate": 5.4160995246823275e-06, "loss": 0.3484, "step": 8653 }, { "epoch": 1.3242540168324406, "grad_norm": 2.017276455572411, "learning_rate": 5.413897177485112e-06, "loss": 0.3551, "step": 8654 }, { "epoch": 1.324407039020658, "grad_norm": 2.1939725955162674, "learning_rate": 5.411695111938402e-06, "loss": 0.3737, "step": 8655 }, { "epoch": 1.3245600612088753, "grad_norm": 2.5776750204395222, "learning_rate": 5.409493328177451e-06, "loss": 0.3604, "step": 8656 }, { "epoch": 1.3247130833970926, "grad_norm": 2.1881840047583956, "learning_rate": 5.407291826337475e-06, "loss": 0.3046, "step": 8657 }, { "epoch": 1.32486610558531, "grad_norm": 2.282370930865902, "learning_rate": 5.405090606553667e-06, "loss": 0.3554, "step": 8658 }, { "epoch": 1.325019127773527, "grad_norm": 2.366658092343715, "learning_rate": 5.402889668961225e-06, "loss": 0.4061, "step": 8659 }, { "epoch": 1.3251721499617444, "grad_norm": 2.232569419388663, "learning_rate": 5.400689013695311e-06, "loss": 0.3978, "step": 8660 }, { "epoch": 1.3253251721499617, "grad_norm": 2.129469851628214, "learning_rate": 5.398488640891079e-06, "loss": 0.3334, "step": 8661 }, { "epoch": 1.325478194338179, "grad_norm": 2.4309004407524863, "learning_rate": 5.3962885506836594e-06, "loss": 0.415, "step": 8662 }, { "epoch": 1.3256312165263964, "grad_norm": 2.0085523161120835, "learning_rate": 5.39408874320817e-06, "loss": 0.3572, "step": 8663 }, { "epoch": 1.3257842387146135, "grad_norm": 2.117619652748608, "learning_rate": 5.391889218599711e-06, "loss": 0.3793, "step": 8664 }, { "epoch": 1.325937260902831, "grad_norm": 2.451783913784382, "learning_rate": 5.38968997699336e-06, "loss": 0.4311, "step": 8665 }, { "epoch": 1.3260902830910481, "grad_norm": 2.1107048901080963, "learning_rate": 5.387491018524184e-06, "loss": 0.3542, "step": 8666 }, { "epoch": 1.3262433052792655, "grad_norm": 2.220269332344632, "learning_rate": 5.385292343327229e-06, "loss": 0.3762, "step": 8667 }, { "epoch": 1.3263963274674828, "grad_norm": 2.3319220579245328, "learning_rate": 5.383093951537524e-06, "loss": 0.4135, "step": 8668 }, { "epoch": 1.3265493496557001, "grad_norm": 2.1507183519468236, "learning_rate": 5.380895843290079e-06, "loss": 0.3431, "step": 8669 }, { "epoch": 1.3267023718439175, "grad_norm": 2.010715555568859, "learning_rate": 5.3786980187198925e-06, "loss": 0.3584, "step": 8670 }, { "epoch": 1.3268553940321346, "grad_norm": 2.335760014272626, "learning_rate": 5.376500477961939e-06, "loss": 0.3866, "step": 8671 }, { "epoch": 1.327008416220352, "grad_norm": 2.3548091082451705, "learning_rate": 5.374303221151178e-06, "loss": 0.4641, "step": 8672 }, { "epoch": 1.3271614384085693, "grad_norm": 2.077535323157751, "learning_rate": 5.372106248422551e-06, "loss": 0.3462, "step": 8673 }, { "epoch": 1.3273144605967866, "grad_norm": 2.150316112354047, "learning_rate": 5.369909559910985e-06, "loss": 0.3316, "step": 8674 }, { "epoch": 1.327467482785004, "grad_norm": 2.0408943761398035, "learning_rate": 5.367713155751386e-06, "loss": 0.2938, "step": 8675 }, { "epoch": 1.327620504973221, "grad_norm": 2.0607758587739786, "learning_rate": 5.36551703607864e-06, "loss": 0.3099, "step": 8676 }, { "epoch": 1.3277735271614384, "grad_norm": 2.173805194803525, "learning_rate": 5.3633212010276295e-06, "loss": 0.3144, "step": 8677 }, { "epoch": 1.3279265493496557, "grad_norm": 2.1294183601816563, "learning_rate": 5.3611256507332e-06, "loss": 0.3466, "step": 8678 }, { "epoch": 1.328079571537873, "grad_norm": 2.0559929103742736, "learning_rate": 5.358930385330188e-06, "loss": 0.3814, "step": 8679 }, { "epoch": 1.3282325937260904, "grad_norm": 2.164937780009964, "learning_rate": 5.356735404953424e-06, "loss": 0.336, "step": 8680 }, { "epoch": 1.3283856159143075, "grad_norm": 2.0680556793090505, "learning_rate": 5.354540709737696e-06, "loss": 0.3517, "step": 8681 }, { "epoch": 1.3285386381025248, "grad_norm": 2.2848427066316033, "learning_rate": 5.352346299817799e-06, "loss": 0.3799, "step": 8682 }, { "epoch": 1.3286916602907421, "grad_norm": 1.862547574716012, "learning_rate": 5.350152175328502e-06, "loss": 0.3432, "step": 8683 }, { "epoch": 1.3288446824789595, "grad_norm": 2.1285348894501404, "learning_rate": 5.347958336404544e-06, "loss": 0.3473, "step": 8684 }, { "epoch": 1.3289977046671768, "grad_norm": 2.15008899321762, "learning_rate": 5.345764783180665e-06, "loss": 0.356, "step": 8685 }, { "epoch": 1.329150726855394, "grad_norm": 2.3855541790397847, "learning_rate": 5.343571515791584e-06, "loss": 0.4541, "step": 8686 }, { "epoch": 1.3293037490436113, "grad_norm": 2.0715480983178383, "learning_rate": 5.3413785343719835e-06, "loss": 0.3749, "step": 8687 }, { "epoch": 1.3294567712318286, "grad_norm": 2.2500551363113463, "learning_rate": 5.339185839056555e-06, "loss": 0.3332, "step": 8688 }, { "epoch": 1.329609793420046, "grad_norm": 2.391261775337038, "learning_rate": 5.336993429979958e-06, "loss": 0.4095, "step": 8689 }, { "epoch": 1.3297628156082633, "grad_norm": 2.1836965124937944, "learning_rate": 5.3348013072768365e-06, "loss": 0.3513, "step": 8690 }, { "epoch": 1.3299158377964804, "grad_norm": 2.3000977897605535, "learning_rate": 5.332609471081818e-06, "loss": 0.4085, "step": 8691 }, { "epoch": 1.3300688599846977, "grad_norm": 1.9898087771318698, "learning_rate": 5.330417921529509e-06, "loss": 0.3931, "step": 8692 }, { "epoch": 1.330221882172915, "grad_norm": 2.323604812423909, "learning_rate": 5.328226658754503e-06, "loss": 0.3886, "step": 8693 }, { "epoch": 1.3303749043611324, "grad_norm": 1.9935416913349375, "learning_rate": 5.326035682891375e-06, "loss": 0.2802, "step": 8694 }, { "epoch": 1.3305279265493497, "grad_norm": 2.0321753727504985, "learning_rate": 5.3238449940746805e-06, "loss": 0.3618, "step": 8695 }, { "epoch": 1.3306809487375668, "grad_norm": 2.543698718100647, "learning_rate": 5.321654592438958e-06, "loss": 0.3664, "step": 8696 }, { "epoch": 1.3308339709257844, "grad_norm": 2.321863025318773, "learning_rate": 5.319464478118723e-06, "loss": 0.3786, "step": 8697 }, { "epoch": 1.3309869931140015, "grad_norm": 2.1807540364309705, "learning_rate": 5.317274651248495e-06, "loss": 0.3745, "step": 8698 }, { "epoch": 1.3311400153022188, "grad_norm": 1.9410874369549784, "learning_rate": 5.3150851119627445e-06, "loss": 0.2741, "step": 8699 }, { "epoch": 1.3312930374904361, "grad_norm": 2.2986422088732272, "learning_rate": 5.3128958603959415e-06, "loss": 0.3781, "step": 8700 }, { "epoch": 1.3314460596786535, "grad_norm": 1.8028486611526706, "learning_rate": 5.310706896682547e-06, "loss": 0.3409, "step": 8701 }, { "epoch": 1.3315990818668708, "grad_norm": 1.9787947600298408, "learning_rate": 5.308518220956983e-06, "loss": 0.2612, "step": 8702 }, { "epoch": 1.331752104055088, "grad_norm": 1.9880430933707562, "learning_rate": 5.306329833353664e-06, "loss": 0.3417, "step": 8703 }, { "epoch": 1.3319051262433053, "grad_norm": 2.2725659984139157, "learning_rate": 5.3041417340070005e-06, "loss": 0.3574, "step": 8704 }, { "epoch": 1.3320581484315226, "grad_norm": 2.1827794106466465, "learning_rate": 5.301953923051354e-06, "loss": 0.4218, "step": 8705 }, { "epoch": 1.33221117061974, "grad_norm": 2.2996389614314876, "learning_rate": 5.2997664006211e-06, "loss": 0.3736, "step": 8706 }, { "epoch": 1.3323641928079573, "grad_norm": 2.250697167924539, "learning_rate": 5.297579166850584e-06, "loss": 0.3826, "step": 8707 }, { "epoch": 1.3325172149961744, "grad_norm": 2.0961534983528147, "learning_rate": 5.295392221874118e-06, "loss": 0.4577, "step": 8708 }, { "epoch": 1.3326702371843917, "grad_norm": 2.178073703599122, "learning_rate": 5.293205565826025e-06, "loss": 0.3955, "step": 8709 }, { "epoch": 1.332823259372609, "grad_norm": 2.582183841226845, "learning_rate": 5.2910191988405966e-06, "loss": 0.429, "step": 8710 }, { "epoch": 1.3329762815608264, "grad_norm": 2.6280302076259265, "learning_rate": 5.28883312105209e-06, "loss": 0.4491, "step": 8711 }, { "epoch": 1.3331293037490437, "grad_norm": 2.4395766994785677, "learning_rate": 5.286647332594779e-06, "loss": 0.4561, "step": 8712 }, { "epoch": 1.3332823259372608, "grad_norm": 2.257750469790304, "learning_rate": 5.284461833602892e-06, "loss": 0.3722, "step": 8713 }, { "epoch": 1.3334353481254781, "grad_norm": 2.4393338162992078, "learning_rate": 5.2822766242106505e-06, "loss": 0.3524, "step": 8714 }, { "epoch": 1.3335883703136955, "grad_norm": 2.0470819190945777, "learning_rate": 5.280091704552257e-06, "loss": 0.3516, "step": 8715 }, { "epoch": 1.3337413925019128, "grad_norm": 1.8879596223256987, "learning_rate": 5.2779070747618985e-06, "loss": 0.3157, "step": 8716 }, { "epoch": 1.3338944146901301, "grad_norm": 2.1578376231813072, "learning_rate": 5.275722734973739e-06, "loss": 0.3593, "step": 8717 }, { "epoch": 1.3340474368783473, "grad_norm": 2.3849493596618565, "learning_rate": 5.273538685321926e-06, "loss": 0.3537, "step": 8718 }, { "epoch": 1.3342004590665646, "grad_norm": 2.0641129751115175, "learning_rate": 5.2713549259405936e-06, "loss": 0.3433, "step": 8719 }, { "epoch": 1.334353481254782, "grad_norm": 2.0207189663188747, "learning_rate": 5.269171456963853e-06, "loss": 0.504, "step": 8720 }, { "epoch": 1.3345065034429993, "grad_norm": 1.9309222928308578, "learning_rate": 5.266988278525802e-06, "loss": 0.3243, "step": 8721 }, { "epoch": 1.3346595256312166, "grad_norm": 2.417786904966103, "learning_rate": 5.264805390760515e-06, "loss": 0.3591, "step": 8722 }, { "epoch": 1.3348125478194337, "grad_norm": 2.0559938066081114, "learning_rate": 5.262622793802055e-06, "loss": 0.325, "step": 8723 }, { "epoch": 1.334965570007651, "grad_norm": 2.154604476772168, "learning_rate": 5.26044048778446e-06, "loss": 0.3335, "step": 8724 }, { "epoch": 1.3351185921958684, "grad_norm": 2.0975545420310717, "learning_rate": 5.258258472841755e-06, "loss": 0.4099, "step": 8725 }, { "epoch": 1.3352716143840857, "grad_norm": 1.898966736601991, "learning_rate": 5.256076749107946e-06, "loss": 0.3336, "step": 8726 }, { "epoch": 1.335424636572303, "grad_norm": 2.126852100718994, "learning_rate": 5.253895316717028e-06, "loss": 0.3528, "step": 8727 }, { "epoch": 1.3355776587605201, "grad_norm": 2.0082127986354785, "learning_rate": 5.251714175802962e-06, "loss": 0.349, "step": 8728 }, { "epoch": 1.3357306809487377, "grad_norm": 2.424781858285472, "learning_rate": 5.249533326499701e-06, "loss": 0.3659, "step": 8729 }, { "epoch": 1.3358837031369548, "grad_norm": 2.3398283834939924, "learning_rate": 5.247352768941189e-06, "loss": 0.3618, "step": 8730 }, { "epoch": 1.3360367253251721, "grad_norm": 2.0472105115040256, "learning_rate": 5.245172503261331e-06, "loss": 0.3646, "step": 8731 }, { "epoch": 1.3361897475133895, "grad_norm": 2.223173368949995, "learning_rate": 5.242992529594028e-06, "loss": 0.3406, "step": 8732 }, { "epoch": 1.3363427697016068, "grad_norm": 2.12524462674687, "learning_rate": 5.240812848073172e-06, "loss": 0.3169, "step": 8733 }, { "epoch": 1.3364957918898241, "grad_norm": 2.285717102884861, "learning_rate": 5.238633458832607e-06, "loss": 0.3333, "step": 8734 }, { "epoch": 1.3366488140780413, "grad_norm": 2.1594353097887504, "learning_rate": 5.2364543620061935e-06, "loss": 0.3743, "step": 8735 }, { "epoch": 1.3368018362662586, "grad_norm": 2.1247874494512238, "learning_rate": 5.234275557727756e-06, "loss": 0.407, "step": 8736 }, { "epoch": 1.336954858454476, "grad_norm": 2.2331924409935633, "learning_rate": 5.232097046131093e-06, "loss": 0.3355, "step": 8737 }, { "epoch": 1.3371078806426933, "grad_norm": 2.035105727000153, "learning_rate": 5.2299188273500045e-06, "loss": 0.3884, "step": 8738 }, { "epoch": 1.3372609028309106, "grad_norm": 2.419555581257721, "learning_rate": 5.227740901518268e-06, "loss": 0.3536, "step": 8739 }, { "epoch": 1.3374139250191277, "grad_norm": 2.3728108043849385, "learning_rate": 5.225563268769622e-06, "loss": 0.3485, "step": 8740 }, { "epoch": 1.337566947207345, "grad_norm": 1.9635099062925558, "learning_rate": 5.223385929237819e-06, "loss": 0.3223, "step": 8741 }, { "epoch": 1.3377199693955624, "grad_norm": 2.0871070571533488, "learning_rate": 5.221208883056571e-06, "loss": 0.3663, "step": 8742 }, { "epoch": 1.3378729915837797, "grad_norm": 2.1517294117700234, "learning_rate": 5.2190321303595805e-06, "loss": 0.3299, "step": 8743 }, { "epoch": 1.338026013771997, "grad_norm": 2.2349571562423094, "learning_rate": 5.2168556712805295e-06, "loss": 0.3746, "step": 8744 }, { "epoch": 1.3381790359602141, "grad_norm": 2.1111718913928983, "learning_rate": 5.214679505953084e-06, "loss": 0.355, "step": 8745 }, { "epoch": 1.3383320581484315, "grad_norm": 2.071140771626689, "learning_rate": 5.212503634510891e-06, "loss": 0.319, "step": 8746 }, { "epoch": 1.3384850803366488, "grad_norm": 2.1976321183634453, "learning_rate": 5.210328057087579e-06, "loss": 0.327, "step": 8747 }, { "epoch": 1.3386381025248661, "grad_norm": 2.3923131141546423, "learning_rate": 5.208152773816757e-06, "loss": 0.4028, "step": 8748 }, { "epoch": 1.3387911247130835, "grad_norm": 2.283782792526242, "learning_rate": 5.205977784832019e-06, "loss": 0.3377, "step": 8749 }, { "epoch": 1.3389441469013006, "grad_norm": 2.101349490871268, "learning_rate": 5.203803090266941e-06, "loss": 0.281, "step": 8750 }, { "epoch": 1.339097169089518, "grad_norm": 2.0532807907074253, "learning_rate": 5.201628690255076e-06, "loss": 0.3238, "step": 8751 }, { "epoch": 1.3392501912777353, "grad_norm": 2.362591506710098, "learning_rate": 5.199454584929966e-06, "loss": 0.4185, "step": 8752 }, { "epoch": 1.3394032134659526, "grad_norm": 1.8928761243937846, "learning_rate": 5.197280774425129e-06, "loss": 0.3364, "step": 8753 }, { "epoch": 1.33955623565417, "grad_norm": 2.198285759440423, "learning_rate": 5.195107258874068e-06, "loss": 0.3444, "step": 8754 }, { "epoch": 1.339709257842387, "grad_norm": 2.189265677914583, "learning_rate": 5.1929340384102645e-06, "loss": 0.3567, "step": 8755 }, { "epoch": 1.3398622800306044, "grad_norm": 1.9479819167709145, "learning_rate": 5.1907611131671935e-06, "loss": 0.3587, "step": 8756 }, { "epoch": 1.3400153022188217, "grad_norm": 2.002771514837011, "learning_rate": 5.188588483278293e-06, "loss": 0.3138, "step": 8757 }, { "epoch": 1.340168324407039, "grad_norm": 2.510943872707632, "learning_rate": 5.186416148876991e-06, "loss": 0.3849, "step": 8758 }, { "epoch": 1.3403213465952564, "grad_norm": 1.9571166172475, "learning_rate": 5.184244110096715e-06, "loss": 0.3232, "step": 8759 }, { "epoch": 1.3404743687834735, "grad_norm": 1.9050058458205525, "learning_rate": 5.182072367070841e-06, "loss": 0.3442, "step": 8760 }, { "epoch": 1.3406273909716908, "grad_norm": 2.4189439891658333, "learning_rate": 5.1799009199327465e-06, "loss": 0.4003, "step": 8761 }, { "epoch": 1.3407804131599081, "grad_norm": 2.0763029109194515, "learning_rate": 5.177729768815802e-06, "loss": 0.3211, "step": 8762 }, { "epoch": 1.3409334353481255, "grad_norm": 2.116101575691697, "learning_rate": 5.175558913853329e-06, "loss": 0.3508, "step": 8763 }, { "epoch": 1.3410864575363428, "grad_norm": 1.7887941597743418, "learning_rate": 5.17338835517866e-06, "loss": 0.3165, "step": 8764 }, { "epoch": 1.34123947972456, "grad_norm": 2.2457710693606088, "learning_rate": 5.171218092925099e-06, "loss": 0.3803, "step": 8765 }, { "epoch": 1.3413925019127775, "grad_norm": 2.0683172511693626, "learning_rate": 5.169048127225915e-06, "loss": 0.3428, "step": 8766 }, { "epoch": 1.3415455241009946, "grad_norm": 2.1635182390813905, "learning_rate": 5.166878458214391e-06, "loss": 0.3602, "step": 8767 }, { "epoch": 1.341698546289212, "grad_norm": 2.050582046768065, "learning_rate": 5.16470908602377e-06, "loss": 0.3622, "step": 8768 }, { "epoch": 1.3418515684774293, "grad_norm": 2.026326185696356, "learning_rate": 5.162540010787273e-06, "loss": 0.3138, "step": 8769 }, { "epoch": 1.3420045906656466, "grad_norm": 2.0367330156221497, "learning_rate": 5.160371232638122e-06, "loss": 0.303, "step": 8770 }, { "epoch": 1.342157612853864, "grad_norm": 2.1169309129042215, "learning_rate": 5.1582027517095065e-06, "loss": 0.3154, "step": 8771 }, { "epoch": 1.342310635042081, "grad_norm": 2.2530862240372493, "learning_rate": 5.156034568134601e-06, "loss": 0.3594, "step": 8772 }, { "epoch": 1.3424636572302984, "grad_norm": 2.040678051046319, "learning_rate": 5.153866682046562e-06, "loss": 0.3179, "step": 8773 }, { "epoch": 1.3426166794185157, "grad_norm": 1.9484942018819191, "learning_rate": 5.151699093578528e-06, "loss": 0.348, "step": 8774 }, { "epoch": 1.342769701606733, "grad_norm": 2.49836830449986, "learning_rate": 5.149531802863621e-06, "loss": 0.4379, "step": 8775 }, { "epoch": 1.3429227237949504, "grad_norm": 2.272462907141933, "learning_rate": 5.14736481003494e-06, "loss": 0.3124, "step": 8776 }, { "epoch": 1.3430757459831675, "grad_norm": 2.0838929898866057, "learning_rate": 5.1451981152255695e-06, "loss": 0.3627, "step": 8777 }, { "epoch": 1.3432287681713848, "grad_norm": 2.0977369640414794, "learning_rate": 5.143031718568575e-06, "loss": 0.463, "step": 8778 }, { "epoch": 1.3433817903596021, "grad_norm": 2.074263154199314, "learning_rate": 5.140865620197e-06, "loss": 0.3553, "step": 8779 }, { "epoch": 1.3435348125478195, "grad_norm": 2.3498251424537955, "learning_rate": 5.138699820243882e-06, "loss": 0.4366, "step": 8780 }, { "epoch": 1.3436878347360368, "grad_norm": 2.132767275315044, "learning_rate": 5.1365343188422235e-06, "loss": 0.3064, "step": 8781 }, { "epoch": 1.343840856924254, "grad_norm": 2.0608549554257265, "learning_rate": 5.134369116125015e-06, "loss": 0.3042, "step": 8782 }, { "epoch": 1.3439938791124713, "grad_norm": 2.0378925886824732, "learning_rate": 5.13220421222524e-06, "loss": 0.3163, "step": 8783 }, { "epoch": 1.3441469013006886, "grad_norm": 2.2278406207902672, "learning_rate": 5.130039607275844e-06, "loss": 0.3278, "step": 8784 }, { "epoch": 1.344299923488906, "grad_norm": 2.4507146833070106, "learning_rate": 5.127875301409764e-06, "loss": 0.3432, "step": 8785 }, { "epoch": 1.3444529456771233, "grad_norm": 2.048076636441864, "learning_rate": 5.125711294759927e-06, "loss": 0.2924, "step": 8786 }, { "epoch": 1.3446059678653404, "grad_norm": 2.2006190582319496, "learning_rate": 5.123547587459221e-06, "loss": 0.3902, "step": 8787 }, { "epoch": 1.3447589900535577, "grad_norm": 2.1200415996383155, "learning_rate": 5.121384179640539e-06, "loss": 0.4043, "step": 8788 }, { "epoch": 1.344912012241775, "grad_norm": 1.9167752593186782, "learning_rate": 5.119221071436744e-06, "loss": 0.3091, "step": 8789 }, { "epoch": 1.3450650344299924, "grad_norm": 1.7729997894919598, "learning_rate": 5.117058262980668e-06, "loss": 0.2764, "step": 8790 }, { "epoch": 1.3452180566182097, "grad_norm": 2.277714016209161, "learning_rate": 5.114895754405151e-06, "loss": 0.3934, "step": 8791 }, { "epoch": 1.3453710788064268, "grad_norm": 2.04919853608827, "learning_rate": 5.112733545842996e-06, "loss": 0.3216, "step": 8792 }, { "epoch": 1.3455241009946441, "grad_norm": 2.063088383531425, "learning_rate": 5.110571637426992e-06, "loss": 0.3876, "step": 8793 }, { "epoch": 1.3456771231828615, "grad_norm": 2.3164550683532608, "learning_rate": 5.108410029289912e-06, "loss": 0.368, "step": 8794 }, { "epoch": 1.3458301453710788, "grad_norm": 1.9412845305314341, "learning_rate": 5.106248721564509e-06, "loss": 0.3319, "step": 8795 }, { "epoch": 1.3459831675592961, "grad_norm": 2.0398505288619684, "learning_rate": 5.1040877143835154e-06, "loss": 0.3048, "step": 8796 }, { "epoch": 1.3461361897475133, "grad_norm": 1.9405074387765255, "learning_rate": 5.1019270078796476e-06, "loss": 0.335, "step": 8797 }, { "epoch": 1.3462892119357308, "grad_norm": 3.0361304228685215, "learning_rate": 5.099766602185605e-06, "loss": 0.3535, "step": 8798 }, { "epoch": 1.346442234123948, "grad_norm": 2.0142600613872514, "learning_rate": 5.097606497434064e-06, "loss": 0.3425, "step": 8799 }, { "epoch": 1.3465952563121653, "grad_norm": 2.250187533982632, "learning_rate": 5.095446693757688e-06, "loss": 0.37, "step": 8800 }, { "epoch": 1.3467482785003826, "grad_norm": 2.2286570301304756, "learning_rate": 5.093287191289116e-06, "loss": 0.4101, "step": 8801 }, { "epoch": 1.3469013006886, "grad_norm": 2.0538342836101133, "learning_rate": 5.091127990160973e-06, "loss": 0.3505, "step": 8802 }, { "epoch": 1.3470543228768173, "grad_norm": 2.2048180157371773, "learning_rate": 5.088969090505864e-06, "loss": 0.3738, "step": 8803 }, { "epoch": 1.3472073450650344, "grad_norm": 1.901363181601386, "learning_rate": 5.086810492456375e-06, "loss": 0.294, "step": 8804 }, { "epoch": 1.3473603672532517, "grad_norm": 2.07977293833161, "learning_rate": 5.084652196145074e-06, "loss": 0.3492, "step": 8805 }, { "epoch": 1.347513389441469, "grad_norm": 2.2163815188002967, "learning_rate": 5.082494201704511e-06, "loss": 0.2793, "step": 8806 }, { "epoch": 1.3476664116296864, "grad_norm": 2.412112333946498, "learning_rate": 5.0803365092672165e-06, "loss": 0.3754, "step": 8807 }, { "epoch": 1.3478194338179037, "grad_norm": 1.7349318109113925, "learning_rate": 5.078179118965698e-06, "loss": 0.2788, "step": 8808 }, { "epoch": 1.3479724560061208, "grad_norm": 1.9627355364540608, "learning_rate": 5.076022030932461e-06, "loss": 0.3577, "step": 8809 }, { "epoch": 1.3481254781943381, "grad_norm": 1.7114120202932948, "learning_rate": 5.0738652452999715e-06, "loss": 0.3282, "step": 8810 }, { "epoch": 1.3482785003825555, "grad_norm": 2.2111716488185675, "learning_rate": 5.0717087622006844e-06, "loss": 0.2957, "step": 8811 }, { "epoch": 1.3484315225707728, "grad_norm": 1.9715870922408556, "learning_rate": 5.06955258176705e-06, "loss": 0.3563, "step": 8812 }, { "epoch": 1.3485845447589901, "grad_norm": 2.060344670247845, "learning_rate": 5.0673967041314755e-06, "loss": 0.2971, "step": 8813 }, { "epoch": 1.3487375669472073, "grad_norm": 2.13181718352457, "learning_rate": 5.0652411294263615e-06, "loss": 0.3509, "step": 8814 }, { "epoch": 1.3488905891354246, "grad_norm": 2.3479993718903294, "learning_rate": 5.063085857784102e-06, "loss": 0.3717, "step": 8815 }, { "epoch": 1.349043611323642, "grad_norm": 2.4033272572666613, "learning_rate": 5.060930889337047e-06, "loss": 0.3488, "step": 8816 }, { "epoch": 1.3491966335118593, "grad_norm": 2.062225137838282, "learning_rate": 5.05877622421755e-06, "loss": 0.3394, "step": 8817 }, { "epoch": 1.3493496557000766, "grad_norm": 2.1399741737163778, "learning_rate": 5.05662186255794e-06, "loss": 0.346, "step": 8818 }, { "epoch": 1.3495026778882937, "grad_norm": 2.044146746709054, "learning_rate": 5.054467804490513e-06, "loss": 0.3237, "step": 8819 }, { "epoch": 1.349655700076511, "grad_norm": 1.9006878517739, "learning_rate": 5.052314050147567e-06, "loss": 0.3543, "step": 8820 }, { "epoch": 1.3498087222647284, "grad_norm": 1.928889322164962, "learning_rate": 5.050160599661374e-06, "loss": 0.349, "step": 8821 }, { "epoch": 1.3499617444529457, "grad_norm": 2.402232435601587, "learning_rate": 5.048007453164178e-06, "loss": 0.3668, "step": 8822 }, { "epoch": 1.350114766641163, "grad_norm": 2.4871412589958264, "learning_rate": 5.04585461078822e-06, "loss": 0.3781, "step": 8823 }, { "epoch": 1.3502677888293801, "grad_norm": 2.106796208369143, "learning_rate": 5.043702072665711e-06, "loss": 0.3279, "step": 8824 }, { "epoch": 1.3504208110175975, "grad_norm": 2.365383560450284, "learning_rate": 5.041549838928845e-06, "loss": 0.4045, "step": 8825 }, { "epoch": 1.3505738332058148, "grad_norm": 2.451762479109529, "learning_rate": 5.039397909709802e-06, "loss": 0.3966, "step": 8826 }, { "epoch": 1.3507268553940321, "grad_norm": 1.8946555567238423, "learning_rate": 5.037246285140739e-06, "loss": 0.2784, "step": 8827 }, { "epoch": 1.3508798775822495, "grad_norm": 2.370556858656519, "learning_rate": 5.035094965353796e-06, "loss": 0.3991, "step": 8828 }, { "epoch": 1.3510328997704666, "grad_norm": 1.9665823637058073, "learning_rate": 5.032943950481094e-06, "loss": 0.3155, "step": 8829 }, { "epoch": 1.3511859219586841, "grad_norm": 2.1502421747517393, "learning_rate": 5.0307932406547355e-06, "loss": 0.3399, "step": 8830 }, { "epoch": 1.3513389441469013, "grad_norm": 1.9684009046771507, "learning_rate": 5.028642836006803e-06, "loss": 0.305, "step": 8831 }, { "epoch": 1.3514919663351186, "grad_norm": 2.3712114531183466, "learning_rate": 5.0264927366693635e-06, "loss": 0.3648, "step": 8832 }, { "epoch": 1.351644988523336, "grad_norm": 2.1471009526278237, "learning_rate": 5.0243429427744605e-06, "loss": 0.3872, "step": 8833 }, { "epoch": 1.3517980107115533, "grad_norm": 1.8739089991135973, "learning_rate": 5.0221934544541225e-06, "loss": 0.2828, "step": 8834 }, { "epoch": 1.3519510328997706, "grad_norm": 2.4082764083999817, "learning_rate": 5.020044271840358e-06, "loss": 0.3817, "step": 8835 }, { "epoch": 1.3521040550879877, "grad_norm": 2.155006057515205, "learning_rate": 5.017895395065156e-06, "loss": 0.2847, "step": 8836 }, { "epoch": 1.352257077276205, "grad_norm": 2.1032880782979957, "learning_rate": 5.0157468242604835e-06, "loss": 0.3277, "step": 8837 }, { "epoch": 1.3524100994644224, "grad_norm": 2.0750944376718614, "learning_rate": 5.013598559558306e-06, "loss": 0.3113, "step": 8838 }, { "epoch": 1.3525631216526397, "grad_norm": 2.1837175276655416, "learning_rate": 5.011450601090544e-06, "loss": 0.3165, "step": 8839 }, { "epoch": 1.352716143840857, "grad_norm": 2.101554636022105, "learning_rate": 5.009302948989111e-06, "loss": 0.3534, "step": 8840 }, { "epoch": 1.3528691660290741, "grad_norm": 2.259872323501473, "learning_rate": 5.007155603385916e-06, "loss": 0.3817, "step": 8841 }, { "epoch": 1.3530221882172915, "grad_norm": 2.1371804613305536, "learning_rate": 5.005008564412823e-06, "loss": 0.366, "step": 8842 }, { "epoch": 1.3531752104055088, "grad_norm": 1.783351213021392, "learning_rate": 5.002861832201691e-06, "loss": 0.2375, "step": 8843 }, { "epoch": 1.3533282325937261, "grad_norm": 2.4210398863460836, "learning_rate": 5.0007154068843714e-06, "loss": 0.4451, "step": 8844 }, { "epoch": 1.3534812547819435, "grad_norm": 2.109957799601956, "learning_rate": 4.9985692885926675e-06, "loss": 0.3213, "step": 8845 }, { "epoch": 1.3536342769701606, "grad_norm": 2.201835379953216, "learning_rate": 4.996423477458393e-06, "loss": 0.3522, "step": 8846 }, { "epoch": 1.353787299158378, "grad_norm": 2.2332666443797913, "learning_rate": 4.994277973613331e-06, "loss": 0.3499, "step": 8847 }, { "epoch": 1.3539403213465953, "grad_norm": 2.0828069249606247, "learning_rate": 4.992132777189234e-06, "loss": 0.3706, "step": 8848 }, { "epoch": 1.3540933435348126, "grad_norm": 2.067829402167768, "learning_rate": 4.9899878883178565e-06, "loss": 0.4055, "step": 8849 }, { "epoch": 1.35424636572303, "grad_norm": 2.234378046868571, "learning_rate": 4.987843307130923e-06, "loss": 0.318, "step": 8850 }, { "epoch": 1.354399387911247, "grad_norm": 2.2611687807693874, "learning_rate": 4.98569903376014e-06, "loss": 0.3289, "step": 8851 }, { "epoch": 1.3545524100994644, "grad_norm": 2.178487669182021, "learning_rate": 4.983555068337194e-06, "loss": 0.3294, "step": 8852 }, { "epoch": 1.3547054322876817, "grad_norm": 2.223404581120469, "learning_rate": 4.981411410993756e-06, "loss": 0.3945, "step": 8853 }, { "epoch": 1.354858454475899, "grad_norm": 1.9874431190206212, "learning_rate": 4.979268061861477e-06, "loss": 0.3015, "step": 8854 }, { "epoch": 1.3550114766641164, "grad_norm": 2.1072934649507506, "learning_rate": 4.977125021071988e-06, "loss": 0.3378, "step": 8855 }, { "epoch": 1.3551644988523335, "grad_norm": 2.341148964981265, "learning_rate": 4.9749822887569e-06, "loss": 0.3951, "step": 8856 }, { "epoch": 1.3553175210405508, "grad_norm": 2.208386708307971, "learning_rate": 4.9728398650478075e-06, "loss": 0.3591, "step": 8857 }, { "epoch": 1.3554705432287681, "grad_norm": 2.1163674385472375, "learning_rate": 4.970697750076282e-06, "loss": 0.3211, "step": 8858 }, { "epoch": 1.3556235654169855, "grad_norm": 1.960454860325222, "learning_rate": 4.96855594397389e-06, "loss": 0.304, "step": 8859 }, { "epoch": 1.3557765876052028, "grad_norm": 2.192707172753196, "learning_rate": 4.9664144468721565e-06, "loss": 0.3023, "step": 8860 }, { "epoch": 1.35592960979342, "grad_norm": 2.0167491244911315, "learning_rate": 4.9642732589025986e-06, "loss": 0.2747, "step": 8861 }, { "epoch": 1.3560826319816373, "grad_norm": 2.2142639849416836, "learning_rate": 4.962132380196729e-06, "loss": 0.3612, "step": 8862 }, { "epoch": 1.3562356541698546, "grad_norm": 2.2930940087859524, "learning_rate": 4.959991810886013e-06, "loss": 0.3922, "step": 8863 }, { "epoch": 1.356388676358072, "grad_norm": 2.0111407859260892, "learning_rate": 4.957851551101915e-06, "loss": 0.2614, "step": 8864 }, { "epoch": 1.3565416985462893, "grad_norm": 2.2532932543779105, "learning_rate": 4.955711600975883e-06, "loss": 0.3617, "step": 8865 }, { "epoch": 1.3566947207345064, "grad_norm": 2.3137796151354393, "learning_rate": 4.95357196063933e-06, "loss": 0.3794, "step": 8866 }, { "epoch": 1.356847742922724, "grad_norm": 2.261134143687513, "learning_rate": 4.9514326302236655e-06, "loss": 0.3776, "step": 8867 }, { "epoch": 1.357000765110941, "grad_norm": 2.071670886754076, "learning_rate": 4.94929360986028e-06, "loss": 0.3288, "step": 8868 }, { "epoch": 1.3571537872991584, "grad_norm": 2.1015927260956824, "learning_rate": 4.947154899680523e-06, "loss": 0.3423, "step": 8869 }, { "epoch": 1.3573068094873757, "grad_norm": 2.140147633865237, "learning_rate": 4.9450164998157545e-06, "loss": 0.339, "step": 8870 }, { "epoch": 1.357459831675593, "grad_norm": 2.014679294585402, "learning_rate": 4.9428784103973025e-06, "loss": 0.3075, "step": 8871 }, { "epoch": 1.3576128538638104, "grad_norm": 2.0835625273825706, "learning_rate": 4.940740631556462e-06, "loss": 0.3462, "step": 8872 }, { "epoch": 1.3577658760520275, "grad_norm": 2.182089133357909, "learning_rate": 4.9386031634245365e-06, "loss": 0.3325, "step": 8873 }, { "epoch": 1.3579188982402448, "grad_norm": 2.0004273732212234, "learning_rate": 4.936466006132791e-06, "loss": 0.3194, "step": 8874 }, { "epoch": 1.3580719204284621, "grad_norm": 2.1668334947043952, "learning_rate": 4.934329159812476e-06, "loss": 0.3887, "step": 8875 }, { "epoch": 1.3582249426166795, "grad_norm": 2.2245033231188946, "learning_rate": 4.932192624594824e-06, "loss": 0.3336, "step": 8876 }, { "epoch": 1.3583779648048968, "grad_norm": 2.4973226364697925, "learning_rate": 4.9300564006110486e-06, "loss": 0.4022, "step": 8877 }, { "epoch": 1.358530986993114, "grad_norm": 2.308538977320974, "learning_rate": 4.9279204879923425e-06, "loss": 0.3662, "step": 8878 }, { "epoch": 1.3586840091813313, "grad_norm": 2.2287780866163462, "learning_rate": 4.925784886869883e-06, "loss": 0.354, "step": 8879 }, { "epoch": 1.3588370313695486, "grad_norm": 2.26226499220998, "learning_rate": 4.923649597374823e-06, "loss": 0.3948, "step": 8880 }, { "epoch": 1.358990053557766, "grad_norm": 2.397765277999804, "learning_rate": 4.9215146196383e-06, "loss": 0.3911, "step": 8881 }, { "epoch": 1.3591430757459833, "grad_norm": 2.095255008366767, "learning_rate": 4.919379953791432e-06, "loss": 0.334, "step": 8882 }, { "epoch": 1.3592960979342004, "grad_norm": 2.3511037115621978, "learning_rate": 4.917245599965317e-06, "loss": 0.3601, "step": 8883 }, { "epoch": 1.3594491201224177, "grad_norm": 2.20916949291957, "learning_rate": 4.9151115582910324e-06, "loss": 0.357, "step": 8884 }, { "epoch": 1.359602142310635, "grad_norm": 2.2283270514852997, "learning_rate": 4.912977828899639e-06, "loss": 0.3641, "step": 8885 }, { "epoch": 1.3597551644988524, "grad_norm": 1.944639349996175, "learning_rate": 4.910844411922179e-06, "loss": 0.4083, "step": 8886 }, { "epoch": 1.3599081866870697, "grad_norm": 2.0157061926550712, "learning_rate": 4.908711307489671e-06, "loss": 0.3363, "step": 8887 }, { "epoch": 1.3600612088752868, "grad_norm": 1.9664016887617008, "learning_rate": 4.90657851573312e-06, "loss": 0.3352, "step": 8888 }, { "epoch": 1.3602142310635041, "grad_norm": 2.1423036098364703, "learning_rate": 4.904446036783508e-06, "loss": 0.3169, "step": 8889 }, { "epoch": 1.3603672532517215, "grad_norm": 2.1307687150076404, "learning_rate": 4.902313870771795e-06, "loss": 0.3136, "step": 8890 }, { "epoch": 1.3605202754399388, "grad_norm": 2.037116383219786, "learning_rate": 4.9001820178289365e-06, "loss": 0.3409, "step": 8891 }, { "epoch": 1.3606732976281561, "grad_norm": 2.04431583800202, "learning_rate": 4.898050478085846e-06, "loss": 0.3112, "step": 8892 }, { "epoch": 1.3608263198163733, "grad_norm": 2.1739991034642325, "learning_rate": 4.895919251673432e-06, "loss": 0.339, "step": 8893 }, { "epoch": 1.3609793420045906, "grad_norm": 1.9316321394254719, "learning_rate": 4.893788338722591e-06, "loss": 0.3547, "step": 8894 }, { "epoch": 1.361132364192808, "grad_norm": 1.8223272300386113, "learning_rate": 4.891657739364177e-06, "loss": 0.3305, "step": 8895 }, { "epoch": 1.3612853863810253, "grad_norm": 1.873250474892585, "learning_rate": 4.88952745372905e-06, "loss": 0.3082, "step": 8896 }, { "epoch": 1.3614384085692426, "grad_norm": 2.205921503536888, "learning_rate": 4.887397481948036e-06, "loss": 0.3402, "step": 8897 }, { "epoch": 1.3615914307574597, "grad_norm": 2.5016859094591055, "learning_rate": 4.885267824151937e-06, "loss": 0.3385, "step": 8898 }, { "epoch": 1.3617444529456773, "grad_norm": 2.085347385042572, "learning_rate": 4.883138480471553e-06, "loss": 0.3988, "step": 8899 }, { "epoch": 1.3618974751338944, "grad_norm": 2.2051244044589557, "learning_rate": 4.881009451037656e-06, "loss": 0.3533, "step": 8900 }, { "epoch": 1.3620504973221117, "grad_norm": 2.1863458371965683, "learning_rate": 4.878880735980986e-06, "loss": 0.3619, "step": 8901 }, { "epoch": 1.362203519510329, "grad_norm": 2.271622039133187, "learning_rate": 4.876752335432288e-06, "loss": 0.3355, "step": 8902 }, { "epoch": 1.3623565416985464, "grad_norm": 2.132277396146395, "learning_rate": 4.874624249522273e-06, "loss": 0.3664, "step": 8903 }, { "epoch": 1.3625095638867637, "grad_norm": 1.9125739210979713, "learning_rate": 4.872496478381632e-06, "loss": 0.2759, "step": 8904 }, { "epoch": 1.3626625860749808, "grad_norm": 2.2241444950777916, "learning_rate": 4.870369022141042e-06, "loss": 0.3585, "step": 8905 }, { "epoch": 1.3628156082631981, "grad_norm": 2.1451735908210527, "learning_rate": 4.868241880931157e-06, "loss": 0.3512, "step": 8906 }, { "epoch": 1.3629686304514155, "grad_norm": 1.9142230746245907, "learning_rate": 4.866115054882613e-06, "loss": 0.328, "step": 8907 }, { "epoch": 1.3631216526396328, "grad_norm": 2.368547829079924, "learning_rate": 4.863988544126028e-06, "loss": 0.3551, "step": 8908 }, { "epoch": 1.3632746748278501, "grad_norm": 2.04742148952542, "learning_rate": 4.861862348791999e-06, "loss": 0.3481, "step": 8909 }, { "epoch": 1.3634276970160673, "grad_norm": 2.1096845873170342, "learning_rate": 4.859736469011104e-06, "loss": 0.3453, "step": 8910 }, { "epoch": 1.3635807192042846, "grad_norm": 1.9345016136926325, "learning_rate": 4.8576109049139e-06, "loss": 0.3191, "step": 8911 }, { "epoch": 1.363733741392502, "grad_norm": 2.064452971674517, "learning_rate": 4.8554856566309286e-06, "loss": 0.3252, "step": 8912 }, { "epoch": 1.3638867635807193, "grad_norm": 2.164800719738148, "learning_rate": 4.853360724292707e-06, "loss": 0.3702, "step": 8913 }, { "epoch": 1.3640397857689366, "grad_norm": 2.0398684616518272, "learning_rate": 4.851236108029739e-06, "loss": 0.2936, "step": 8914 }, { "epoch": 1.3641928079571537, "grad_norm": 2.3919411510047968, "learning_rate": 4.849111807972502e-06, "loss": 0.4252, "step": 8915 }, { "epoch": 1.364345830145371, "grad_norm": 2.317113758644946, "learning_rate": 4.84698782425146e-06, "loss": 0.3417, "step": 8916 }, { "epoch": 1.3644988523335884, "grad_norm": 1.813125371501577, "learning_rate": 4.844864156997054e-06, "loss": 0.2696, "step": 8917 }, { "epoch": 1.3646518745218057, "grad_norm": 2.030357912038102, "learning_rate": 4.842740806339709e-06, "loss": 0.3526, "step": 8918 }, { "epoch": 1.364804896710023, "grad_norm": 1.9203863871860456, "learning_rate": 4.84061777240982e-06, "loss": 0.3105, "step": 8919 }, { "epoch": 1.3649579188982401, "grad_norm": 2.4389896556711, "learning_rate": 4.8384950553377865e-06, "loss": 0.4105, "step": 8920 }, { "epoch": 1.3651109410864575, "grad_norm": 2.156091012678082, "learning_rate": 4.8363726552539595e-06, "loss": 0.357, "step": 8921 }, { "epoch": 1.3652639632746748, "grad_norm": 2.2964416250356208, "learning_rate": 4.8342505722886835e-06, "loss": 0.4063, "step": 8922 }, { "epoch": 1.3654169854628921, "grad_norm": 2.103168718904348, "learning_rate": 4.832128806572296e-06, "loss": 0.3337, "step": 8923 }, { "epoch": 1.3655700076511095, "grad_norm": 1.8160059643896378, "learning_rate": 4.8300073582350924e-06, "loss": 0.3028, "step": 8924 }, { "epoch": 1.3657230298393266, "grad_norm": 2.2057911487268944, "learning_rate": 4.827886227407358e-06, "loss": 0.3627, "step": 8925 }, { "epoch": 1.365876052027544, "grad_norm": 2.1394622208954064, "learning_rate": 4.825765414219371e-06, "loss": 0.3153, "step": 8926 }, { "epoch": 1.3660290742157613, "grad_norm": 1.9790746960660583, "learning_rate": 4.823644918801363e-06, "loss": 0.2966, "step": 8927 }, { "epoch": 1.3661820964039786, "grad_norm": 2.1585848700459462, "learning_rate": 4.821524741283577e-06, "loss": 0.3889, "step": 8928 }, { "epoch": 1.366335118592196, "grad_norm": 1.9951219879291964, "learning_rate": 4.819404881796217e-06, "loss": 0.3879, "step": 8929 }, { "epoch": 1.366488140780413, "grad_norm": 2.0365099355278033, "learning_rate": 4.817285340469462e-06, "loss": 0.3067, "step": 8930 }, { "epoch": 1.3666411629686306, "grad_norm": 2.386301644489069, "learning_rate": 4.815166117433493e-06, "loss": 0.3994, "step": 8931 }, { "epoch": 1.3667941851568477, "grad_norm": 2.237602958997356, "learning_rate": 4.813047212818457e-06, "loss": 0.4376, "step": 8932 }, { "epoch": 1.366947207345065, "grad_norm": 2.2080395359941116, "learning_rate": 4.810928626754482e-06, "loss": 0.3744, "step": 8933 }, { "epoch": 1.3671002295332824, "grad_norm": 2.213416724594083, "learning_rate": 4.808810359371681e-06, "loss": 0.3219, "step": 8934 }, { "epoch": 1.3672532517214997, "grad_norm": 2.2757376646140717, "learning_rate": 4.806692410800142e-06, "loss": 0.3351, "step": 8935 }, { "epoch": 1.367406273909717, "grad_norm": 2.275814186271426, "learning_rate": 4.804574781169941e-06, "loss": 0.3565, "step": 8936 }, { "epoch": 1.3675592960979341, "grad_norm": 2.00049387430229, "learning_rate": 4.802457470611125e-06, "loss": 0.3045, "step": 8937 }, { "epoch": 1.3677123182861515, "grad_norm": 2.0273766683214838, "learning_rate": 4.800340479253729e-06, "loss": 0.3273, "step": 8938 }, { "epoch": 1.3678653404743688, "grad_norm": 2.100353583194502, "learning_rate": 4.798223807227767e-06, "loss": 0.3112, "step": 8939 }, { "epoch": 1.3680183626625861, "grad_norm": 2.173515984420499, "learning_rate": 4.796107454663225e-06, "loss": 0.3939, "step": 8940 }, { "epoch": 1.3681713848508035, "grad_norm": 2.2645920139454248, "learning_rate": 4.79399142169009e-06, "loss": 0.3357, "step": 8941 }, { "epoch": 1.3683244070390206, "grad_norm": 2.1304360217955156, "learning_rate": 4.791875708438304e-06, "loss": 0.3591, "step": 8942 }, { "epoch": 1.368477429227238, "grad_norm": 2.016668890017917, "learning_rate": 4.789760315037801e-06, "loss": 0.3164, "step": 8943 }, { "epoch": 1.3686304514154553, "grad_norm": 2.102649703832163, "learning_rate": 4.7876452416185075e-06, "loss": 0.3938, "step": 8944 }, { "epoch": 1.3687834736036726, "grad_norm": 2.3476389450020188, "learning_rate": 4.785530488310307e-06, "loss": 0.4252, "step": 8945 }, { "epoch": 1.36893649579189, "grad_norm": 1.8083889949743437, "learning_rate": 4.783416055243074e-06, "loss": 0.2638, "step": 8946 }, { "epoch": 1.369089517980107, "grad_norm": 1.6592216434472842, "learning_rate": 4.781301942546677e-06, "loss": 0.2525, "step": 8947 }, { "epoch": 1.3692425401683244, "grad_norm": 2.2613655519561515, "learning_rate": 4.779188150350934e-06, "loss": 0.4171, "step": 8948 }, { "epoch": 1.3693955623565417, "grad_norm": 2.0986112105450214, "learning_rate": 4.777074678785676e-06, "loss": 0.3432, "step": 8949 }, { "epoch": 1.369548584544759, "grad_norm": 2.16417414807789, "learning_rate": 4.774961527980697e-06, "loss": 0.3598, "step": 8950 }, { "epoch": 1.3697016067329764, "grad_norm": 2.5301061654646255, "learning_rate": 4.772848698065764e-06, "loss": 0.4318, "step": 8951 }, { "epoch": 1.3698546289211935, "grad_norm": 1.9819120670022627, "learning_rate": 4.770736189170645e-06, "loss": 0.3328, "step": 8952 }, { "epoch": 1.3700076511094108, "grad_norm": 2.079685492030615, "learning_rate": 4.768624001425077e-06, "loss": 0.4102, "step": 8953 }, { "epoch": 1.3701606732976281, "grad_norm": 2.0838078795974835, "learning_rate": 4.766512134958767e-06, "loss": 0.3124, "step": 8954 }, { "epoch": 1.3703136954858455, "grad_norm": 1.9182397991758027, "learning_rate": 4.764400589901424e-06, "loss": 0.3356, "step": 8955 }, { "epoch": 1.3704667176740628, "grad_norm": 2.0187379873078153, "learning_rate": 4.762289366382723e-06, "loss": 0.3335, "step": 8956 }, { "epoch": 1.37061973986228, "grad_norm": 2.392520798804102, "learning_rate": 4.7601784645323225e-06, "loss": 0.4488, "step": 8957 }, { "epoch": 1.3707727620504973, "grad_norm": 2.2128964626438536, "learning_rate": 4.75806788447986e-06, "loss": 0.3515, "step": 8958 }, { "epoch": 1.3709257842387146, "grad_norm": 2.1733235228362147, "learning_rate": 4.755957626354954e-06, "loss": 0.3589, "step": 8959 }, { "epoch": 1.371078806426932, "grad_norm": 2.0313878062789326, "learning_rate": 4.753847690287207e-06, "loss": 0.2957, "step": 8960 }, { "epoch": 1.3712318286151493, "grad_norm": 2.1544020326434588, "learning_rate": 4.751738076406196e-06, "loss": 0.345, "step": 8961 }, { "epoch": 1.3713848508033664, "grad_norm": 2.1167948771347382, "learning_rate": 4.74962878484148e-06, "loss": 0.4013, "step": 8962 }, { "epoch": 1.371537872991584, "grad_norm": 2.2116358842619195, "learning_rate": 4.747519815722601e-06, "loss": 0.347, "step": 8963 }, { "epoch": 1.371690895179801, "grad_norm": 2.2257642473680743, "learning_rate": 4.7454111691790785e-06, "loss": 0.4065, "step": 8964 }, { "epoch": 1.3718439173680184, "grad_norm": 2.373332905259864, "learning_rate": 4.743302845340411e-06, "loss": 0.3881, "step": 8965 }, { "epoch": 1.3719969395562357, "grad_norm": 1.9971503672491702, "learning_rate": 4.74119484433608e-06, "loss": 0.3295, "step": 8966 }, { "epoch": 1.372149961744453, "grad_norm": 2.164576400339109, "learning_rate": 4.739087166295546e-06, "loss": 0.368, "step": 8967 }, { "epoch": 1.3723029839326704, "grad_norm": 2.1304626352278735, "learning_rate": 4.73697981134825e-06, "loss": 0.3502, "step": 8968 }, { "epoch": 1.3724560061208875, "grad_norm": 1.9351491350127181, "learning_rate": 4.734872779623611e-06, "loss": 0.3074, "step": 8969 }, { "epoch": 1.3726090283091048, "grad_norm": 1.8804547668574492, "learning_rate": 4.732766071251037e-06, "loss": 0.3111, "step": 8970 }, { "epoch": 1.3727620504973221, "grad_norm": 2.2549524395110825, "learning_rate": 4.730659686359901e-06, "loss": 0.3449, "step": 8971 }, { "epoch": 1.3729150726855395, "grad_norm": 2.242806963979162, "learning_rate": 4.728553625079564e-06, "loss": 0.3164, "step": 8972 }, { "epoch": 1.3730680948737568, "grad_norm": 2.11930174434817, "learning_rate": 4.726447887539378e-06, "loss": 0.3364, "step": 8973 }, { "epoch": 1.373221117061974, "grad_norm": 2.470435497406207, "learning_rate": 4.724342473868655e-06, "loss": 0.4222, "step": 8974 }, { "epoch": 1.3733741392501913, "grad_norm": 2.3682646592639736, "learning_rate": 4.722237384196694e-06, "loss": 0.3675, "step": 8975 }, { "epoch": 1.3735271614384086, "grad_norm": 2.3977878266611667, "learning_rate": 4.72013261865279e-06, "loss": 0.3816, "step": 8976 }, { "epoch": 1.373680183626626, "grad_norm": 1.8686907034229359, "learning_rate": 4.71802817736619e-06, "loss": 0.2997, "step": 8977 }, { "epoch": 1.3738332058148433, "grad_norm": 2.14486465113667, "learning_rate": 4.715924060466145e-06, "loss": 0.3721, "step": 8978 }, { "epoch": 1.3739862280030604, "grad_norm": 2.2985453789295627, "learning_rate": 4.713820268081879e-06, "loss": 0.3437, "step": 8979 }, { "epoch": 1.3741392501912777, "grad_norm": 2.227704431604356, "learning_rate": 4.711716800342584e-06, "loss": 0.3468, "step": 8980 }, { "epoch": 1.374292272379495, "grad_norm": 2.204147290592917, "learning_rate": 4.709613657377449e-06, "loss": 0.3204, "step": 8981 }, { "epoch": 1.3744452945677124, "grad_norm": 2.1549027912943712, "learning_rate": 4.707510839315642e-06, "loss": 0.3232, "step": 8982 }, { "epoch": 1.3745983167559297, "grad_norm": 1.9921345110290458, "learning_rate": 4.705408346286291e-06, "loss": 0.3144, "step": 8983 }, { "epoch": 1.3747513389441468, "grad_norm": 2.0591857889441956, "learning_rate": 4.703306178418529e-06, "loss": 0.3305, "step": 8984 }, { "epoch": 1.3749043611323641, "grad_norm": 2.094890599785986, "learning_rate": 4.701204335841455e-06, "loss": 0.2791, "step": 8985 }, { "epoch": 1.3750573833205815, "grad_norm": 2.1299628438008753, "learning_rate": 4.699102818684151e-06, "loss": 0.2865, "step": 8986 }, { "epoch": 1.3752104055087988, "grad_norm": 2.378452431658748, "learning_rate": 4.6970016270756826e-06, "loss": 0.4039, "step": 8987 }, { "epoch": 1.3753634276970161, "grad_norm": 1.941247460390366, "learning_rate": 4.694900761145088e-06, "loss": 0.2924, "step": 8988 }, { "epoch": 1.3755164498852332, "grad_norm": 2.0735387694712673, "learning_rate": 4.692800221021392e-06, "loss": 0.283, "step": 8989 }, { "epoch": 1.3756694720734506, "grad_norm": 2.1748933607293783, "learning_rate": 4.690700006833595e-06, "loss": 0.3652, "step": 8990 }, { "epoch": 1.375822494261668, "grad_norm": 2.2313085481512647, "learning_rate": 4.688600118710682e-06, "loss": 0.3407, "step": 8991 }, { "epoch": 1.3759755164498852, "grad_norm": 2.125396345210953, "learning_rate": 4.686500556781614e-06, "loss": 0.3484, "step": 8992 }, { "epoch": 1.3761285386381026, "grad_norm": 2.442831053015856, "learning_rate": 4.684401321175333e-06, "loss": 0.3715, "step": 8993 }, { "epoch": 1.3762815608263197, "grad_norm": 2.264403792995592, "learning_rate": 4.6823024120207615e-06, "loss": 0.3353, "step": 8994 }, { "epoch": 1.376434583014537, "grad_norm": 2.153718862608068, "learning_rate": 4.680203829446802e-06, "loss": 0.3349, "step": 8995 }, { "epoch": 1.3765876052027544, "grad_norm": 2.2678638376517317, "learning_rate": 4.678105573582337e-06, "loss": 0.3347, "step": 8996 }, { "epoch": 1.3767406273909717, "grad_norm": 2.143262059852469, "learning_rate": 4.676007644556226e-06, "loss": 0.3476, "step": 8997 }, { "epoch": 1.376893649579189, "grad_norm": 1.8505114454183178, "learning_rate": 4.673910042497312e-06, "loss": 0.3635, "step": 8998 }, { "epoch": 1.3770466717674061, "grad_norm": 1.9620249089509953, "learning_rate": 4.671812767534424e-06, "loss": 0.2853, "step": 8999 }, { "epoch": 1.3771996939556237, "grad_norm": 2.2096240025262563, "learning_rate": 4.669715819796354e-06, "loss": 0.3826, "step": 9000 }, { "epoch": 1.3773527161438408, "grad_norm": 2.0471221789313048, "learning_rate": 4.667619199411886e-06, "loss": 0.3303, "step": 9001 }, { "epoch": 1.3775057383320581, "grad_norm": 2.219398951091194, "learning_rate": 4.665522906509789e-06, "loss": 0.3383, "step": 9002 }, { "epoch": 1.3776587605202755, "grad_norm": 2.1239169778671663, "learning_rate": 4.663426941218796e-06, "loss": 0.3643, "step": 9003 }, { "epoch": 1.3778117827084928, "grad_norm": 2.2161074087918413, "learning_rate": 4.661331303667627e-06, "loss": 0.3544, "step": 9004 }, { "epoch": 1.3779648048967101, "grad_norm": 1.8727061163511225, "learning_rate": 4.659235993984995e-06, "loss": 0.2673, "step": 9005 }, { "epoch": 1.3781178270849272, "grad_norm": 1.9336210558087923, "learning_rate": 4.657141012299567e-06, "loss": 0.3544, "step": 9006 }, { "epoch": 1.3782708492731446, "grad_norm": 2.126956772127913, "learning_rate": 4.655046358740014e-06, "loss": 0.4919, "step": 9007 }, { "epoch": 1.378423871461362, "grad_norm": 2.0412566209981478, "learning_rate": 4.652952033434978e-06, "loss": 0.3247, "step": 9008 }, { "epoch": 1.3785768936495792, "grad_norm": 1.9837314117934364, "learning_rate": 4.650858036513067e-06, "loss": 0.3384, "step": 9009 }, { "epoch": 1.3787299158377966, "grad_norm": 2.0790714883936134, "learning_rate": 4.648764368102894e-06, "loss": 0.2951, "step": 9010 }, { "epoch": 1.3788829380260137, "grad_norm": 2.0441032696122616, "learning_rate": 4.646671028333039e-06, "loss": 0.358, "step": 9011 }, { "epoch": 1.379035960214231, "grad_norm": 2.026421631547, "learning_rate": 4.644578017332052e-06, "loss": 0.4144, "step": 9012 }, { "epoch": 1.3791889824024484, "grad_norm": 2.2121542332323854, "learning_rate": 4.642485335228483e-06, "loss": 0.3838, "step": 9013 }, { "epoch": 1.3793420045906657, "grad_norm": 2.218639370811378, "learning_rate": 4.640392982150846e-06, "loss": 0.3431, "step": 9014 }, { "epoch": 1.379495026778883, "grad_norm": 2.037830779490401, "learning_rate": 4.638300958227645e-06, "loss": 0.3522, "step": 9015 }, { "epoch": 1.3796480489671001, "grad_norm": 2.410335657240026, "learning_rate": 4.636209263587358e-06, "loss": 0.3976, "step": 9016 }, { "epoch": 1.3798010711553175, "grad_norm": 2.1241097054830167, "learning_rate": 4.634117898358441e-06, "loss": 0.3065, "step": 9017 }, { "epoch": 1.3799540933435348, "grad_norm": 2.343825620509157, "learning_rate": 4.632026862669337e-06, "loss": 0.3899, "step": 9018 }, { "epoch": 1.3801071155317521, "grad_norm": 1.9525031937885948, "learning_rate": 4.629936156648463e-06, "loss": 0.2961, "step": 9019 }, { "epoch": 1.3802601377199695, "grad_norm": 2.134029971911187, "learning_rate": 4.627845780424217e-06, "loss": 0.3464, "step": 9020 }, { "epoch": 1.3804131599081866, "grad_norm": 1.9946961858753256, "learning_rate": 4.625755734124977e-06, "loss": 0.3302, "step": 9021 }, { "epoch": 1.380566182096404, "grad_norm": 1.9751054972586108, "learning_rate": 4.623666017879098e-06, "loss": 0.2879, "step": 9022 }, { "epoch": 1.3807192042846212, "grad_norm": 2.1883779520669115, "learning_rate": 4.62157663181493e-06, "loss": 0.2725, "step": 9023 }, { "epoch": 1.3808722264728386, "grad_norm": 1.8160688719204534, "learning_rate": 4.619487576060777e-06, "loss": 0.2921, "step": 9024 }, { "epoch": 1.381025248661056, "grad_norm": 2.04738295569202, "learning_rate": 4.6173988507449366e-06, "loss": 0.3446, "step": 9025 }, { "epoch": 1.381178270849273, "grad_norm": 1.970933426371676, "learning_rate": 4.615310455995697e-06, "loss": 0.2965, "step": 9026 }, { "epoch": 1.3813312930374904, "grad_norm": 2.265268014430741, "learning_rate": 4.613222391941304e-06, "loss": 0.2872, "step": 9027 }, { "epoch": 1.3814843152257077, "grad_norm": 2.139073356126362, "learning_rate": 4.611134658709992e-06, "loss": 0.3425, "step": 9028 }, { "epoch": 1.381637337413925, "grad_norm": 2.07292711463194, "learning_rate": 4.609047256429992e-06, "loss": 0.3747, "step": 9029 }, { "epoch": 1.3817903596021424, "grad_norm": 2.268761758492247, "learning_rate": 4.60696018522948e-06, "loss": 0.38, "step": 9030 }, { "epoch": 1.3819433817903595, "grad_norm": 2.2366077257417785, "learning_rate": 4.604873445236645e-06, "loss": 0.3249, "step": 9031 }, { "epoch": 1.382096403978577, "grad_norm": 2.013369317159582, "learning_rate": 4.602787036579643e-06, "loss": 0.3385, "step": 9032 }, { "epoch": 1.3822494261667941, "grad_norm": 2.0345532889136653, "learning_rate": 4.600700959386592e-06, "loss": 0.2601, "step": 9033 }, { "epoch": 1.3824024483550115, "grad_norm": 2.1497689903647235, "learning_rate": 4.598615213785624e-06, "loss": 0.3804, "step": 9034 }, { "epoch": 1.3825554705432288, "grad_norm": 2.249995107110444, "learning_rate": 4.596529799904825e-06, "loss": 0.3559, "step": 9035 }, { "epoch": 1.3827084927314461, "grad_norm": 1.8239006623904237, "learning_rate": 4.594444717872269e-06, "loss": 0.2621, "step": 9036 }, { "epoch": 1.3828615149196635, "grad_norm": 2.023991229226579, "learning_rate": 4.592359967816012e-06, "loss": 0.3317, "step": 9037 }, { "epoch": 1.3830145371078806, "grad_norm": 2.0859580119335326, "learning_rate": 4.590275549864081e-06, "loss": 0.3042, "step": 9038 }, { "epoch": 1.383167559296098, "grad_norm": 2.376028679244968, "learning_rate": 4.5881914641444936e-06, "loss": 0.3887, "step": 9039 }, { "epoch": 1.3833205814843152, "grad_norm": 2.0554311647854164, "learning_rate": 4.586107710785238e-06, "loss": 0.3451, "step": 9040 }, { "epoch": 1.3834736036725326, "grad_norm": 1.7523628481913938, "learning_rate": 4.584024289914288e-06, "loss": 0.3022, "step": 9041 }, { "epoch": 1.38362662586075, "grad_norm": 1.9390495654961886, "learning_rate": 4.581941201659593e-06, "loss": 0.3678, "step": 9042 }, { "epoch": 1.383779648048967, "grad_norm": 2.237567627656652, "learning_rate": 4.579858446149086e-06, "loss": 0.4361, "step": 9043 }, { "epoch": 1.3839326702371844, "grad_norm": 2.3021361434486405, "learning_rate": 4.577776023510674e-06, "loss": 0.3286, "step": 9044 }, { "epoch": 1.3840856924254017, "grad_norm": 2.3763182088117545, "learning_rate": 4.575693933872248e-06, "loss": 0.3913, "step": 9045 }, { "epoch": 1.384238714613619, "grad_norm": 1.9626988253783606, "learning_rate": 4.573612177361679e-06, "loss": 0.3182, "step": 9046 }, { "epoch": 1.3843917368018364, "grad_norm": 2.173280085060481, "learning_rate": 4.571530754106813e-06, "loss": 0.3433, "step": 9047 }, { "epoch": 1.3845447589900535, "grad_norm": 1.942616106562521, "learning_rate": 4.5694496642354815e-06, "loss": 0.2648, "step": 9048 }, { "epoch": 1.3846977811782708, "grad_norm": 2.1229756870525462, "learning_rate": 4.567368907875489e-06, "loss": 0.3388, "step": 9049 }, { "epoch": 1.3848508033664881, "grad_norm": 1.9885131487226595, "learning_rate": 4.565288485154624e-06, "loss": 0.3119, "step": 9050 }, { "epoch": 1.3850038255547055, "grad_norm": 1.9222230959456768, "learning_rate": 4.563208396200651e-06, "loss": 0.2911, "step": 9051 }, { "epoch": 1.3851568477429228, "grad_norm": 2.158206506225906, "learning_rate": 4.561128641141327e-06, "loss": 0.3206, "step": 9052 }, { "epoch": 1.38530986993114, "grad_norm": 2.19345897921204, "learning_rate": 4.559049220104366e-06, "loss": 0.3805, "step": 9053 }, { "epoch": 1.3854628921193572, "grad_norm": 2.1317407824996044, "learning_rate": 4.5569701332174746e-06, "loss": 0.3375, "step": 9054 }, { "epoch": 1.3856159143075746, "grad_norm": 2.3366870170548233, "learning_rate": 4.554891380608346e-06, "loss": 0.3741, "step": 9055 }, { "epoch": 1.385768936495792, "grad_norm": 2.2540854792574363, "learning_rate": 4.552812962404637e-06, "loss": 0.3169, "step": 9056 }, { "epoch": 1.3859219586840092, "grad_norm": 2.099517950545955, "learning_rate": 4.550734878733989e-06, "loss": 0.3271, "step": 9057 }, { "epoch": 1.3860749808722264, "grad_norm": 1.9069516040024563, "learning_rate": 4.548657129724038e-06, "loss": 0.2895, "step": 9058 }, { "epoch": 1.3862280030604437, "grad_norm": 2.161070641348649, "learning_rate": 4.54657971550237e-06, "loss": 0.3233, "step": 9059 }, { "epoch": 1.386381025248661, "grad_norm": 2.216026925034713, "learning_rate": 4.54450263619658e-06, "loss": 0.3162, "step": 9060 }, { "epoch": 1.3865340474368784, "grad_norm": 2.1840129464952094, "learning_rate": 4.542425891934229e-06, "loss": 0.3682, "step": 9061 }, { "epoch": 1.3866870696250957, "grad_norm": 1.884220281115278, "learning_rate": 4.540349482842846e-06, "loss": 0.2823, "step": 9062 }, { "epoch": 1.3868400918133128, "grad_norm": 1.7360218886526524, "learning_rate": 4.538273409049964e-06, "loss": 0.2798, "step": 9063 }, { "epoch": 1.3869931140015304, "grad_norm": 2.3949601964182103, "learning_rate": 4.536197670683081e-06, "loss": 0.3681, "step": 9064 }, { "epoch": 1.3871461361897475, "grad_norm": 2.0561106971906646, "learning_rate": 4.534122267869667e-06, "loss": 0.3101, "step": 9065 }, { "epoch": 1.3872991583779648, "grad_norm": 2.3957391340285383, "learning_rate": 4.532047200737191e-06, "loss": 0.4326, "step": 9066 }, { "epoch": 1.3874521805661821, "grad_norm": 1.9010857779535084, "learning_rate": 4.529972469413087e-06, "loss": 0.3132, "step": 9067 }, { "epoch": 1.3876052027543995, "grad_norm": 2.02976579219007, "learning_rate": 4.527898074024772e-06, "loss": 0.358, "step": 9068 }, { "epoch": 1.3877582249426168, "grad_norm": 2.29210851094873, "learning_rate": 4.525824014699643e-06, "loss": 0.3679, "step": 9069 }, { "epoch": 1.387911247130834, "grad_norm": 1.9044989375358745, "learning_rate": 4.523750291565078e-06, "loss": 0.3186, "step": 9070 }, { "epoch": 1.3880642693190512, "grad_norm": 1.9671211186121362, "learning_rate": 4.52167690474843e-06, "loss": 0.313, "step": 9071 }, { "epoch": 1.3882172915072686, "grad_norm": 2.1843101523101747, "learning_rate": 4.5196038543770335e-06, "loss": 0.3622, "step": 9072 }, { "epoch": 1.388370313695486, "grad_norm": 1.8997165102498639, "learning_rate": 4.517531140578205e-06, "loss": 0.3651, "step": 9073 }, { "epoch": 1.3885233358837032, "grad_norm": 2.11857325406712, "learning_rate": 4.515458763479237e-06, "loss": 0.4028, "step": 9074 }, { "epoch": 1.3886763580719204, "grad_norm": 2.0344891913610037, "learning_rate": 4.5133867232074e-06, "loss": 0.2762, "step": 9075 }, { "epoch": 1.3888293802601377, "grad_norm": 1.8699787605804676, "learning_rate": 4.51131501988995e-06, "loss": 0.2799, "step": 9076 }, { "epoch": 1.388982402448355, "grad_norm": 1.9247216761568173, "learning_rate": 4.509243653654116e-06, "loss": 0.231, "step": 9077 }, { "epoch": 1.3891354246365724, "grad_norm": 1.8389597441884304, "learning_rate": 4.507172624627109e-06, "loss": 0.2892, "step": 9078 }, { "epoch": 1.3892884468247897, "grad_norm": 2.1670682348965054, "learning_rate": 4.505101932936119e-06, "loss": 0.3743, "step": 9079 }, { "epoch": 1.3894414690130068, "grad_norm": 2.04514272674738, "learning_rate": 4.5030315787083126e-06, "loss": 0.3327, "step": 9080 }, { "epoch": 1.3895944912012241, "grad_norm": 2.3185780179520767, "learning_rate": 4.5009615620708466e-06, "loss": 0.4264, "step": 9081 }, { "epoch": 1.3897475133894415, "grad_norm": 1.994748159454086, "learning_rate": 4.498891883150842e-06, "loss": 0.3528, "step": 9082 }, { "epoch": 1.3899005355776588, "grad_norm": 2.2945232098348107, "learning_rate": 4.496822542075403e-06, "loss": 0.3135, "step": 9083 }, { "epoch": 1.3900535577658761, "grad_norm": 2.19865510079339, "learning_rate": 4.494753538971627e-06, "loss": 0.3625, "step": 9084 }, { "epoch": 1.3902065799540932, "grad_norm": 1.9457973161006021, "learning_rate": 4.4926848739665695e-06, "loss": 0.3176, "step": 9085 }, { "epoch": 1.3903596021423106, "grad_norm": 2.2547412719369184, "learning_rate": 4.490616547187275e-06, "loss": 0.3324, "step": 9086 }, { "epoch": 1.390512624330528, "grad_norm": 2.331565110391067, "learning_rate": 4.488548558760778e-06, "loss": 0.3423, "step": 9087 }, { "epoch": 1.3906656465187452, "grad_norm": 2.0880204887768783, "learning_rate": 4.486480908814068e-06, "loss": 0.3643, "step": 9088 }, { "epoch": 1.3908186687069626, "grad_norm": 2.1681981316566596, "learning_rate": 4.4844135974741384e-06, "loss": 0.356, "step": 9089 }, { "epoch": 1.3909716908951797, "grad_norm": 2.149384143224433, "learning_rate": 4.48234662486795e-06, "loss": 0.3372, "step": 9090 }, { "epoch": 1.391124713083397, "grad_norm": 2.1414181789883484, "learning_rate": 4.480279991122434e-06, "loss": 0.3571, "step": 9091 }, { "epoch": 1.3912777352716144, "grad_norm": 1.985829509300672, "learning_rate": 4.478213696364519e-06, "loss": 0.2794, "step": 9092 }, { "epoch": 1.3914307574598317, "grad_norm": 2.155329738496481, "learning_rate": 4.476147740721108e-06, "loss": 0.3552, "step": 9093 }, { "epoch": 1.391583779648049, "grad_norm": 2.0895352644773655, "learning_rate": 4.4740821243190655e-06, "loss": 0.3342, "step": 9094 }, { "epoch": 1.3917368018362661, "grad_norm": 2.031285782158425, "learning_rate": 4.472016847285261e-06, "loss": 0.3187, "step": 9095 }, { "epoch": 1.3918898240244835, "grad_norm": 2.179929418116259, "learning_rate": 4.469951909746527e-06, "loss": 0.2988, "step": 9096 }, { "epoch": 1.3920428462127008, "grad_norm": 2.2347385172890992, "learning_rate": 4.467887311829681e-06, "loss": 0.3867, "step": 9097 }, { "epoch": 1.3921958684009181, "grad_norm": 2.03613306275373, "learning_rate": 4.465823053661515e-06, "loss": 0.4267, "step": 9098 }, { "epoch": 1.3923488905891355, "grad_norm": 2.362451409078388, "learning_rate": 4.463759135368807e-06, "loss": 0.3879, "step": 9099 }, { "epoch": 1.3925019127773526, "grad_norm": 2.3193830575924856, "learning_rate": 4.461695557078307e-06, "loss": 0.3888, "step": 9100 }, { "epoch": 1.3926549349655701, "grad_norm": 2.0010960754941793, "learning_rate": 4.45963231891675e-06, "loss": 0.3607, "step": 9101 }, { "epoch": 1.3928079571537872, "grad_norm": 1.956606094360901, "learning_rate": 4.457569421010846e-06, "loss": 0.2785, "step": 9102 }, { "epoch": 1.3929609793420046, "grad_norm": 1.659279994173601, "learning_rate": 4.455506863487285e-06, "loss": 0.2374, "step": 9103 }, { "epoch": 1.393114001530222, "grad_norm": 2.005492432335445, "learning_rate": 4.453444646472736e-06, "loss": 0.3199, "step": 9104 }, { "epoch": 1.3932670237184392, "grad_norm": 2.291401472114738, "learning_rate": 4.451382770093855e-06, "loss": 0.3307, "step": 9105 }, { "epoch": 1.3934200459066566, "grad_norm": 2.2285881389564364, "learning_rate": 4.44932123447726e-06, "loss": 0.3942, "step": 9106 }, { "epoch": 1.3935730680948737, "grad_norm": 2.5366833904976667, "learning_rate": 4.447260039749562e-06, "loss": 0.4648, "step": 9107 }, { "epoch": 1.393726090283091, "grad_norm": 2.1735252628773116, "learning_rate": 4.445199186037352e-06, "loss": 0.357, "step": 9108 }, { "epoch": 1.3938791124713084, "grad_norm": 1.949213191530467, "learning_rate": 4.443138673467183e-06, "loss": 0.2779, "step": 9109 }, { "epoch": 1.3940321346595257, "grad_norm": 2.0589593708206877, "learning_rate": 4.441078502165613e-06, "loss": 0.3583, "step": 9110 }, { "epoch": 1.394185156847743, "grad_norm": 2.0886474227263174, "learning_rate": 4.43901867225916e-06, "loss": 0.3245, "step": 9111 }, { "epoch": 1.3943381790359601, "grad_norm": 2.2564496042284574, "learning_rate": 4.4369591838743175e-06, "loss": 0.4147, "step": 9112 }, { "epoch": 1.3944912012241775, "grad_norm": 2.224803250753932, "learning_rate": 4.434900037137579e-06, "loss": 0.3826, "step": 9113 }, { "epoch": 1.3946442234123948, "grad_norm": 1.9444166418014062, "learning_rate": 4.432841232175404e-06, "loss": 0.3276, "step": 9114 }, { "epoch": 1.3947972456006121, "grad_norm": 1.9814704822594493, "learning_rate": 4.43078276911422e-06, "loss": 0.3291, "step": 9115 }, { "epoch": 1.3949502677888295, "grad_norm": 2.202246242241708, "learning_rate": 4.428724648080457e-06, "loss": 0.3486, "step": 9116 }, { "epoch": 1.3951032899770466, "grad_norm": 2.1543385539147315, "learning_rate": 4.426666869200507e-06, "loss": 0.3825, "step": 9117 }, { "epoch": 1.395256312165264, "grad_norm": 1.9988288994094405, "learning_rate": 4.424609432600749e-06, "loss": 0.3201, "step": 9118 }, { "epoch": 1.3954093343534812, "grad_norm": 2.339767082170093, "learning_rate": 4.422552338407537e-06, "loss": 0.355, "step": 9119 }, { "epoch": 1.3955623565416986, "grad_norm": 2.377307692928964, "learning_rate": 4.4204955867472035e-06, "loss": 0.392, "step": 9120 }, { "epoch": 1.395715378729916, "grad_norm": 1.8196435714068888, "learning_rate": 4.418439177746064e-06, "loss": 0.3027, "step": 9121 }, { "epoch": 1.395868400918133, "grad_norm": 2.2604499545369934, "learning_rate": 4.41638311153041e-06, "loss": 0.3455, "step": 9122 }, { "epoch": 1.3960214231063504, "grad_norm": 2.033493361083444, "learning_rate": 4.414327388226511e-06, "loss": 0.3, "step": 9123 }, { "epoch": 1.3961744452945677, "grad_norm": 2.089188809076427, "learning_rate": 4.412272007960621e-06, "loss": 0.3484, "step": 9124 }, { "epoch": 1.396327467482785, "grad_norm": 2.0463071223170473, "learning_rate": 4.4102169708589635e-06, "loss": 0.3231, "step": 9125 }, { "epoch": 1.3964804896710024, "grad_norm": 2.088456068827583, "learning_rate": 4.4081622770477505e-06, "loss": 0.411, "step": 9126 }, { "epoch": 1.3966335118592195, "grad_norm": 2.1862769519850844, "learning_rate": 4.4061079266531685e-06, "loss": 0.3502, "step": 9127 }, { "epoch": 1.3967865340474368, "grad_norm": 2.45723775464347, "learning_rate": 4.404053919801381e-06, "loss": 0.3739, "step": 9128 }, { "epoch": 1.3969395562356541, "grad_norm": 2.25312171783395, "learning_rate": 4.4020002566185336e-06, "loss": 0.3956, "step": 9129 }, { "epoch": 1.3970925784238715, "grad_norm": 2.303669832441205, "learning_rate": 4.39994693723075e-06, "loss": 0.3633, "step": 9130 }, { "epoch": 1.3972456006120888, "grad_norm": 2.158839638357098, "learning_rate": 4.3978939617641324e-06, "loss": 0.3417, "step": 9131 }, { "epoch": 1.397398622800306, "grad_norm": 2.1069280338103487, "learning_rate": 4.395841330344762e-06, "loss": 0.3423, "step": 9132 }, { "epoch": 1.3975516449885235, "grad_norm": 1.982236354173458, "learning_rate": 4.393789043098697e-06, "loss": 0.346, "step": 9133 }, { "epoch": 1.3977046671767406, "grad_norm": 2.192432973522562, "learning_rate": 4.391737100151984e-06, "loss": 0.4102, "step": 9134 }, { "epoch": 1.397857689364958, "grad_norm": 2.2270581821455844, "learning_rate": 4.3896855016306324e-06, "loss": 0.4185, "step": 9135 }, { "epoch": 1.3980107115531752, "grad_norm": 2.299865181546426, "learning_rate": 4.387634247660638e-06, "loss": 0.4115, "step": 9136 }, { "epoch": 1.3981637337413926, "grad_norm": 2.474822061012731, "learning_rate": 4.385583338367988e-06, "loss": 0.3599, "step": 9137 }, { "epoch": 1.39831675592961, "grad_norm": 1.9942991712556746, "learning_rate": 4.38353277387862e-06, "loss": 0.3323, "step": 9138 }, { "epoch": 1.398469778117827, "grad_norm": 1.9557470552238374, "learning_rate": 4.381482554318481e-06, "loss": 0.2981, "step": 9139 }, { "epoch": 1.3986228003060444, "grad_norm": 1.9394714605145627, "learning_rate": 4.379432679813482e-06, "loss": 0.3279, "step": 9140 }, { "epoch": 1.3987758224942617, "grad_norm": 2.227238700846652, "learning_rate": 4.377383150489503e-06, "loss": 0.3151, "step": 9141 }, { "epoch": 1.398928844682479, "grad_norm": 1.9792871389602236, "learning_rate": 4.375333966472423e-06, "loss": 0.2772, "step": 9142 }, { "epoch": 1.3990818668706964, "grad_norm": 1.9119147272515082, "learning_rate": 4.373285127888093e-06, "loss": 0.3299, "step": 9143 }, { "epoch": 1.3992348890589135, "grad_norm": 2.0573878948922553, "learning_rate": 4.3712366348623256e-06, "loss": 0.3293, "step": 9144 }, { "epoch": 1.3993879112471308, "grad_norm": 2.012802138222529, "learning_rate": 4.369188487520942e-06, "loss": 0.3336, "step": 9145 }, { "epoch": 1.3995409334353481, "grad_norm": 2.168983085038054, "learning_rate": 4.36714068598972e-06, "loss": 0.4029, "step": 9146 }, { "epoch": 1.3996939556235655, "grad_norm": 2.1144699722556517, "learning_rate": 4.365093230394425e-06, "loss": 0.337, "step": 9147 }, { "epoch": 1.3998469778117828, "grad_norm": 2.0987987285199305, "learning_rate": 4.363046120860799e-06, "loss": 0.3245, "step": 9148 }, { "epoch": 1.4, "grad_norm": 2.138558119857808, "learning_rate": 4.360999357514562e-06, "loss": 0.3627, "step": 9149 }, { "epoch": 1.4001530221882172, "grad_norm": 2.1546083763172135, "learning_rate": 4.358952940481414e-06, "loss": 0.3407, "step": 9150 }, { "epoch": 1.4003060443764346, "grad_norm": 2.13815010724717, "learning_rate": 4.356906869887034e-06, "loss": 0.3007, "step": 9151 }, { "epoch": 1.400459066564652, "grad_norm": 2.084680308601072, "learning_rate": 4.354861145857079e-06, "loss": 0.3639, "step": 9152 }, { "epoch": 1.4006120887528692, "grad_norm": 1.9804950825280374, "learning_rate": 4.352815768517185e-06, "loss": 0.2538, "step": 9153 }, { "epoch": 1.4007651109410864, "grad_norm": 1.9922835195123851, "learning_rate": 4.350770737992967e-06, "loss": 0.2989, "step": 9154 }, { "epoch": 1.4009181331293037, "grad_norm": 2.0487239910505006, "learning_rate": 4.3487260544100176e-06, "loss": 0.3304, "step": 9155 }, { "epoch": 1.401071155317521, "grad_norm": 2.0799090584250557, "learning_rate": 4.346681717893909e-06, "loss": 0.3208, "step": 9156 }, { "epoch": 1.4012241775057384, "grad_norm": 2.3668579542580246, "learning_rate": 4.3446377285701924e-06, "loss": 0.3714, "step": 9157 }, { "epoch": 1.4013771996939557, "grad_norm": 2.3259095379886516, "learning_rate": 4.342594086564396e-06, "loss": 0.3701, "step": 9158 }, { "epoch": 1.4015302218821728, "grad_norm": 2.2272276688925636, "learning_rate": 4.340550792002029e-06, "loss": 0.3585, "step": 9159 }, { "epoch": 1.4016832440703901, "grad_norm": 2.041804323945471, "learning_rate": 4.338507845008578e-06, "loss": 0.3692, "step": 9160 }, { "epoch": 1.4018362662586075, "grad_norm": 2.2361335197569723, "learning_rate": 4.3364652457095095e-06, "loss": 0.3295, "step": 9161 }, { "epoch": 1.4019892884468248, "grad_norm": 2.1316794949736573, "learning_rate": 4.3344229942302605e-06, "loss": 0.3014, "step": 9162 }, { "epoch": 1.4021423106350421, "grad_norm": 2.06167169272297, "learning_rate": 4.332381090696267e-06, "loss": 0.341, "step": 9163 }, { "epoch": 1.4022953328232592, "grad_norm": 1.9514353960688686, "learning_rate": 4.3303395352329195e-06, "loss": 0.2991, "step": 9164 }, { "epoch": 1.4024483550114768, "grad_norm": 2.2184375708560595, "learning_rate": 4.3282983279655965e-06, "loss": 0.2893, "step": 9165 }, { "epoch": 1.402601377199694, "grad_norm": 2.0421161222765765, "learning_rate": 4.326257469019668e-06, "loss": 0.2997, "step": 9166 }, { "epoch": 1.4027543993879112, "grad_norm": 1.9957994860733428, "learning_rate": 4.324216958520462e-06, "loss": 0.3234, "step": 9167 }, { "epoch": 1.4029074215761286, "grad_norm": 2.4207732562865116, "learning_rate": 4.3221767965932915e-06, "loss": 0.344, "step": 9168 }, { "epoch": 1.403060443764346, "grad_norm": 1.8290840140032734, "learning_rate": 4.320136983363463e-06, "loss": 0.2701, "step": 9169 }, { "epoch": 1.4032134659525632, "grad_norm": 2.079210865586468, "learning_rate": 4.318097518956236e-06, "loss": 0.3439, "step": 9170 }, { "epoch": 1.4033664881407804, "grad_norm": 2.3346282697773617, "learning_rate": 4.31605840349687e-06, "loss": 0.3681, "step": 9171 }, { "epoch": 1.4035195103289977, "grad_norm": 2.3745803397537566, "learning_rate": 4.314019637110598e-06, "loss": 0.4203, "step": 9172 }, { "epoch": 1.403672532517215, "grad_norm": 2.2195262607467954, "learning_rate": 4.311981219922616e-06, "loss": 0.3696, "step": 9173 }, { "epoch": 1.4038255547054324, "grad_norm": 2.449637868284868, "learning_rate": 4.309943152058122e-06, "loss": 0.4486, "step": 9174 }, { "epoch": 1.4039785768936497, "grad_norm": 2.3354398337723348, "learning_rate": 4.30790543364228e-06, "loss": 0.413, "step": 9175 }, { "epoch": 1.4041315990818668, "grad_norm": 2.036115746289367, "learning_rate": 4.305868064800233e-06, "loss": 0.3874, "step": 9176 }, { "epoch": 1.4042846212700841, "grad_norm": 2.0707373753851543, "learning_rate": 4.303831045657102e-06, "loss": 0.297, "step": 9177 }, { "epoch": 1.4044376434583015, "grad_norm": 2.2471500234006294, "learning_rate": 4.301794376337991e-06, "loss": 0.3593, "step": 9178 }, { "epoch": 1.4045906656465188, "grad_norm": 2.130251359667783, "learning_rate": 4.29975805696798e-06, "loss": 0.3498, "step": 9179 }, { "epoch": 1.4047436878347361, "grad_norm": 2.135027547460175, "learning_rate": 4.297722087672125e-06, "loss": 0.2999, "step": 9180 }, { "epoch": 1.4048967100229532, "grad_norm": 2.021138639473653, "learning_rate": 4.295686468575464e-06, "loss": 0.2895, "step": 9181 }, { "epoch": 1.4050497322111706, "grad_norm": 2.0694117569321455, "learning_rate": 4.2936511998030116e-06, "loss": 0.3878, "step": 9182 }, { "epoch": 1.405202754399388, "grad_norm": 2.098084448075512, "learning_rate": 4.29161628147976e-06, "loss": 0.3577, "step": 9183 }, { "epoch": 1.4053557765876052, "grad_norm": 2.300338937806773, "learning_rate": 4.289581713730691e-06, "loss": 0.3517, "step": 9184 }, { "epoch": 1.4055087987758226, "grad_norm": 2.2472611334731547, "learning_rate": 4.287547496680744e-06, "loss": 0.3293, "step": 9185 }, { "epoch": 1.4056618209640397, "grad_norm": 1.9924191518899794, "learning_rate": 4.2855136304548495e-06, "loss": 0.3571, "step": 9186 }, { "epoch": 1.405814843152257, "grad_norm": 2.5741502175394353, "learning_rate": 4.283480115177925e-06, "loss": 0.3318, "step": 9187 }, { "epoch": 1.4059678653404744, "grad_norm": 2.2106988688252307, "learning_rate": 4.2814469509748465e-06, "loss": 0.3486, "step": 9188 }, { "epoch": 1.4061208875286917, "grad_norm": 2.037048253663022, "learning_rate": 4.279414137970478e-06, "loss": 0.3917, "step": 9189 }, { "epoch": 1.406273909716909, "grad_norm": 2.367760531391993, "learning_rate": 4.277381676289673e-06, "loss": 0.4048, "step": 9190 }, { "epoch": 1.4064269319051261, "grad_norm": 2.3062782712082472, "learning_rate": 4.27534956605724e-06, "loss": 0.3714, "step": 9191 }, { "epoch": 1.4065799540933435, "grad_norm": 2.023102891670905, "learning_rate": 4.273317807397989e-06, "loss": 0.3212, "step": 9192 }, { "epoch": 1.4067329762815608, "grad_norm": 2.2230748607200814, "learning_rate": 4.271286400436697e-06, "loss": 0.3564, "step": 9193 }, { "epoch": 1.4068859984697781, "grad_norm": 1.8733281634689514, "learning_rate": 4.269255345298111e-06, "loss": 0.336, "step": 9194 }, { "epoch": 1.4070390206579955, "grad_norm": 2.0370688321086527, "learning_rate": 4.267224642106977e-06, "loss": 0.3371, "step": 9195 }, { "epoch": 1.4071920428462126, "grad_norm": 2.140419683830952, "learning_rate": 4.265194290988008e-06, "loss": 0.3348, "step": 9196 }, { "epoch": 1.40734506503443, "grad_norm": 2.0255097904919013, "learning_rate": 4.2631642920658845e-06, "loss": 0.3399, "step": 9197 }, { "epoch": 1.4074980872226472, "grad_norm": 2.6557100376557865, "learning_rate": 4.261134645465288e-06, "loss": 0.3875, "step": 9198 }, { "epoch": 1.4076511094108646, "grad_norm": 2.3843033837943812, "learning_rate": 4.259105351310864e-06, "loss": 0.3838, "step": 9199 }, { "epoch": 1.407804131599082, "grad_norm": 2.2177489481232744, "learning_rate": 4.2570764097272385e-06, "loss": 0.3138, "step": 9200 }, { "epoch": 1.407957153787299, "grad_norm": 2.1430301544228767, "learning_rate": 4.255047820839018e-06, "loss": 0.3316, "step": 9201 }, { "epoch": 1.4081101759755166, "grad_norm": 2.03948756178882, "learning_rate": 4.253019584770784e-06, "loss": 0.3338, "step": 9202 }, { "epoch": 1.4082631981637337, "grad_norm": 2.7064411052587674, "learning_rate": 4.2509917016471e-06, "loss": 0.3696, "step": 9203 }, { "epoch": 1.408416220351951, "grad_norm": 2.157120376788771, "learning_rate": 4.248964171592506e-06, "loss": 0.3714, "step": 9204 }, { "epoch": 1.4085692425401684, "grad_norm": 1.9402279140569512, "learning_rate": 4.246936994731521e-06, "loss": 0.3287, "step": 9205 }, { "epoch": 1.4087222647283857, "grad_norm": 2.0005560931211925, "learning_rate": 4.2449101711886405e-06, "loss": 0.265, "step": 9206 }, { "epoch": 1.408875286916603, "grad_norm": 2.0099989536188745, "learning_rate": 4.242883701088342e-06, "loss": 0.3342, "step": 9207 }, { "epoch": 1.4090283091048201, "grad_norm": 1.89190018721622, "learning_rate": 4.240857584555075e-06, "loss": 0.3328, "step": 9208 }, { "epoch": 1.4091813312930375, "grad_norm": 2.094044689048361, "learning_rate": 4.2388318217132755e-06, "loss": 0.3219, "step": 9209 }, { "epoch": 1.4093343534812548, "grad_norm": 2.1431376339884576, "learning_rate": 4.23680641268735e-06, "loss": 0.3437, "step": 9210 }, { "epoch": 1.4094873756694721, "grad_norm": 2.1750133312455393, "learning_rate": 4.2347813576016896e-06, "loss": 0.3898, "step": 9211 }, { "epoch": 1.4096403978576895, "grad_norm": 2.026617738848952, "learning_rate": 4.232756656580655e-06, "loss": 0.3322, "step": 9212 }, { "epoch": 1.4097934200459066, "grad_norm": 2.2552918578481944, "learning_rate": 4.2307323097486045e-06, "loss": 0.3443, "step": 9213 }, { "epoch": 1.409946442234124, "grad_norm": 2.237233568584304, "learning_rate": 4.228708317229849e-06, "loss": 0.3377, "step": 9214 }, { "epoch": 1.4100994644223412, "grad_norm": 2.1424246978573374, "learning_rate": 4.226684679148687e-06, "loss": 0.3334, "step": 9215 }, { "epoch": 1.4102524866105586, "grad_norm": 2.0460843887698483, "learning_rate": 4.2246613956294135e-06, "loss": 0.3062, "step": 9216 }, { "epoch": 1.410405508798776, "grad_norm": 2.144472320092002, "learning_rate": 4.222638466796272e-06, "loss": 0.2989, "step": 9217 }, { "epoch": 1.410558530986993, "grad_norm": 1.9545756311695621, "learning_rate": 4.220615892773501e-06, "loss": 0.2725, "step": 9218 }, { "epoch": 1.4107115531752104, "grad_norm": 2.2589372985105705, "learning_rate": 4.218593673685324e-06, "loss": 0.3786, "step": 9219 }, { "epoch": 1.4108645753634277, "grad_norm": 2.010967215877556, "learning_rate": 4.2165718096559196e-06, "loss": 0.2905, "step": 9220 }, { "epoch": 1.411017597551645, "grad_norm": 2.0807598973462884, "learning_rate": 4.214550300809468e-06, "loss": 0.3706, "step": 9221 }, { "epoch": 1.4111706197398624, "grad_norm": 2.365584366602693, "learning_rate": 4.212529147270119e-06, "loss": 0.397, "step": 9222 }, { "epoch": 1.4113236419280795, "grad_norm": 2.510130358885097, "learning_rate": 4.210508349161989e-06, "loss": 0.415, "step": 9223 }, { "epoch": 1.4114766641162968, "grad_norm": 2.2722053771676265, "learning_rate": 4.208487906609193e-06, "loss": 0.3437, "step": 9224 }, { "epoch": 1.4116296863045141, "grad_norm": 2.2739261112065448, "learning_rate": 4.2064678197358155e-06, "loss": 0.3325, "step": 9225 }, { "epoch": 1.4117827084927315, "grad_norm": 2.0286388443546968, "learning_rate": 4.204448088665906e-06, "loss": 0.2738, "step": 9226 }, { "epoch": 1.4119357306809488, "grad_norm": 2.021410749339718, "learning_rate": 4.2024287135235156e-06, "loss": 0.3109, "step": 9227 }, { "epoch": 1.412088752869166, "grad_norm": 2.1979983118021713, "learning_rate": 4.200409694432658e-06, "loss": 0.3539, "step": 9228 }, { "epoch": 1.4122417750573832, "grad_norm": 2.0107755272944243, "learning_rate": 4.198391031517328e-06, "loss": 0.3057, "step": 9229 }, { "epoch": 1.4123947972456006, "grad_norm": 1.9654965383083711, "learning_rate": 4.196372724901502e-06, "loss": 0.3215, "step": 9230 }, { "epoch": 1.412547819433818, "grad_norm": 2.0359147985124344, "learning_rate": 4.1943547747091306e-06, "loss": 0.2846, "step": 9231 }, { "epoch": 1.4127008416220352, "grad_norm": 2.098622048300682, "learning_rate": 4.192337181064145e-06, "loss": 0.3234, "step": 9232 }, { "epoch": 1.4128538638102524, "grad_norm": 2.4112177721252963, "learning_rate": 4.190319944090452e-06, "loss": 0.4037, "step": 9233 }, { "epoch": 1.41300688599847, "grad_norm": 2.1986701033338933, "learning_rate": 4.1883030639119385e-06, "loss": 0.3305, "step": 9234 }, { "epoch": 1.413159908186687, "grad_norm": 2.1304605397689422, "learning_rate": 4.18628654065247e-06, "loss": 0.3071, "step": 9235 }, { "epoch": 1.4133129303749044, "grad_norm": 2.5068322091767747, "learning_rate": 4.1842703744358885e-06, "loss": 0.4024, "step": 9236 }, { "epoch": 1.4134659525631217, "grad_norm": 1.8202902088615422, "learning_rate": 4.182254565386015e-06, "loss": 0.2486, "step": 9237 }, { "epoch": 1.413618974751339, "grad_norm": 1.968323001514379, "learning_rate": 4.180239113626646e-06, "loss": 0.3794, "step": 9238 }, { "epoch": 1.4137719969395564, "grad_norm": 2.0430665390514577, "learning_rate": 4.1782240192815614e-06, "loss": 0.3269, "step": 9239 }, { "epoch": 1.4139250191277735, "grad_norm": 2.2109860232560603, "learning_rate": 4.176209282474515e-06, "loss": 0.319, "step": 9240 }, { "epoch": 1.4140780413159908, "grad_norm": 2.0509503774215148, "learning_rate": 4.174194903329239e-06, "loss": 0.33, "step": 9241 }, { "epoch": 1.4142310635042081, "grad_norm": 2.07717813707438, "learning_rate": 4.1721808819694445e-06, "loss": 0.2624, "step": 9242 }, { "epoch": 1.4143840856924255, "grad_norm": 2.294446630754429, "learning_rate": 4.17016721851882e-06, "loss": 0.3071, "step": 9243 }, { "epoch": 1.4145371078806428, "grad_norm": 2.5392494654134987, "learning_rate": 4.168153913101029e-06, "loss": 0.3624, "step": 9244 }, { "epoch": 1.41469013006886, "grad_norm": 1.887829520244782, "learning_rate": 4.16614096583973e-06, "loss": 0.3119, "step": 9245 }, { "epoch": 1.4148431522570772, "grad_norm": 2.5210457622741456, "learning_rate": 4.1641283768585315e-06, "loss": 0.3963, "step": 9246 }, { "epoch": 1.4149961744452946, "grad_norm": 2.0711867206232166, "learning_rate": 4.162116146281036e-06, "loss": 0.296, "step": 9247 }, { "epoch": 1.415149196633512, "grad_norm": 2.062152923881803, "learning_rate": 4.160104274230833e-06, "loss": 0.3765, "step": 9248 }, { "epoch": 1.4153022188217292, "grad_norm": 2.0768357136167945, "learning_rate": 4.1580927608314645e-06, "loss": 0.4489, "step": 9249 }, { "epoch": 1.4154552410099464, "grad_norm": 2.096843231214301, "learning_rate": 4.156081606206478e-06, "loss": 0.3397, "step": 9250 }, { "epoch": 1.4156082631981637, "grad_norm": 1.947566872119692, "learning_rate": 4.154070810479385e-06, "loss": 0.316, "step": 9251 }, { "epoch": 1.415761285386381, "grad_norm": 2.180262409466973, "learning_rate": 4.152060373773665e-06, "loss": 0.3205, "step": 9252 }, { "epoch": 1.4159143075745984, "grad_norm": 2.19393243995704, "learning_rate": 4.150050296212799e-06, "loss": 0.328, "step": 9253 }, { "epoch": 1.4160673297628157, "grad_norm": 2.2667999392118077, "learning_rate": 4.148040577920233e-06, "loss": 0.3422, "step": 9254 }, { "epoch": 1.4162203519510328, "grad_norm": 2.3500757721659347, "learning_rate": 4.146031219019381e-06, "loss": 0.3223, "step": 9255 }, { "epoch": 1.4163733741392501, "grad_norm": 2.038336086469945, "learning_rate": 4.144022219633656e-06, "loss": 0.3369, "step": 9256 }, { "epoch": 1.4165263963274675, "grad_norm": 2.47373157191876, "learning_rate": 4.142013579886435e-06, "loss": 0.3522, "step": 9257 }, { "epoch": 1.4166794185156848, "grad_norm": 2.006527184254753, "learning_rate": 4.140005299901076e-06, "loss": 0.3342, "step": 9258 }, { "epoch": 1.4168324407039021, "grad_norm": 2.0188062531388566, "learning_rate": 4.137997379800916e-06, "loss": 0.2992, "step": 9259 }, { "epoch": 1.4169854628921192, "grad_norm": 2.1837388368801305, "learning_rate": 4.135989819709271e-06, "loss": 0.3306, "step": 9260 }, { "epoch": 1.4171384850803366, "grad_norm": 2.0237473797483907, "learning_rate": 4.1339826197494305e-06, "loss": 0.3127, "step": 9261 }, { "epoch": 1.417291507268554, "grad_norm": 2.056314469069344, "learning_rate": 4.131975780044665e-06, "loss": 0.3071, "step": 9262 }, { "epoch": 1.4174445294567712, "grad_norm": 2.196217556752408, "learning_rate": 4.129969300718223e-06, "loss": 0.3395, "step": 9263 }, { "epoch": 1.4175975516449886, "grad_norm": 2.1032234644707835, "learning_rate": 4.1279631818933295e-06, "loss": 0.2709, "step": 9264 }, { "epoch": 1.4177505738332057, "grad_norm": 2.4031479684877817, "learning_rate": 4.125957423693186e-06, "loss": 0.42, "step": 9265 }, { "epoch": 1.4179035960214232, "grad_norm": 2.0786897004206732, "learning_rate": 4.123952026240982e-06, "loss": 0.3923, "step": 9266 }, { "epoch": 1.4180566182096404, "grad_norm": 2.251632349890319, "learning_rate": 4.121946989659869e-06, "loss": 0.3852, "step": 9267 }, { "epoch": 1.4182096403978577, "grad_norm": 2.555790821467786, "learning_rate": 4.119942314072982e-06, "loss": 0.411, "step": 9268 }, { "epoch": 1.418362662586075, "grad_norm": 2.349511897533703, "learning_rate": 4.117937999603448e-06, "loss": 0.3667, "step": 9269 }, { "epoch": 1.4185156847742924, "grad_norm": 2.4392205660352904, "learning_rate": 4.115934046374348e-06, "loss": 0.316, "step": 9270 }, { "epoch": 1.4186687069625097, "grad_norm": 2.148913506260671, "learning_rate": 4.1139304545087545e-06, "loss": 0.3347, "step": 9271 }, { "epoch": 1.4188217291507268, "grad_norm": 2.0878267481393658, "learning_rate": 4.111927224129724e-06, "loss": 0.342, "step": 9272 }, { "epoch": 1.4189747513389441, "grad_norm": 2.161297083283504, "learning_rate": 4.109924355360271e-06, "loss": 0.3463, "step": 9273 }, { "epoch": 1.4191277735271615, "grad_norm": 1.976150388866175, "learning_rate": 4.107921848323409e-06, "loss": 0.3227, "step": 9274 }, { "epoch": 1.4192807957153788, "grad_norm": 2.19615636726198, "learning_rate": 4.1059197031421185e-06, "loss": 0.4099, "step": 9275 }, { "epoch": 1.4194338179035961, "grad_norm": 1.9991519943828997, "learning_rate": 4.1039179199393495e-06, "loss": 0.2936, "step": 9276 }, { "epoch": 1.4195868400918132, "grad_norm": 2.1666577229545427, "learning_rate": 4.101916498838052e-06, "loss": 0.3918, "step": 9277 }, { "epoch": 1.4197398622800306, "grad_norm": 2.0607595100678964, "learning_rate": 4.099915439961135e-06, "loss": 0.2556, "step": 9278 }, { "epoch": 1.419892884468248, "grad_norm": 2.2451488681433407, "learning_rate": 4.097914743431491e-06, "loss": 0.3761, "step": 9279 }, { "epoch": 1.4200459066564652, "grad_norm": 2.2674171815366364, "learning_rate": 4.095914409371994e-06, "loss": 0.3863, "step": 9280 }, { "epoch": 1.4201989288446826, "grad_norm": 2.1163719464876447, "learning_rate": 4.093914437905489e-06, "loss": 0.3283, "step": 9281 }, { "epoch": 1.4203519510328997, "grad_norm": 2.193899121026817, "learning_rate": 4.091914829154801e-06, "loss": 0.3804, "step": 9282 }, { "epoch": 1.420504973221117, "grad_norm": 2.1351207017567355, "learning_rate": 4.089915583242738e-06, "loss": 0.3554, "step": 9283 }, { "epoch": 1.4206579954093344, "grad_norm": 2.151449603371385, "learning_rate": 4.087916700292079e-06, "loss": 0.3385, "step": 9284 }, { "epoch": 1.4208110175975517, "grad_norm": 2.2234211243154087, "learning_rate": 4.0859181804255845e-06, "loss": 0.3383, "step": 9285 }, { "epoch": 1.420964039785769, "grad_norm": 2.2507111139185745, "learning_rate": 4.08392002376599e-06, "loss": 0.3811, "step": 9286 }, { "epoch": 1.4211170619739861, "grad_norm": 1.973399321101313, "learning_rate": 4.08192223043601e-06, "loss": 0.2961, "step": 9287 }, { "epoch": 1.4212700841622035, "grad_norm": 2.133709019598902, "learning_rate": 4.079924800558338e-06, "loss": 0.3478, "step": 9288 }, { "epoch": 1.4214231063504208, "grad_norm": 2.158294322387067, "learning_rate": 4.077927734255643e-06, "loss": 0.3751, "step": 9289 }, { "epoch": 1.4215761285386381, "grad_norm": 1.9920504708122626, "learning_rate": 4.075931031650574e-06, "loss": 0.3415, "step": 9290 }, { "epoch": 1.4217291507268555, "grad_norm": 2.20512398485947, "learning_rate": 4.073934692865755e-06, "loss": 0.3244, "step": 9291 }, { "epoch": 1.4218821729150726, "grad_norm": 2.347120560371027, "learning_rate": 4.07193871802379e-06, "loss": 0.3721, "step": 9292 }, { "epoch": 1.42203519510329, "grad_norm": 2.067692285877767, "learning_rate": 4.069943107247259e-06, "loss": 0.315, "step": 9293 }, { "epoch": 1.4221882172915072, "grad_norm": 2.0118785839217943, "learning_rate": 4.0679478606587155e-06, "loss": 0.318, "step": 9294 }, { "epoch": 1.4223412394797246, "grad_norm": 2.1728967714428493, "learning_rate": 4.065952978380708e-06, "loss": 0.3713, "step": 9295 }, { "epoch": 1.422494261667942, "grad_norm": 2.311326910138758, "learning_rate": 4.063958460535738e-06, "loss": 0.3365, "step": 9296 }, { "epoch": 1.422647283856159, "grad_norm": 2.1496506605822905, "learning_rate": 4.061964307246299e-06, "loss": 0.3167, "step": 9297 }, { "epoch": 1.4228003060443766, "grad_norm": 2.143129053849377, "learning_rate": 4.059970518634867e-06, "loss": 0.3739, "step": 9298 }, { "epoch": 1.4229533282325937, "grad_norm": 2.3506052011866156, "learning_rate": 4.05797709482388e-06, "loss": 0.3054, "step": 9299 }, { "epoch": 1.423106350420811, "grad_norm": 2.0868900431403006, "learning_rate": 4.055984035935762e-06, "loss": 0.3172, "step": 9300 }, { "epoch": 1.4232593726090284, "grad_norm": 2.064767658180076, "learning_rate": 4.0539913420929235e-06, "loss": 0.2893, "step": 9301 }, { "epoch": 1.4234123947972457, "grad_norm": 2.1290755477464978, "learning_rate": 4.051999013417731e-06, "loss": 0.3727, "step": 9302 }, { "epoch": 1.423565416985463, "grad_norm": 1.8483480553704854, "learning_rate": 4.050007050032552e-06, "loss": 0.2864, "step": 9303 }, { "epoch": 1.4237184391736801, "grad_norm": 2.0023364387746447, "learning_rate": 4.048015452059719e-06, "loss": 0.3626, "step": 9304 }, { "epoch": 1.4238714613618975, "grad_norm": 1.8518706736246955, "learning_rate": 4.046024219621535e-06, "loss": 0.2707, "step": 9305 }, { "epoch": 1.4240244835501148, "grad_norm": 1.9748324071119407, "learning_rate": 4.044033352840299e-06, "loss": 0.3093, "step": 9306 }, { "epoch": 1.4241775057383321, "grad_norm": 1.9481838626006736, "learning_rate": 4.042042851838278e-06, "loss": 0.2265, "step": 9307 }, { "epoch": 1.4243305279265495, "grad_norm": 2.210466757271439, "learning_rate": 4.040052716737707e-06, "loss": 0.3629, "step": 9308 }, { "epoch": 1.4244835501147666, "grad_norm": 2.0610090670428973, "learning_rate": 4.038062947660817e-06, "loss": 0.3107, "step": 9309 }, { "epoch": 1.424636572302984, "grad_norm": 2.149631892573452, "learning_rate": 4.0360735447298035e-06, "loss": 0.3422, "step": 9310 }, { "epoch": 1.4247895944912012, "grad_norm": 1.990278427506984, "learning_rate": 4.034084508066846e-06, "loss": 0.3334, "step": 9311 }, { "epoch": 1.4249426166794186, "grad_norm": 2.2489829280017135, "learning_rate": 4.0320958377940976e-06, "loss": 0.3528, "step": 9312 }, { "epoch": 1.425095638867636, "grad_norm": 2.3206819559913052, "learning_rate": 4.03010753403369e-06, "loss": 0.3797, "step": 9313 }, { "epoch": 1.425248661055853, "grad_norm": 2.0766681221229097, "learning_rate": 4.028119596907734e-06, "loss": 0.3592, "step": 9314 }, { "epoch": 1.4254016832440703, "grad_norm": 2.0848585897926974, "learning_rate": 4.026132026538315e-06, "loss": 0.3219, "step": 9315 }, { "epoch": 1.4255547054322877, "grad_norm": 2.3234590571082636, "learning_rate": 4.024144823047498e-06, "loss": 0.3569, "step": 9316 }, { "epoch": 1.425707727620505, "grad_norm": 2.00027416391328, "learning_rate": 4.022157986557325e-06, "loss": 0.3802, "step": 9317 }, { "epoch": 1.4258607498087223, "grad_norm": 2.0622902572976707, "learning_rate": 4.020171517189816e-06, "loss": 0.3386, "step": 9318 }, { "epoch": 1.4260137719969395, "grad_norm": 1.8924055155330906, "learning_rate": 4.0181854150669665e-06, "loss": 0.3028, "step": 9319 }, { "epoch": 1.4261667941851568, "grad_norm": 2.576666626932052, "learning_rate": 4.016199680310753e-06, "loss": 0.3428, "step": 9320 }, { "epoch": 1.4263198163733741, "grad_norm": 2.248274794714831, "learning_rate": 4.014214313043124e-06, "loss": 0.4452, "step": 9321 }, { "epoch": 1.4264728385615915, "grad_norm": 1.8804937826661376, "learning_rate": 4.012229313386013e-06, "loss": 0.308, "step": 9322 }, { "epoch": 1.4266258607498088, "grad_norm": 2.0792187323412, "learning_rate": 4.010244681461319e-06, "loss": 0.3295, "step": 9323 }, { "epoch": 1.426778882938026, "grad_norm": 2.050108419893287, "learning_rate": 4.008260417390938e-06, "loss": 0.3332, "step": 9324 }, { "epoch": 1.4269319051262432, "grad_norm": 2.073816356773294, "learning_rate": 4.0062765212967215e-06, "loss": 0.3315, "step": 9325 }, { "epoch": 1.4270849273144606, "grad_norm": 2.05419996109476, "learning_rate": 4.004292993300508e-06, "loss": 0.3372, "step": 9326 }, { "epoch": 1.427237949502678, "grad_norm": 2.4100490477444887, "learning_rate": 4.002309833524124e-06, "loss": 0.3862, "step": 9327 }, { "epoch": 1.4273909716908952, "grad_norm": 1.9825638628012534, "learning_rate": 4.000327042089353e-06, "loss": 0.3253, "step": 9328 }, { "epoch": 1.4275439938791123, "grad_norm": 2.2351531713408934, "learning_rate": 3.998344619117965e-06, "loss": 0.3989, "step": 9329 }, { "epoch": 1.4276970160673297, "grad_norm": 2.0848182917817617, "learning_rate": 3.996362564731721e-06, "loss": 0.3643, "step": 9330 }, { "epoch": 1.427850038255547, "grad_norm": 2.1695084103350344, "learning_rate": 3.99438087905233e-06, "loss": 0.3361, "step": 9331 }, { "epoch": 1.4280030604437643, "grad_norm": 2.384032138139611, "learning_rate": 3.992399562201507e-06, "loss": 0.3408, "step": 9332 }, { "epoch": 1.4281560826319817, "grad_norm": 1.984735482035343, "learning_rate": 3.990418614300931e-06, "loss": 0.2962, "step": 9333 }, { "epoch": 1.4283091048201988, "grad_norm": 1.8606884139982514, "learning_rate": 3.988438035472253e-06, "loss": 0.3065, "step": 9334 }, { "epoch": 1.4284621270084163, "grad_norm": 2.0350449158318558, "learning_rate": 3.986457825837115e-06, "loss": 0.2656, "step": 9335 }, { "epoch": 1.4286151491966335, "grad_norm": 2.2668034428389268, "learning_rate": 3.98447798551713e-06, "loss": 0.3188, "step": 9336 }, { "epoch": 1.4287681713848508, "grad_norm": 2.507306255993113, "learning_rate": 3.982498514633879e-06, "loss": 0.4137, "step": 9337 }, { "epoch": 1.4289211935730681, "grad_norm": 2.2283287973014194, "learning_rate": 3.980519413308938e-06, "loss": 0.3417, "step": 9338 }, { "epoch": 1.4290742157612855, "grad_norm": 2.0412354072050025, "learning_rate": 3.9785406816638474e-06, "loss": 0.371, "step": 9339 }, { "epoch": 1.4292272379495028, "grad_norm": 2.4175597698942464, "learning_rate": 3.97656231982013e-06, "loss": 0.4123, "step": 9340 }, { "epoch": 1.42938026013772, "grad_norm": 2.1106879107993812, "learning_rate": 3.9745843278992835e-06, "loss": 0.3441, "step": 9341 }, { "epoch": 1.4295332823259372, "grad_norm": 2.178146614681262, "learning_rate": 3.9726067060227855e-06, "loss": 0.3439, "step": 9342 }, { "epoch": 1.4296863045141546, "grad_norm": 1.9155241157979555, "learning_rate": 3.97062945431209e-06, "loss": 0.2897, "step": 9343 }, { "epoch": 1.429839326702372, "grad_norm": 2.1661492829484477, "learning_rate": 3.968652572888626e-06, "loss": 0.3392, "step": 9344 }, { "epoch": 1.4299923488905892, "grad_norm": 2.1917874006953553, "learning_rate": 3.966676061873802e-06, "loss": 0.3808, "step": 9345 }, { "epoch": 1.4301453710788063, "grad_norm": 2.1886007253205038, "learning_rate": 3.9646999213890045e-06, "loss": 0.3263, "step": 9346 }, { "epoch": 1.4302983932670237, "grad_norm": 1.8647429617175988, "learning_rate": 3.96272415155559e-06, "loss": 0.3029, "step": 9347 }, { "epoch": 1.430451415455241, "grad_norm": 2.145380663913235, "learning_rate": 3.960748752494912e-06, "loss": 0.3374, "step": 9348 }, { "epoch": 1.4306044376434583, "grad_norm": 1.8874504028970565, "learning_rate": 3.9587737243282745e-06, "loss": 0.3119, "step": 9349 }, { "epoch": 1.4307574598316757, "grad_norm": 2.0694188207539903, "learning_rate": 3.9567990671769715e-06, "loss": 0.3817, "step": 9350 }, { "epoch": 1.4309104820198928, "grad_norm": 1.9065080001772796, "learning_rate": 3.954824781162288e-06, "loss": 0.3096, "step": 9351 }, { "epoch": 1.4310635042081101, "grad_norm": 2.121242007256196, "learning_rate": 3.952850866405455e-06, "loss": 0.3715, "step": 9352 }, { "epoch": 1.4312165263963275, "grad_norm": 2.0622979357181963, "learning_rate": 3.950877323027711e-06, "loss": 0.3287, "step": 9353 }, { "epoch": 1.4313695485845448, "grad_norm": 2.4372797945913227, "learning_rate": 3.948904151150258e-06, "loss": 0.4393, "step": 9354 }, { "epoch": 1.4315225707727621, "grad_norm": 2.201072981321367, "learning_rate": 3.946931350894267e-06, "loss": 0.2913, "step": 9355 }, { "epoch": 1.4316755929609792, "grad_norm": 2.1034205574833695, "learning_rate": 3.944958922380904e-06, "loss": 0.3524, "step": 9356 }, { "epoch": 1.4318286151491966, "grad_norm": 2.172868895461568, "learning_rate": 3.942986865731304e-06, "loss": 0.3317, "step": 9357 }, { "epoch": 1.431981637337414, "grad_norm": 2.0235574851664198, "learning_rate": 3.94101518106657e-06, "loss": 0.3337, "step": 9358 }, { "epoch": 1.4321346595256312, "grad_norm": 1.9239559729267717, "learning_rate": 3.939043868507801e-06, "loss": 0.3044, "step": 9359 }, { "epoch": 1.4322876817138486, "grad_norm": 2.116674414107678, "learning_rate": 3.937072928176057e-06, "loss": 0.3301, "step": 9360 }, { "epoch": 1.4324407039020657, "grad_norm": 2.171452873241745, "learning_rate": 3.935102360192382e-06, "loss": 0.3639, "step": 9361 }, { "epoch": 1.432593726090283, "grad_norm": 2.1857081150102005, "learning_rate": 3.933132164677799e-06, "loss": 0.3481, "step": 9362 }, { "epoch": 1.4327467482785003, "grad_norm": 2.169013596341822, "learning_rate": 3.931162341753302e-06, "loss": 0.3509, "step": 9363 }, { "epoch": 1.4328997704667177, "grad_norm": 1.896338149593098, "learning_rate": 3.9291928915398685e-06, "loss": 0.2785, "step": 9364 }, { "epoch": 1.433052792654935, "grad_norm": 2.549158690084208, "learning_rate": 3.927223814158447e-06, "loss": 0.3658, "step": 9365 }, { "epoch": 1.4332058148431521, "grad_norm": 2.0841384735924176, "learning_rate": 3.925255109729969e-06, "loss": 0.3119, "step": 9366 }, { "epoch": 1.4333588370313697, "grad_norm": 1.9791111620268143, "learning_rate": 3.923286778375339e-06, "loss": 0.3146, "step": 9367 }, { "epoch": 1.4335118592195868, "grad_norm": 2.2175978956874953, "learning_rate": 3.92131882021544e-06, "loss": 0.3854, "step": 9368 }, { "epoch": 1.4336648814078041, "grad_norm": 1.9276236760923329, "learning_rate": 3.919351235371133e-06, "loss": 0.2764, "step": 9369 }, { "epoch": 1.4338179035960215, "grad_norm": 2.4958786979979513, "learning_rate": 3.917384023963254e-06, "loss": 0.3887, "step": 9370 }, { "epoch": 1.4339709257842388, "grad_norm": 1.8840779023709011, "learning_rate": 3.9154171861126165e-06, "loss": 0.2659, "step": 9371 }, { "epoch": 1.4341239479724561, "grad_norm": 1.818201570937649, "learning_rate": 3.913450721940013e-06, "loss": 0.2917, "step": 9372 }, { "epoch": 1.4342769701606732, "grad_norm": 2.309820936836204, "learning_rate": 3.9114846315662114e-06, "loss": 0.3719, "step": 9373 }, { "epoch": 1.4344299923488906, "grad_norm": 2.023699879434676, "learning_rate": 3.909518915111957e-06, "loss": 0.3303, "step": 9374 }, { "epoch": 1.434583014537108, "grad_norm": 1.9069980001503113, "learning_rate": 3.9075535726979715e-06, "loss": 0.2974, "step": 9375 }, { "epoch": 1.4347360367253252, "grad_norm": 2.170571525569376, "learning_rate": 3.905588604444953e-06, "loss": 0.3023, "step": 9376 }, { "epoch": 1.4348890589135426, "grad_norm": 2.1143488858850663, "learning_rate": 3.903624010473585e-06, "loss": 0.3012, "step": 9377 }, { "epoch": 1.4350420811017597, "grad_norm": 2.2298559527760418, "learning_rate": 3.901659790904514e-06, "loss": 0.359, "step": 9378 }, { "epoch": 1.435195103289977, "grad_norm": 1.8232134485763651, "learning_rate": 3.899695945858367e-06, "loss": 0.3267, "step": 9379 }, { "epoch": 1.4353481254781943, "grad_norm": 2.029838669991213, "learning_rate": 3.897732475455764e-06, "loss": 0.2858, "step": 9380 }, { "epoch": 1.4355011476664117, "grad_norm": 1.7961842764250062, "learning_rate": 3.895769379817279e-06, "loss": 0.3031, "step": 9381 }, { "epoch": 1.435654169854629, "grad_norm": 2.169262090657161, "learning_rate": 3.893806659063473e-06, "loss": 0.2784, "step": 9382 }, { "epoch": 1.4358071920428461, "grad_norm": 1.9856810913149714, "learning_rate": 3.891844313314893e-06, "loss": 0.2901, "step": 9383 }, { "epoch": 1.4359602142310635, "grad_norm": 2.2450828272982783, "learning_rate": 3.889882342692043e-06, "loss": 0.3681, "step": 9384 }, { "epoch": 1.4361132364192808, "grad_norm": 2.304650971104986, "learning_rate": 3.8879207473154245e-06, "loss": 0.3499, "step": 9385 }, { "epoch": 1.4362662586074981, "grad_norm": 2.057041817931834, "learning_rate": 3.885959527305507e-06, "loss": 0.3643, "step": 9386 }, { "epoch": 1.4364192807957155, "grad_norm": 2.1277655643358195, "learning_rate": 3.883998682782727e-06, "loss": 0.394, "step": 9387 }, { "epoch": 1.4365723029839326, "grad_norm": 2.073493486256418, "learning_rate": 3.882038213867516e-06, "loss": 0.3343, "step": 9388 }, { "epoch": 1.43672532517215, "grad_norm": 2.300417499890103, "learning_rate": 3.880078120680273e-06, "loss": 0.4377, "step": 9389 }, { "epoch": 1.4368783473603672, "grad_norm": 2.1283040533902344, "learning_rate": 3.878118403341373e-06, "loss": 0.3926, "step": 9390 }, { "epoch": 1.4370313695485846, "grad_norm": 2.0678784247283066, "learning_rate": 3.876159061971172e-06, "loss": 0.3086, "step": 9391 }, { "epoch": 1.437184391736802, "grad_norm": 2.5933724046485556, "learning_rate": 3.874200096689999e-06, "loss": 0.4141, "step": 9392 }, { "epoch": 1.437337413925019, "grad_norm": 2.569529420688582, "learning_rate": 3.872241507618162e-06, "loss": 0.3729, "step": 9393 }, { "epoch": 1.4374904361132363, "grad_norm": 2.424768703143181, "learning_rate": 3.870283294875946e-06, "loss": 0.3334, "step": 9394 }, { "epoch": 1.4376434583014537, "grad_norm": 2.075534358433235, "learning_rate": 3.868325458583613e-06, "loss": 0.3691, "step": 9395 }, { "epoch": 1.437796480489671, "grad_norm": 2.035561827260354, "learning_rate": 3.8663679988614004e-06, "loss": 0.2953, "step": 9396 }, { "epoch": 1.4379495026778883, "grad_norm": 2.5251761093520715, "learning_rate": 3.864410915829523e-06, "loss": 0.3889, "step": 9397 }, { "epoch": 1.4381025248661055, "grad_norm": 2.4679716294913505, "learning_rate": 3.862454209608175e-06, "loss": 0.317, "step": 9398 }, { "epoch": 1.438255547054323, "grad_norm": 2.148348350788539, "learning_rate": 3.860497880317523e-06, "loss": 0.3622, "step": 9399 }, { "epoch": 1.4384085692425401, "grad_norm": 2.1987933936762842, "learning_rate": 3.858541928077716e-06, "loss": 0.3212, "step": 9400 }, { "epoch": 1.4385615914307575, "grad_norm": 2.0618331098672638, "learning_rate": 3.856586353008873e-06, "loss": 0.3206, "step": 9401 }, { "epoch": 1.4387146136189748, "grad_norm": 2.37921277084796, "learning_rate": 3.854631155231096e-06, "loss": 0.4022, "step": 9402 }, { "epoch": 1.4388676358071921, "grad_norm": 2.0436503449931194, "learning_rate": 3.85267633486446e-06, "loss": 0.2922, "step": 9403 }, { "epoch": 1.4390206579954095, "grad_norm": 1.8930267404115466, "learning_rate": 3.850721892029019e-06, "loss": 0.3687, "step": 9404 }, { "epoch": 1.4391736801836266, "grad_norm": 2.01203672849637, "learning_rate": 3.8487678268448005e-06, "loss": 0.3022, "step": 9405 }, { "epoch": 1.439326702371844, "grad_norm": 2.635657231491509, "learning_rate": 3.846814139431819e-06, "loss": 0.3954, "step": 9406 }, { "epoch": 1.4394797245600612, "grad_norm": 1.7576099379374932, "learning_rate": 3.844860829910048e-06, "loss": 0.2292, "step": 9407 }, { "epoch": 1.4396327467482786, "grad_norm": 2.0022483302447966, "learning_rate": 3.842907898399452e-06, "loss": 0.287, "step": 9408 }, { "epoch": 1.439785768936496, "grad_norm": 2.3685326123050348, "learning_rate": 3.840955345019973e-06, "loss": 0.3695, "step": 9409 }, { "epoch": 1.439938791124713, "grad_norm": 2.131612009218713, "learning_rate": 3.839003169891519e-06, "loss": 0.3578, "step": 9410 }, { "epoch": 1.4400918133129303, "grad_norm": 2.3468392356985324, "learning_rate": 3.8370513731339775e-06, "loss": 0.3115, "step": 9411 }, { "epoch": 1.4402448355011477, "grad_norm": 2.223056491521997, "learning_rate": 3.835099954867228e-06, "loss": 0.3995, "step": 9412 }, { "epoch": 1.440397857689365, "grad_norm": 2.0760931406496095, "learning_rate": 3.833148915211101e-06, "loss": 0.2893, "step": 9413 }, { "epoch": 1.4405508798775823, "grad_norm": 2.5262822068969313, "learning_rate": 3.831198254285428e-06, "loss": 0.4318, "step": 9414 }, { "epoch": 1.4407039020657995, "grad_norm": 2.090177035772777, "learning_rate": 3.829247972210005e-06, "loss": 0.3353, "step": 9415 }, { "epoch": 1.4408569242540168, "grad_norm": 2.2014983845962877, "learning_rate": 3.827298069104598e-06, "loss": 0.3364, "step": 9416 }, { "epoch": 1.4410099464422341, "grad_norm": 2.12746696908804, "learning_rate": 3.825348545088967e-06, "loss": 0.3543, "step": 9417 }, { "epoch": 1.4411629686304515, "grad_norm": 2.1360864766502017, "learning_rate": 3.823399400282838e-06, "loss": 0.3685, "step": 9418 }, { "epoch": 1.4413159908186688, "grad_norm": 1.8667857752382644, "learning_rate": 3.821450634805915e-06, "loss": 0.2794, "step": 9419 }, { "epoch": 1.441469013006886, "grad_norm": 2.4708104972068607, "learning_rate": 3.819502248777878e-06, "loss": 0.3114, "step": 9420 }, { "epoch": 1.4416220351951032, "grad_norm": 2.0565915598013644, "learning_rate": 3.8175542423183865e-06, "loss": 0.3356, "step": 9421 }, { "epoch": 1.4417750573833206, "grad_norm": 1.8670926466183604, "learning_rate": 3.815606615547075e-06, "loss": 0.3092, "step": 9422 }, { "epoch": 1.441928079571538, "grad_norm": 2.108441622458775, "learning_rate": 3.8136593685835545e-06, "loss": 0.3217, "step": 9423 }, { "epoch": 1.4420811017597552, "grad_norm": 2.2213785242745914, "learning_rate": 3.811712501547413e-06, "loss": 0.3894, "step": 9424 }, { "epoch": 1.4422341239479723, "grad_norm": 2.2379539576928034, "learning_rate": 3.809766014558216e-06, "loss": 0.3155, "step": 9425 }, { "epoch": 1.4423871461361897, "grad_norm": 2.1450936115838046, "learning_rate": 3.8078199077355017e-06, "loss": 0.3084, "step": 9426 }, { "epoch": 1.442540168324407, "grad_norm": 1.9620617688500854, "learning_rate": 3.8058741811987966e-06, "loss": 0.3054, "step": 9427 }, { "epoch": 1.4426931905126243, "grad_norm": 2.3851341339259178, "learning_rate": 3.8039288350675862e-06, "loss": 0.3356, "step": 9428 }, { "epoch": 1.4428462127008417, "grad_norm": 2.002450463199621, "learning_rate": 3.801983869461342e-06, "loss": 0.3458, "step": 9429 }, { "epoch": 1.4429992348890588, "grad_norm": 2.1660350030336653, "learning_rate": 3.8000392844995227e-06, "loss": 0.3357, "step": 9430 }, { "epoch": 1.4431522570772761, "grad_norm": 1.869478686298557, "learning_rate": 3.7980950803015417e-06, "loss": 0.3066, "step": 9431 }, { "epoch": 1.4433052792654935, "grad_norm": 2.3072334674826576, "learning_rate": 3.7961512569867997e-06, "loss": 0.3314, "step": 9432 }, { "epoch": 1.4434583014537108, "grad_norm": 2.275252761892947, "learning_rate": 3.7942078146746852e-06, "loss": 0.3406, "step": 9433 }, { "epoch": 1.4436113236419281, "grad_norm": 1.9311399642400455, "learning_rate": 3.79226475348454e-06, "loss": 0.2448, "step": 9434 }, { "epoch": 1.4437643458301452, "grad_norm": 1.9937141861498646, "learning_rate": 3.7903220735357037e-06, "loss": 0.3383, "step": 9435 }, { "epoch": 1.4439173680183628, "grad_norm": 2.2548689227421903, "learning_rate": 3.788379774947486e-06, "loss": 0.3373, "step": 9436 }, { "epoch": 1.44407039020658, "grad_norm": 2.245991213469082, "learning_rate": 3.786437857839158e-06, "loss": 0.3213, "step": 9437 }, { "epoch": 1.4442234123947972, "grad_norm": 2.1682580136836727, "learning_rate": 3.7844963223299925e-06, "loss": 0.3057, "step": 9438 }, { "epoch": 1.4443764345830146, "grad_norm": 2.361384023733611, "learning_rate": 3.782555168539227e-06, "loss": 0.2986, "step": 9439 }, { "epoch": 1.444529456771232, "grad_norm": 2.0796621089626828, "learning_rate": 3.780614396586064e-06, "loss": 0.3345, "step": 9440 }, { "epoch": 1.4446824789594492, "grad_norm": 2.1994329703056628, "learning_rate": 3.7786740065897055e-06, "loss": 0.3354, "step": 9441 }, { "epoch": 1.4448355011476663, "grad_norm": 1.8833177217194372, "learning_rate": 3.776733998669314e-06, "loss": 0.2956, "step": 9442 }, { "epoch": 1.4449885233358837, "grad_norm": 2.3741820074807496, "learning_rate": 3.774794372944033e-06, "loss": 0.3962, "step": 9443 }, { "epoch": 1.445141545524101, "grad_norm": 2.190022440966505, "learning_rate": 3.772855129532982e-06, "loss": 0.3202, "step": 9444 }, { "epoch": 1.4452945677123183, "grad_norm": 2.0964969488525247, "learning_rate": 3.7709162685552592e-06, "loss": 0.333, "step": 9445 }, { "epoch": 1.4454475899005357, "grad_norm": 2.028210724620882, "learning_rate": 3.768977790129936e-06, "loss": 0.2656, "step": 9446 }, { "epoch": 1.4456006120887528, "grad_norm": 2.1346199022390726, "learning_rate": 3.7670396943760635e-06, "loss": 0.3149, "step": 9447 }, { "epoch": 1.4457536342769701, "grad_norm": 2.042384274192098, "learning_rate": 3.7651019814126656e-06, "loss": 0.351, "step": 9448 }, { "epoch": 1.4459066564651875, "grad_norm": 2.1653394714366656, "learning_rate": 3.7631646513587473e-06, "loss": 0.3592, "step": 9449 }, { "epoch": 1.4460596786534048, "grad_norm": 1.9813776866303934, "learning_rate": 3.7612277043332857e-06, "loss": 0.2843, "step": 9450 }, { "epoch": 1.4462127008416221, "grad_norm": 2.0487930782771206, "learning_rate": 3.759291140455237e-06, "loss": 0.2968, "step": 9451 }, { "epoch": 1.4463657230298392, "grad_norm": 2.1717832715799537, "learning_rate": 3.7573549598435334e-06, "loss": 0.3326, "step": 9452 }, { "epoch": 1.4465187452180566, "grad_norm": 2.469544616284677, "learning_rate": 3.755419162617082e-06, "loss": 0.3397, "step": 9453 }, { "epoch": 1.446671767406274, "grad_norm": 2.3789326927343257, "learning_rate": 3.7534837488947705e-06, "loss": 0.3217, "step": 9454 }, { "epoch": 1.4468247895944912, "grad_norm": 2.0716322076496927, "learning_rate": 3.7515487187954536e-06, "loss": 0.3123, "step": 9455 }, { "epoch": 1.4469778117827086, "grad_norm": 2.0332000399276637, "learning_rate": 3.7496140724379815e-06, "loss": 0.3046, "step": 9456 }, { "epoch": 1.4471308339709257, "grad_norm": 1.999069011734315, "learning_rate": 3.7476798099411583e-06, "loss": 0.3494, "step": 9457 }, { "epoch": 1.447283856159143, "grad_norm": 2.2347715534841033, "learning_rate": 3.745745931423772e-06, "loss": 0.3115, "step": 9458 }, { "epoch": 1.4474368783473603, "grad_norm": 2.0316516810035026, "learning_rate": 3.7438124370046025e-06, "loss": 0.3059, "step": 9459 }, { "epoch": 1.4475899005355777, "grad_norm": 1.8778006090816908, "learning_rate": 3.7418793268023823e-06, "loss": 0.2661, "step": 9460 }, { "epoch": 1.447742922723795, "grad_norm": 2.087926127627727, "learning_rate": 3.739946600935831e-06, "loss": 0.3777, "step": 9461 }, { "epoch": 1.4478959449120121, "grad_norm": 1.910762098685788, "learning_rate": 3.738014259523656e-06, "loss": 0.3092, "step": 9462 }, { "epoch": 1.4480489671002295, "grad_norm": 2.1419651512255604, "learning_rate": 3.736082302684514e-06, "loss": 0.3237, "step": 9463 }, { "epoch": 1.4482019892884468, "grad_norm": 1.9853049413866304, "learning_rate": 3.734150730537067e-06, "loss": 0.3298, "step": 9464 }, { "epoch": 1.4483550114766641, "grad_norm": 2.169481095354853, "learning_rate": 3.7322195431999386e-06, "loss": 0.3032, "step": 9465 }, { "epoch": 1.4485080336648815, "grad_norm": 2.3886882013046042, "learning_rate": 3.730288740791721e-06, "loss": 0.3152, "step": 9466 }, { "epoch": 1.4486610558530986, "grad_norm": 2.1726594327249535, "learning_rate": 3.728358323431003e-06, "loss": 0.3891, "step": 9467 }, { "epoch": 1.4488140780413161, "grad_norm": 2.3278280144910983, "learning_rate": 3.726428291236338e-06, "loss": 0.3213, "step": 9468 }, { "epoch": 1.4489671002295332, "grad_norm": 2.0213566163252144, "learning_rate": 3.7244986443262464e-06, "loss": 0.2588, "step": 9469 }, { "epoch": 1.4491201224177506, "grad_norm": 2.123312387822712, "learning_rate": 3.7225693828192476e-06, "loss": 0.364, "step": 9470 }, { "epoch": 1.449273144605968, "grad_norm": 1.95586400386957, "learning_rate": 3.7206405068338203e-06, "loss": 0.3514, "step": 9471 }, { "epoch": 1.4494261667941852, "grad_norm": 1.8020489683421461, "learning_rate": 3.718712016488425e-06, "loss": 0.2893, "step": 9472 }, { "epoch": 1.4495791889824026, "grad_norm": 2.1640656820411133, "learning_rate": 3.716783911901496e-06, "loss": 0.3673, "step": 9473 }, { "epoch": 1.4497322111706197, "grad_norm": 2.0896268775804034, "learning_rate": 3.7148561931914472e-06, "loss": 0.325, "step": 9474 }, { "epoch": 1.449885233358837, "grad_norm": 2.1222460335419298, "learning_rate": 3.712928860476668e-06, "loss": 0.3339, "step": 9475 }, { "epoch": 1.4500382555470543, "grad_norm": 2.3419143356584082, "learning_rate": 3.7110019138755227e-06, "loss": 0.3304, "step": 9476 }, { "epoch": 1.4501912777352717, "grad_norm": 1.9960251059645366, "learning_rate": 3.7090753535063517e-06, "loss": 0.3324, "step": 9477 }, { "epoch": 1.450344299923489, "grad_norm": 2.0769593423066866, "learning_rate": 3.7071491794874736e-06, "loss": 0.2987, "step": 9478 }, { "epoch": 1.4504973221117061, "grad_norm": 2.232353678787646, "learning_rate": 3.7052233919371826e-06, "loss": 0.3502, "step": 9479 }, { "epoch": 1.4506503442999235, "grad_norm": 2.2035358060269954, "learning_rate": 3.703297990973749e-06, "loss": 0.3136, "step": 9480 }, { "epoch": 1.4508033664881408, "grad_norm": 2.2382851331924285, "learning_rate": 3.7013729767154172e-06, "loss": 0.3332, "step": 9481 }, { "epoch": 1.4509563886763581, "grad_norm": 2.2981953616018767, "learning_rate": 3.6994483492804124e-06, "loss": 0.3646, "step": 9482 }, { "epoch": 1.4511094108645755, "grad_norm": 2.1714996604088523, "learning_rate": 3.6975241087869317e-06, "loss": 0.3799, "step": 9483 }, { "epoch": 1.4512624330527926, "grad_norm": 2.2662738415618726, "learning_rate": 3.6956002553531513e-06, "loss": 0.3537, "step": 9484 }, { "epoch": 1.45141545524101, "grad_norm": 2.149039370046887, "learning_rate": 3.6936767890972215e-06, "loss": 0.3257, "step": 9485 }, { "epoch": 1.4515684774292272, "grad_norm": 1.9961110412792133, "learning_rate": 3.6917537101372714e-06, "loss": 0.2954, "step": 9486 }, { "epoch": 1.4517214996174446, "grad_norm": 2.0931555997712676, "learning_rate": 3.6898310185914e-06, "loss": 0.318, "step": 9487 }, { "epoch": 1.451874521805662, "grad_norm": 2.483815223080329, "learning_rate": 3.687908714577698e-06, "loss": 0.3547, "step": 9488 }, { "epoch": 1.452027543993879, "grad_norm": 2.0723286404767487, "learning_rate": 3.6859867982142126e-06, "loss": 0.2933, "step": 9489 }, { "epoch": 1.4521805661820963, "grad_norm": 2.1016415401998407, "learning_rate": 3.6840652696189737e-06, "loss": 0.3124, "step": 9490 }, { "epoch": 1.4523335883703137, "grad_norm": 2.363286862435652, "learning_rate": 3.6821441289100025e-06, "loss": 0.3849, "step": 9491 }, { "epoch": 1.452486610558531, "grad_norm": 1.9554556428601573, "learning_rate": 3.6802233762052687e-06, "loss": 0.2859, "step": 9492 }, { "epoch": 1.4526396327467483, "grad_norm": 2.2228505660039666, "learning_rate": 3.6783030116227437e-06, "loss": 0.3699, "step": 9493 }, { "epoch": 1.4527926549349655, "grad_norm": 2.064645992589437, "learning_rate": 3.676383035280364e-06, "loss": 0.3022, "step": 9494 }, { "epoch": 1.4529456771231828, "grad_norm": 2.0773468735735694, "learning_rate": 3.674463447296035e-06, "loss": 0.2924, "step": 9495 }, { "epoch": 1.4530986993114001, "grad_norm": 2.433049644238986, "learning_rate": 3.672544247787654e-06, "loss": 0.3803, "step": 9496 }, { "epoch": 1.4532517214996175, "grad_norm": 1.9816445100523168, "learning_rate": 3.6706254368730877e-06, "loss": 0.3232, "step": 9497 }, { "epoch": 1.4534047436878348, "grad_norm": 2.6411701926591324, "learning_rate": 3.6687070146701665e-06, "loss": 0.3543, "step": 9498 }, { "epoch": 1.453557765876052, "grad_norm": 2.2355609159511505, "learning_rate": 3.6667889812967194e-06, "loss": 0.3351, "step": 9499 }, { "epoch": 1.4537107880642695, "grad_norm": 2.371135827805477, "learning_rate": 3.664871336870537e-06, "loss": 0.3725, "step": 9500 }, { "epoch": 1.4538638102524866, "grad_norm": 1.9464656212441733, "learning_rate": 3.662954081509388e-06, "loss": 0.3134, "step": 9501 }, { "epoch": 1.454016832440704, "grad_norm": 2.5268455510244294, "learning_rate": 3.6610372153310202e-06, "loss": 0.355, "step": 9502 }, { "epoch": 1.4541698546289212, "grad_norm": 2.0024853844314277, "learning_rate": 3.6591207384531557e-06, "loss": 0.2974, "step": 9503 }, { "epoch": 1.4543228768171386, "grad_norm": 2.352329885285306, "learning_rate": 3.657204650993491e-06, "loss": 0.3534, "step": 9504 }, { "epoch": 1.454475899005356, "grad_norm": 2.1327778506979604, "learning_rate": 3.6552889530697024e-06, "loss": 0.3452, "step": 9505 }, { "epoch": 1.454628921193573, "grad_norm": 1.9937031650471355, "learning_rate": 3.6533736447994404e-06, "loss": 0.2642, "step": 9506 }, { "epoch": 1.4547819433817903, "grad_norm": 2.178908960404664, "learning_rate": 3.651458726300331e-06, "loss": 0.3035, "step": 9507 }, { "epoch": 1.4549349655700077, "grad_norm": 1.9255218183461211, "learning_rate": 3.649544197689973e-06, "loss": 0.2867, "step": 9508 }, { "epoch": 1.455087987758225, "grad_norm": 2.154114129270153, "learning_rate": 3.6476300590859547e-06, "loss": 0.2427, "step": 9509 }, { "epoch": 1.4552410099464423, "grad_norm": 2.2499540356570407, "learning_rate": 3.6457163106058236e-06, "loss": 0.3877, "step": 9510 }, { "epoch": 1.4553940321346595, "grad_norm": 2.223283595049156, "learning_rate": 3.643802952367107e-06, "loss": 0.3417, "step": 9511 }, { "epoch": 1.4555470543228768, "grad_norm": 2.92579443127091, "learning_rate": 3.6418899844873246e-06, "loss": 0.3641, "step": 9512 }, { "epoch": 1.4557000765110941, "grad_norm": 2.2939770131216632, "learning_rate": 3.639977407083948e-06, "loss": 0.2643, "step": 9513 }, { "epoch": 1.4558530986993115, "grad_norm": 2.201385659868088, "learning_rate": 3.638065220274435e-06, "loss": 0.3657, "step": 9514 }, { "epoch": 1.4560061208875288, "grad_norm": 2.522618790905064, "learning_rate": 3.636153424176232e-06, "loss": 0.3216, "step": 9515 }, { "epoch": 1.456159143075746, "grad_norm": 2.041065696565262, "learning_rate": 3.634242018906736e-06, "loss": 0.3, "step": 9516 }, { "epoch": 1.4563121652639632, "grad_norm": 1.9566790868626256, "learning_rate": 3.632331004583345e-06, "loss": 0.3274, "step": 9517 }, { "epoch": 1.4564651874521806, "grad_norm": 2.1886566680108515, "learning_rate": 3.6304203813234198e-06, "loss": 0.357, "step": 9518 }, { "epoch": 1.456618209640398, "grad_norm": 2.1183396413555466, "learning_rate": 3.6285101492442897e-06, "loss": 0.3473, "step": 9519 }, { "epoch": 1.4567712318286152, "grad_norm": 2.2295734686348543, "learning_rate": 3.6266003084632804e-06, "loss": 0.2975, "step": 9520 }, { "epoch": 1.4569242540168323, "grad_norm": 2.412345482780333, "learning_rate": 3.6246908590976826e-06, "loss": 0.3353, "step": 9521 }, { "epoch": 1.4570772762050497, "grad_norm": 2.1028388857901392, "learning_rate": 3.622781801264753e-06, "loss": 0.3837, "step": 9522 }, { "epoch": 1.457230298393267, "grad_norm": 2.2847187812274217, "learning_rate": 3.6208731350817437e-06, "loss": 0.3641, "step": 9523 }, { "epoch": 1.4573833205814843, "grad_norm": 2.4648038833329204, "learning_rate": 3.6189648606658712e-06, "loss": 0.3452, "step": 9524 }, { "epoch": 1.4575363427697017, "grad_norm": 1.9838995278832425, "learning_rate": 3.617056978134329e-06, "loss": 0.2456, "step": 9525 }, { "epoch": 1.4576893649579188, "grad_norm": 2.623646002262567, "learning_rate": 3.615149487604288e-06, "loss": 0.3425, "step": 9526 }, { "epoch": 1.4578423871461361, "grad_norm": 1.950155135421621, "learning_rate": 3.6132423891928947e-06, "loss": 0.2618, "step": 9527 }, { "epoch": 1.4579954093343535, "grad_norm": 2.081492680781751, "learning_rate": 3.611335683017272e-06, "loss": 0.2437, "step": 9528 }, { "epoch": 1.4581484315225708, "grad_norm": 2.2862166927483623, "learning_rate": 3.609429369194516e-06, "loss": 0.3857, "step": 9529 }, { "epoch": 1.4583014537107881, "grad_norm": 1.9554078644755999, "learning_rate": 3.6075234478417032e-06, "loss": 0.2799, "step": 9530 }, { "epoch": 1.4584544758990052, "grad_norm": 2.0393830421015435, "learning_rate": 3.6056179190758833e-06, "loss": 0.3213, "step": 9531 }, { "epoch": 1.4586074980872226, "grad_norm": 1.8992585160790252, "learning_rate": 3.6037127830140817e-06, "loss": 0.2712, "step": 9532 }, { "epoch": 1.45876052027544, "grad_norm": 2.1471319143065704, "learning_rate": 3.6018080397733013e-06, "loss": 0.3378, "step": 9533 }, { "epoch": 1.4589135424636572, "grad_norm": 2.2031908095595756, "learning_rate": 3.5999036894705185e-06, "loss": 0.3217, "step": 9534 }, { "epoch": 1.4590665646518746, "grad_norm": 2.3220186298678893, "learning_rate": 3.5979997322226866e-06, "loss": 0.3869, "step": 9535 }, { "epoch": 1.4592195868400917, "grad_norm": 2.5306612080969524, "learning_rate": 3.5960961681467364e-06, "loss": 0.3477, "step": 9536 }, { "epoch": 1.4593726090283092, "grad_norm": 2.073885257035563, "learning_rate": 3.5941929973595703e-06, "loss": 0.3113, "step": 9537 }, { "epoch": 1.4595256312165263, "grad_norm": 2.2510208071748097, "learning_rate": 3.5922902199780775e-06, "loss": 0.3027, "step": 9538 }, { "epoch": 1.4596786534047437, "grad_norm": 2.170543165202409, "learning_rate": 3.590387836119107e-06, "loss": 0.3865, "step": 9539 }, { "epoch": 1.459831675592961, "grad_norm": 1.9292932434270147, "learning_rate": 3.58848584589949e-06, "loss": 0.2739, "step": 9540 }, { "epoch": 1.4599846977811783, "grad_norm": 1.8320076614211738, "learning_rate": 3.586584249436046e-06, "loss": 0.2785, "step": 9541 }, { "epoch": 1.4601377199693957, "grad_norm": 2.1935291445909013, "learning_rate": 3.5846830468455497e-06, "loss": 0.3469, "step": 9542 }, { "epoch": 1.4602907421576128, "grad_norm": 2.176716608400466, "learning_rate": 3.58278223824476e-06, "loss": 0.272, "step": 9543 }, { "epoch": 1.4604437643458301, "grad_norm": 1.799210093073552, "learning_rate": 3.580881823750425e-06, "loss": 0.2937, "step": 9544 }, { "epoch": 1.4605967865340475, "grad_norm": 1.9702394956780114, "learning_rate": 3.578981803479241e-06, "loss": 0.261, "step": 9545 }, { "epoch": 1.4607498087222648, "grad_norm": 2.2698743825940917, "learning_rate": 3.577082177547908e-06, "loss": 0.3467, "step": 9546 }, { "epoch": 1.4609028309104821, "grad_norm": 2.4331017290730057, "learning_rate": 3.575182946073089e-06, "loss": 0.3096, "step": 9547 }, { "epoch": 1.4610558530986992, "grad_norm": 2.2378192392486937, "learning_rate": 3.573284109171412e-06, "loss": 0.2813, "step": 9548 }, { "epoch": 1.4612088752869166, "grad_norm": 1.949646785682065, "learning_rate": 3.5713856669595025e-06, "loss": 0.2898, "step": 9549 }, { "epoch": 1.461361897475134, "grad_norm": 2.3546898453130605, "learning_rate": 3.5694876195539506e-06, "loss": 0.3316, "step": 9550 }, { "epoch": 1.4615149196633512, "grad_norm": 2.0028492455077735, "learning_rate": 3.5675899670713156e-06, "loss": 0.3081, "step": 9551 }, { "epoch": 1.4616679418515686, "grad_norm": 2.3157113370587377, "learning_rate": 3.565692709628146e-06, "loss": 0.3438, "step": 9552 }, { "epoch": 1.4618209640397857, "grad_norm": 2.0500920849676345, "learning_rate": 3.5637958473409595e-06, "loss": 0.283, "step": 9553 }, { "epoch": 1.461973986228003, "grad_norm": 2.2982221861588386, "learning_rate": 3.561899380326248e-06, "loss": 0.3415, "step": 9554 }, { "epoch": 1.4621270084162203, "grad_norm": 2.1105916046380777, "learning_rate": 3.5600033087004814e-06, "loss": 0.3392, "step": 9555 }, { "epoch": 1.4622800306044377, "grad_norm": 2.106406268139071, "learning_rate": 3.5581076325801045e-06, "loss": 0.3018, "step": 9556 }, { "epoch": 1.462433052792655, "grad_norm": 1.8577142920496017, "learning_rate": 3.5562123520815395e-06, "loss": 0.3208, "step": 9557 }, { "epoch": 1.4625860749808721, "grad_norm": 1.9650333120954178, "learning_rate": 3.554317467321182e-06, "loss": 0.3118, "step": 9558 }, { "epoch": 1.4627390971690895, "grad_norm": 1.9942111925567405, "learning_rate": 3.552422978415405e-06, "loss": 0.2956, "step": 9559 }, { "epoch": 1.4628921193573068, "grad_norm": 1.8524074124963654, "learning_rate": 3.5505288854805555e-06, "loss": 0.2709, "step": 9560 }, { "epoch": 1.4630451415455241, "grad_norm": 2.328829234349851, "learning_rate": 3.548635188632957e-06, "loss": 0.3441, "step": 9561 }, { "epoch": 1.4631981637337415, "grad_norm": 2.0595048605859057, "learning_rate": 3.5467418879889104e-06, "loss": 0.3298, "step": 9562 }, { "epoch": 1.4633511859219586, "grad_norm": 2.1752269089426495, "learning_rate": 3.5448489836646895e-06, "loss": 0.351, "step": 9563 }, { "epoch": 1.463504208110176, "grad_norm": 2.206069599112191, "learning_rate": 3.5429564757765454e-06, "loss": 0.3253, "step": 9564 }, { "epoch": 1.4636572302983932, "grad_norm": 2.2034438785230352, "learning_rate": 3.541064364440704e-06, "loss": 0.3659, "step": 9565 }, { "epoch": 1.4638102524866106, "grad_norm": 2.2927363798245346, "learning_rate": 3.5391726497733637e-06, "loss": 0.4403, "step": 9566 }, { "epoch": 1.463963274674828, "grad_norm": 2.1978108874987576, "learning_rate": 3.537281331890713e-06, "loss": 0.3383, "step": 9567 }, { "epoch": 1.464116296863045, "grad_norm": 2.44448450236065, "learning_rate": 3.5353904109088945e-06, "loss": 0.304, "step": 9568 }, { "epoch": 1.4642693190512626, "grad_norm": 2.137051347698189, "learning_rate": 3.533499886944037e-06, "loss": 0.3344, "step": 9569 }, { "epoch": 1.4644223412394797, "grad_norm": 2.2505696018802697, "learning_rate": 3.531609760112257e-06, "loss": 0.4358, "step": 9570 }, { "epoch": 1.464575363427697, "grad_norm": 2.051597472463943, "learning_rate": 3.5297200305296208e-06, "loss": 0.2754, "step": 9571 }, { "epoch": 1.4647283856159143, "grad_norm": 2.1406649536647047, "learning_rate": 3.527830698312187e-06, "loss": 0.3756, "step": 9572 }, { "epoch": 1.4648814078041317, "grad_norm": 2.0506454652157005, "learning_rate": 3.525941763575995e-06, "loss": 0.3418, "step": 9573 }, { "epoch": 1.465034429992349, "grad_norm": 2.1326475184422082, "learning_rate": 3.5240532264370396e-06, "loss": 0.2633, "step": 9574 }, { "epoch": 1.4651874521805661, "grad_norm": 2.1494489636744185, "learning_rate": 3.5221650870113134e-06, "loss": 0.3263, "step": 9575 }, { "epoch": 1.4653404743687835, "grad_norm": 2.2083023526559318, "learning_rate": 3.5202773454147733e-06, "loss": 0.3563, "step": 9576 }, { "epoch": 1.4654934965570008, "grad_norm": 2.1455256554573956, "learning_rate": 3.5183900017633445e-06, "loss": 0.3259, "step": 9577 }, { "epoch": 1.4656465187452181, "grad_norm": 2.0692219151315685, "learning_rate": 3.516503056172944e-06, "loss": 0.3062, "step": 9578 }, { "epoch": 1.4657995409334355, "grad_norm": 2.435498607808429, "learning_rate": 3.5146165087594586e-06, "loss": 0.3979, "step": 9579 }, { "epoch": 1.4659525631216526, "grad_norm": 2.251816963015846, "learning_rate": 3.5127303596387386e-06, "loss": 0.3213, "step": 9580 }, { "epoch": 1.46610558530987, "grad_norm": 2.1677463950149294, "learning_rate": 3.510844608926627e-06, "loss": 0.3407, "step": 9581 }, { "epoch": 1.4662586074980872, "grad_norm": 2.4337802808014017, "learning_rate": 3.5089592567389352e-06, "loss": 0.3395, "step": 9582 }, { "epoch": 1.4664116296863046, "grad_norm": 2.178833688485482, "learning_rate": 3.507074303191448e-06, "loss": 0.3467, "step": 9583 }, { "epoch": 1.466564651874522, "grad_norm": 2.079719791026871, "learning_rate": 3.5051897483999274e-06, "loss": 0.3201, "step": 9584 }, { "epoch": 1.466717674062739, "grad_norm": 2.318754978550932, "learning_rate": 3.503305592480113e-06, "loss": 0.31, "step": 9585 }, { "epoch": 1.4668706962509563, "grad_norm": 2.2671430905226684, "learning_rate": 3.501421835547718e-06, "loss": 0.3336, "step": 9586 }, { "epoch": 1.4670237184391737, "grad_norm": 1.9679858647231845, "learning_rate": 3.4995384777184295e-06, "loss": 0.3267, "step": 9587 }, { "epoch": 1.467176740627391, "grad_norm": 2.059412945703858, "learning_rate": 3.4976555191079142e-06, "loss": 0.2965, "step": 9588 }, { "epoch": 1.4673297628156083, "grad_norm": 1.8621298863000937, "learning_rate": 3.4957729598318104e-06, "loss": 0.3184, "step": 9589 }, { "epoch": 1.4674827850038255, "grad_norm": 2.0029242916594794, "learning_rate": 3.493890800005729e-06, "loss": 0.3288, "step": 9590 }, { "epoch": 1.4676358071920428, "grad_norm": 2.104395772468994, "learning_rate": 3.4920090397452743e-06, "loss": 0.2823, "step": 9591 }, { "epoch": 1.4677888293802601, "grad_norm": 1.8928052562757283, "learning_rate": 3.490127679166e-06, "loss": 0.2928, "step": 9592 }, { "epoch": 1.4679418515684775, "grad_norm": 2.0663466283826906, "learning_rate": 3.4882467183834478e-06, "loss": 0.274, "step": 9593 }, { "epoch": 1.4680948737566948, "grad_norm": 1.8060456661072481, "learning_rate": 3.486366157513146e-06, "loss": 0.3044, "step": 9594 }, { "epoch": 1.468247895944912, "grad_norm": 1.9513142978452787, "learning_rate": 3.4844859966705726e-06, "loss": 0.2943, "step": 9595 }, { "epoch": 1.4684009181331292, "grad_norm": 2.2091242439116665, "learning_rate": 3.4826062359712065e-06, "loss": 0.299, "step": 9596 }, { "epoch": 1.4685539403213466, "grad_norm": 2.041058385323502, "learning_rate": 3.4807268755304914e-06, "loss": 0.2695, "step": 9597 }, { "epoch": 1.468706962509564, "grad_norm": 2.1822736478283944, "learning_rate": 3.4788479154638356e-06, "loss": 0.357, "step": 9598 }, { "epoch": 1.4688599846977812, "grad_norm": 2.1286379228362606, "learning_rate": 3.4769693558866434e-06, "loss": 0.3365, "step": 9599 }, { "epoch": 1.4690130068859983, "grad_norm": 2.127037295930163, "learning_rate": 3.475091196914284e-06, "loss": 0.3397, "step": 9600 }, { "epoch": 1.469166029074216, "grad_norm": 2.1368346349315583, "learning_rate": 3.473213438662094e-06, "loss": 0.3509, "step": 9601 }, { "epoch": 1.469319051262433, "grad_norm": 2.079146890204384, "learning_rate": 3.4713360812454033e-06, "loss": 0.3229, "step": 9602 }, { "epoch": 1.4694720734506503, "grad_norm": 2.142254720047488, "learning_rate": 3.4694591247795028e-06, "loss": 0.3195, "step": 9603 }, { "epoch": 1.4696250956388677, "grad_norm": 2.0262041871453778, "learning_rate": 3.4675825693796662e-06, "loss": 0.3646, "step": 9604 }, { "epoch": 1.469778117827085, "grad_norm": 1.9264466580480444, "learning_rate": 3.4657064151611385e-06, "loss": 0.3186, "step": 9605 }, { "epoch": 1.4699311400153023, "grad_norm": 2.0276181691278503, "learning_rate": 3.4638306622391426e-06, "loss": 0.3137, "step": 9606 }, { "epoch": 1.4700841622035195, "grad_norm": 2.534691854587415, "learning_rate": 3.461955310728875e-06, "loss": 0.4236, "step": 9607 }, { "epoch": 1.4702371843917368, "grad_norm": 1.9148967223518105, "learning_rate": 3.4600803607455093e-06, "loss": 0.3288, "step": 9608 }, { "epoch": 1.4703902065799541, "grad_norm": 2.2857281715004416, "learning_rate": 3.4582058124041915e-06, "loss": 0.3491, "step": 9609 }, { "epoch": 1.4705432287681715, "grad_norm": 2.1665463187107314, "learning_rate": 3.4563316658200484e-06, "loss": 0.3467, "step": 9610 }, { "epoch": 1.4706962509563888, "grad_norm": 2.143549381240846, "learning_rate": 3.4544579211081753e-06, "loss": 0.3849, "step": 9611 }, { "epoch": 1.470849273144606, "grad_norm": 1.8653919338791964, "learning_rate": 3.4525845783836474e-06, "loss": 0.238, "step": 9612 }, { "epoch": 1.4710022953328232, "grad_norm": 2.1729156855165352, "learning_rate": 3.4507116377615157e-06, "loss": 0.3141, "step": 9613 }, { "epoch": 1.4711553175210406, "grad_norm": 2.3265866813135965, "learning_rate": 3.448839099356802e-06, "loss": 0.3338, "step": 9614 }, { "epoch": 1.471308339709258, "grad_norm": 1.854469823617688, "learning_rate": 3.4469669632845073e-06, "loss": 0.3019, "step": 9615 }, { "epoch": 1.4714613618974752, "grad_norm": 1.9374657496486478, "learning_rate": 3.4450952296596072e-06, "loss": 0.2676, "step": 9616 }, { "epoch": 1.4716143840856923, "grad_norm": 1.9448743453259358, "learning_rate": 3.4432238985970523e-06, "loss": 0.2811, "step": 9617 }, { "epoch": 1.4717674062739097, "grad_norm": 2.1803253590810434, "learning_rate": 3.441352970211769e-06, "loss": 0.3335, "step": 9618 }, { "epoch": 1.471920428462127, "grad_norm": 2.1774294836502586, "learning_rate": 3.4394824446186527e-06, "loss": 0.3215, "step": 9619 }, { "epoch": 1.4720734506503443, "grad_norm": 2.1656602648557874, "learning_rate": 3.437612321932591e-06, "loss": 0.3493, "step": 9620 }, { "epoch": 1.4722264728385617, "grad_norm": 2.111235629798471, "learning_rate": 3.4357426022684257e-06, "loss": 0.2601, "step": 9621 }, { "epoch": 1.4723794950267788, "grad_norm": 1.912232160522444, "learning_rate": 3.433873285740984e-06, "loss": 0.3647, "step": 9622 }, { "epoch": 1.4725325172149961, "grad_norm": 2.31825086042197, "learning_rate": 3.4320043724650763e-06, "loss": 0.3651, "step": 9623 }, { "epoch": 1.4726855394032135, "grad_norm": 1.7970660470580329, "learning_rate": 3.4301358625554713e-06, "loss": 0.2483, "step": 9624 }, { "epoch": 1.4728385615914308, "grad_norm": 2.2998009808220927, "learning_rate": 3.4282677561269217e-06, "loss": 0.2858, "step": 9625 }, { "epoch": 1.4729915837796481, "grad_norm": 2.1552214382883514, "learning_rate": 3.4264000532941644e-06, "loss": 0.3032, "step": 9626 }, { "epoch": 1.4731446059678652, "grad_norm": 2.1964411418119654, "learning_rate": 3.424532754171889e-06, "loss": 0.3218, "step": 9627 }, { "epoch": 1.4732976281560826, "grad_norm": 2.0569648465887544, "learning_rate": 3.422665858874784e-06, "loss": 0.3243, "step": 9628 }, { "epoch": 1.4734506503443, "grad_norm": 2.167669952320896, "learning_rate": 3.4207993675175023e-06, "loss": 0.3219, "step": 9629 }, { "epoch": 1.4736036725325172, "grad_norm": 1.9886377425796664, "learning_rate": 3.4189332802146623e-06, "loss": 0.3845, "step": 9630 }, { "epoch": 1.4737566947207346, "grad_norm": 2.3674925318781863, "learning_rate": 3.4170675970808797e-06, "loss": 0.3497, "step": 9631 }, { "epoch": 1.4739097169089517, "grad_norm": 2.0213186273832533, "learning_rate": 3.415202318230727e-06, "loss": 0.3284, "step": 9632 }, { "epoch": 1.4740627390971692, "grad_norm": 1.9433332846288336, "learning_rate": 3.4133374437787604e-06, "loss": 0.3333, "step": 9633 }, { "epoch": 1.4742157612853863, "grad_norm": 1.8829935353968115, "learning_rate": 3.411472973839509e-06, "loss": 0.2995, "step": 9634 }, { "epoch": 1.4743687834736037, "grad_norm": 2.199541576399467, "learning_rate": 3.4096089085274763e-06, "loss": 0.387, "step": 9635 }, { "epoch": 1.474521805661821, "grad_norm": 2.320246022684784, "learning_rate": 3.4077452479571425e-06, "loss": 0.3588, "step": 9636 }, { "epoch": 1.4746748278500381, "grad_norm": 2.2086550938995453, "learning_rate": 3.405881992242962e-06, "loss": 0.396, "step": 9637 }, { "epoch": 1.4748278500382557, "grad_norm": 2.5880420208227575, "learning_rate": 3.404019141499364e-06, "loss": 0.3122, "step": 9638 }, { "epoch": 1.4749808722264728, "grad_norm": 2.14820259118592, "learning_rate": 3.4021566958407547e-06, "loss": 0.3345, "step": 9639 }, { "epoch": 1.4751338944146901, "grad_norm": 2.291144556279864, "learning_rate": 3.4002946553815143e-06, "loss": 0.4546, "step": 9640 }, { "epoch": 1.4752869166029074, "grad_norm": 2.2789618485242107, "learning_rate": 3.398433020235996e-06, "loss": 0.2864, "step": 9641 }, { "epoch": 1.4754399387911248, "grad_norm": 2.16718251307733, "learning_rate": 3.3965717905185326e-06, "loss": 0.3261, "step": 9642 }, { "epoch": 1.4755929609793421, "grad_norm": 1.8345178592877107, "learning_rate": 3.3947109663434274e-06, "loss": 0.2828, "step": 9643 }, { "epoch": 1.4757459831675592, "grad_norm": 2.0816444823106215, "learning_rate": 3.392850547824962e-06, "loss": 0.3154, "step": 9644 }, { "epoch": 1.4758990053557766, "grad_norm": 2.2321250664297914, "learning_rate": 3.390990535077392e-06, "loss": 0.3723, "step": 9645 }, { "epoch": 1.476052027543994, "grad_norm": 1.9008291069198198, "learning_rate": 3.3891309282149476e-06, "loss": 0.3035, "step": 9646 }, { "epoch": 1.4762050497322112, "grad_norm": 2.0403337195080007, "learning_rate": 3.3872717273518352e-06, "loss": 0.3225, "step": 9647 }, { "epoch": 1.4763580719204286, "grad_norm": 2.3313497369392864, "learning_rate": 3.3854129326022312e-06, "loss": 0.3249, "step": 9648 }, { "epoch": 1.4765110941086457, "grad_norm": 1.9367820769137207, "learning_rate": 3.383554544080303e-06, "loss": 0.3423, "step": 9649 }, { "epoch": 1.476664116296863, "grad_norm": 1.9343688897956826, "learning_rate": 3.3816965619001697e-06, "loss": 0.2844, "step": 9650 }, { "epoch": 1.4768171384850803, "grad_norm": 2.0350599708053183, "learning_rate": 3.3798389861759385e-06, "loss": 0.2567, "step": 9651 }, { "epoch": 1.4769701606732977, "grad_norm": 2.0884917699535226, "learning_rate": 3.3779818170216994e-06, "loss": 0.3647, "step": 9652 }, { "epoch": 1.477123182861515, "grad_norm": 2.053173963511413, "learning_rate": 3.3761250545515e-06, "loss": 0.3367, "step": 9653 }, { "epoch": 1.4772762050497321, "grad_norm": 2.406740971692602, "learning_rate": 3.3742686988793703e-06, "loss": 0.3357, "step": 9654 }, { "epoch": 1.4774292272379494, "grad_norm": 2.2395311160784273, "learning_rate": 3.372412750119326e-06, "loss": 0.3188, "step": 9655 }, { "epoch": 1.4775822494261668, "grad_norm": 2.090272851729783, "learning_rate": 3.3705572083853354e-06, "loss": 0.3041, "step": 9656 }, { "epoch": 1.4777352716143841, "grad_norm": 1.9825871720189625, "learning_rate": 3.3687020737913645e-06, "loss": 0.2794, "step": 9657 }, { "epoch": 1.4778882938026014, "grad_norm": 1.799646543850758, "learning_rate": 3.366847346451345e-06, "loss": 0.3137, "step": 9658 }, { "epoch": 1.4780413159908186, "grad_norm": 2.1457698517236365, "learning_rate": 3.364993026479172e-06, "loss": 0.3418, "step": 9659 }, { "epoch": 1.478194338179036, "grad_norm": 1.881613328637775, "learning_rate": 3.363139113988736e-06, "loss": 0.3176, "step": 9660 }, { "epoch": 1.4783473603672532, "grad_norm": 2.0964508147709364, "learning_rate": 3.361285609093895e-06, "loss": 0.368, "step": 9661 }, { "epoch": 1.4785003825554706, "grad_norm": 2.3956986412823214, "learning_rate": 3.359432511908468e-06, "loss": 0.4279, "step": 9662 }, { "epoch": 1.478653404743688, "grad_norm": 2.177333571774147, "learning_rate": 3.357579822546273e-06, "loss": 0.3332, "step": 9663 }, { "epoch": 1.478806426931905, "grad_norm": 1.920267606292228, "learning_rate": 3.3557275411210857e-06, "loss": 0.3456, "step": 9664 }, { "epoch": 1.4789594491201223, "grad_norm": 2.13065516579347, "learning_rate": 3.353875667746662e-06, "loss": 0.2951, "step": 9665 }, { "epoch": 1.4791124713083397, "grad_norm": 2.1802357900868103, "learning_rate": 3.3520242025367345e-06, "loss": 0.3092, "step": 9666 }, { "epoch": 1.479265493496557, "grad_norm": 2.2588888010046184, "learning_rate": 3.350173145605007e-06, "loss": 0.3029, "step": 9667 }, { "epoch": 1.4794185156847743, "grad_norm": 2.149844137638874, "learning_rate": 3.3483224970651618e-06, "loss": 0.3287, "step": 9668 }, { "epoch": 1.4795715378729914, "grad_norm": 1.9285140452203176, "learning_rate": 3.3464722570308495e-06, "loss": 0.2797, "step": 9669 }, { "epoch": 1.479724560061209, "grad_norm": 2.174794412352257, "learning_rate": 3.3446224256157113e-06, "loss": 0.3432, "step": 9670 }, { "epoch": 1.4798775822494261, "grad_norm": 1.9492744650336518, "learning_rate": 3.3427730029333427e-06, "loss": 0.3068, "step": 9671 }, { "epoch": 1.4800306044376434, "grad_norm": 2.2467264250993737, "learning_rate": 3.3409239890973234e-06, "loss": 0.3348, "step": 9672 }, { "epoch": 1.4801836266258608, "grad_norm": 2.148025214316827, "learning_rate": 3.3390753842212196e-06, "loss": 0.3306, "step": 9673 }, { "epoch": 1.4803366488140781, "grad_norm": 2.3871375437801916, "learning_rate": 3.3372271884185503e-06, "loss": 0.3524, "step": 9674 }, { "epoch": 1.4804896710022954, "grad_norm": 2.281354552877399, "learning_rate": 3.3353794018028216e-06, "loss": 0.339, "step": 9675 }, { "epoch": 1.4806426931905126, "grad_norm": 2.16698236232759, "learning_rate": 3.3335320244875226e-06, "loss": 0.3163, "step": 9676 }, { "epoch": 1.48079571537873, "grad_norm": 1.9526029274383037, "learning_rate": 3.3316850565860937e-06, "loss": 0.2826, "step": 9677 }, { "epoch": 1.4809487375669472, "grad_norm": 2.420557240605018, "learning_rate": 3.3298384982119767e-06, "loss": 0.3786, "step": 9678 }, { "epoch": 1.4811017597551646, "grad_norm": 2.28465035245862, "learning_rate": 3.327992349478574e-06, "loss": 0.3461, "step": 9679 }, { "epoch": 1.481254781943382, "grad_norm": 2.0005123469497588, "learning_rate": 3.3261466104992557e-06, "loss": 0.2779, "step": 9680 }, { "epoch": 1.481407804131599, "grad_norm": 2.264288271376078, "learning_rate": 3.3243012813873854e-06, "loss": 0.3615, "step": 9681 }, { "epoch": 1.4815608263198163, "grad_norm": 2.1961574735260805, "learning_rate": 3.322456362256292e-06, "loss": 0.3248, "step": 9682 }, { "epoch": 1.4817138485080337, "grad_norm": 1.987011464688957, "learning_rate": 3.32061185321927e-06, "loss": 0.33, "step": 9683 }, { "epoch": 1.481866870696251, "grad_norm": 1.9619117786824731, "learning_rate": 3.318767754389607e-06, "loss": 0.3812, "step": 9684 }, { "epoch": 1.4820198928844683, "grad_norm": 2.075830782082472, "learning_rate": 3.3169240658805533e-06, "loss": 0.2877, "step": 9685 }, { "epoch": 1.4821729150726854, "grad_norm": 2.0458494055910927, "learning_rate": 3.3150807878053383e-06, "loss": 0.3171, "step": 9686 }, { "epoch": 1.4823259372609028, "grad_norm": 2.493267414344924, "learning_rate": 3.3132379202771613e-06, "loss": 0.3898, "step": 9687 }, { "epoch": 1.4824789594491201, "grad_norm": 2.275969640971838, "learning_rate": 3.3113954634092037e-06, "loss": 0.3653, "step": 9688 }, { "epoch": 1.4826319816373374, "grad_norm": 2.1293537358603514, "learning_rate": 3.309553417314617e-06, "loss": 0.3176, "step": 9689 }, { "epoch": 1.4827850038255548, "grad_norm": 2.131470108037654, "learning_rate": 3.3077117821065274e-06, "loss": 0.2997, "step": 9690 }, { "epoch": 1.482938026013772, "grad_norm": 2.2836857699911315, "learning_rate": 3.305870557898039e-06, "loss": 0.3644, "step": 9691 }, { "epoch": 1.4830910482019892, "grad_norm": 1.8703424111131515, "learning_rate": 3.3040297448022253e-06, "loss": 0.2544, "step": 9692 }, { "epoch": 1.4832440703902066, "grad_norm": 1.8819671745260147, "learning_rate": 3.3021893429321407e-06, "loss": 0.2973, "step": 9693 }, { "epoch": 1.483397092578424, "grad_norm": 2.2023836981052343, "learning_rate": 3.300349352400811e-06, "loss": 0.2955, "step": 9694 }, { "epoch": 1.4835501147666412, "grad_norm": 1.8542863847355404, "learning_rate": 3.2985097733212356e-06, "loss": 0.259, "step": 9695 }, { "epoch": 1.4837031369548583, "grad_norm": 2.201198857571715, "learning_rate": 3.2966706058063924e-06, "loss": 0.3678, "step": 9696 }, { "epoch": 1.4838561591430757, "grad_norm": 2.0005225343509174, "learning_rate": 3.294831849969231e-06, "loss": 0.2636, "step": 9697 }, { "epoch": 1.484009181331293, "grad_norm": 2.0741880232412813, "learning_rate": 3.292993505922676e-06, "loss": 0.3257, "step": 9698 }, { "epoch": 1.4841622035195103, "grad_norm": 2.1015544039730294, "learning_rate": 3.2911555737796274e-06, "loss": 0.3469, "step": 9699 }, { "epoch": 1.4843152257077277, "grad_norm": 2.154984766214943, "learning_rate": 3.28931805365296e-06, "loss": 0.3341, "step": 9700 }, { "epoch": 1.4844682478959448, "grad_norm": 2.00350612128123, "learning_rate": 3.2874809456555214e-06, "loss": 0.3539, "step": 9701 }, { "epoch": 1.4846212700841623, "grad_norm": 2.2878236503785114, "learning_rate": 3.285644249900143e-06, "loss": 0.3498, "step": 9702 }, { "epoch": 1.4847742922723794, "grad_norm": 2.0944591858421227, "learning_rate": 3.283807966499615e-06, "loss": 0.311, "step": 9703 }, { "epoch": 1.4849273144605968, "grad_norm": 2.117959473650116, "learning_rate": 3.281972095566709e-06, "loss": 0.3289, "step": 9704 }, { "epoch": 1.4850803366488141, "grad_norm": 2.1069078550225284, "learning_rate": 3.2801366372141854e-06, "loss": 0.2824, "step": 9705 }, { "epoch": 1.4852333588370314, "grad_norm": 1.7445064623021305, "learning_rate": 3.278301591554753e-06, "loss": 0.2507, "step": 9706 }, { "epoch": 1.4853863810252488, "grad_norm": 2.1744862309060826, "learning_rate": 3.2764669587011176e-06, "loss": 0.2917, "step": 9707 }, { "epoch": 1.485539403213466, "grad_norm": 2.099115560590431, "learning_rate": 3.2746327387659528e-06, "loss": 0.3, "step": 9708 }, { "epoch": 1.4856924254016832, "grad_norm": 2.383542556858228, "learning_rate": 3.272798931861895e-06, "loss": 0.3499, "step": 9709 }, { "epoch": 1.4858454475899006, "grad_norm": 1.958523894698532, "learning_rate": 3.2709655381015747e-06, "loss": 0.3295, "step": 9710 }, { "epoch": 1.485998469778118, "grad_norm": 2.316274078181258, "learning_rate": 3.2691325575975873e-06, "loss": 0.2929, "step": 9711 }, { "epoch": 1.4861514919663352, "grad_norm": 2.285956647889531, "learning_rate": 3.267299990462496e-06, "loss": 0.3307, "step": 9712 }, { "epoch": 1.4863045141545523, "grad_norm": 2.21087747854537, "learning_rate": 3.265467836808852e-06, "loss": 0.3439, "step": 9713 }, { "epoch": 1.4864575363427697, "grad_norm": 2.512942424866177, "learning_rate": 3.2636360967491753e-06, "loss": 0.3233, "step": 9714 }, { "epoch": 1.486610558530987, "grad_norm": 2.2420113787509695, "learning_rate": 3.2618047703959587e-06, "loss": 0.3966, "step": 9715 }, { "epoch": 1.4867635807192043, "grad_norm": 1.827313128956776, "learning_rate": 3.25997385786167e-06, "loss": 0.3345, "step": 9716 }, { "epoch": 1.4869166029074217, "grad_norm": 2.540856908588475, "learning_rate": 3.258143359258755e-06, "loss": 0.3662, "step": 9717 }, { "epoch": 1.4870696250956388, "grad_norm": 2.0729164941270626, "learning_rate": 3.2563132746996295e-06, "loss": 0.2955, "step": 9718 }, { "epoch": 1.4872226472838561, "grad_norm": 2.2431616508070684, "learning_rate": 3.2544836042966887e-06, "loss": 0.2798, "step": 9719 }, { "epoch": 1.4873756694720734, "grad_norm": 2.388865441696811, "learning_rate": 3.2526543481622972e-06, "loss": 0.3584, "step": 9720 }, { "epoch": 1.4875286916602908, "grad_norm": 2.3876493187240446, "learning_rate": 3.250825506408798e-06, "loss": 0.3654, "step": 9721 }, { "epoch": 1.4876817138485081, "grad_norm": 1.8569930412454958, "learning_rate": 3.248997079148508e-06, "loss": 0.3371, "step": 9722 }, { "epoch": 1.4878347360367252, "grad_norm": 1.9617608209841546, "learning_rate": 3.247169066493717e-06, "loss": 0.3279, "step": 9723 }, { "epoch": 1.4879877582249426, "grad_norm": 2.1004602635403073, "learning_rate": 3.245341468556691e-06, "loss": 0.3163, "step": 9724 }, { "epoch": 1.48814078041316, "grad_norm": 1.8281973503764155, "learning_rate": 3.2435142854496695e-06, "loss": 0.3414, "step": 9725 }, { "epoch": 1.4882938026013772, "grad_norm": 2.0119727311462468, "learning_rate": 3.241687517284867e-06, "loss": 0.3131, "step": 9726 }, { "epoch": 1.4884468247895946, "grad_norm": 2.0671398705003354, "learning_rate": 3.239861164174474e-06, "loss": 0.3292, "step": 9727 }, { "epoch": 1.4885998469778117, "grad_norm": 2.183939371421867, "learning_rate": 3.2380352262306516e-06, "loss": 0.3413, "step": 9728 }, { "epoch": 1.488752869166029, "grad_norm": 2.6176496586308264, "learning_rate": 3.2362097035655395e-06, "loss": 0.3849, "step": 9729 }, { "epoch": 1.4889058913542463, "grad_norm": 1.984006435842004, "learning_rate": 3.234384596291246e-06, "loss": 0.2825, "step": 9730 }, { "epoch": 1.4890589135424637, "grad_norm": 2.218983095645015, "learning_rate": 3.2325599045198686e-06, "loss": 0.3254, "step": 9731 }, { "epoch": 1.489211935730681, "grad_norm": 2.1406761509420753, "learning_rate": 3.230735628363457e-06, "loss": 0.2822, "step": 9732 }, { "epoch": 1.4893649579188981, "grad_norm": 2.156889368226719, "learning_rate": 3.228911767934051e-06, "loss": 0.3079, "step": 9733 }, { "epoch": 1.4895179801071157, "grad_norm": 2.2627592273788806, "learning_rate": 3.2270883233436668e-06, "loss": 0.3561, "step": 9734 }, { "epoch": 1.4896710022953328, "grad_norm": 2.1268555796252, "learning_rate": 3.225265294704278e-06, "loss": 0.2414, "step": 9735 }, { "epoch": 1.4898240244835501, "grad_norm": 2.5191995631578443, "learning_rate": 3.2234426821278553e-06, "loss": 0.4362, "step": 9736 }, { "epoch": 1.4899770466717674, "grad_norm": 2.1702180434659444, "learning_rate": 3.221620485726329e-06, "loss": 0.36, "step": 9737 }, { "epoch": 1.4901300688599848, "grad_norm": 2.2050301008986017, "learning_rate": 3.219798705611601e-06, "loss": 0.3054, "step": 9738 }, { "epoch": 1.4902830910482021, "grad_norm": 2.2089552013113334, "learning_rate": 3.2179773418955605e-06, "loss": 0.3696, "step": 9739 }, { "epoch": 1.4904361132364192, "grad_norm": 1.9418105664383982, "learning_rate": 3.2161563946900665e-06, "loss": 0.2662, "step": 9740 }, { "epoch": 1.4905891354246366, "grad_norm": 2.1010010388460523, "learning_rate": 3.2143358641069412e-06, "loss": 0.294, "step": 9741 }, { "epoch": 1.490742157612854, "grad_norm": 2.4393911103033235, "learning_rate": 3.2125157502579985e-06, "loss": 0.3728, "step": 9742 }, { "epoch": 1.4908951798010712, "grad_norm": 2.1391415962137, "learning_rate": 3.2106960532550158e-06, "loss": 0.298, "step": 9743 }, { "epoch": 1.4910482019892886, "grad_norm": 2.0613432267115925, "learning_rate": 3.2088767732097493e-06, "loss": 0.2871, "step": 9744 }, { "epoch": 1.4912012241775057, "grad_norm": 1.974687288819572, "learning_rate": 3.2070579102339274e-06, "loss": 0.3126, "step": 9745 }, { "epoch": 1.491354246365723, "grad_norm": 2.0887558994869884, "learning_rate": 3.2052394644392525e-06, "loss": 0.3445, "step": 9746 }, { "epoch": 1.4915072685539403, "grad_norm": 2.186477735593589, "learning_rate": 3.2034214359374038e-06, "loss": 0.3372, "step": 9747 }, { "epoch": 1.4916602907421577, "grad_norm": 2.3663298192862454, "learning_rate": 3.2016038248400338e-06, "loss": 0.3463, "step": 9748 }, { "epoch": 1.491813312930375, "grad_norm": 1.9550108079123667, "learning_rate": 3.199786631258768e-06, "loss": 0.3061, "step": 9749 }, { "epoch": 1.4919663351185921, "grad_norm": 2.179613795100359, "learning_rate": 3.1979698553052065e-06, "loss": 0.3472, "step": 9750 }, { "epoch": 1.4921193573068094, "grad_norm": 2.2746548727858054, "learning_rate": 3.1961534970909237e-06, "loss": 0.4066, "step": 9751 }, { "epoch": 1.4922723794950268, "grad_norm": 1.939192701965579, "learning_rate": 3.194337556727478e-06, "loss": 0.2677, "step": 9752 }, { "epoch": 1.4924254016832441, "grad_norm": 2.264172372199918, "learning_rate": 3.192522034326383e-06, "loss": 0.2917, "step": 9753 }, { "epoch": 1.4925784238714614, "grad_norm": 2.302377291578821, "learning_rate": 3.1907069299991387e-06, "loss": 0.3661, "step": 9754 }, { "epoch": 1.4927314460596786, "grad_norm": 1.8882191488728923, "learning_rate": 3.1888922438572257e-06, "loss": 0.2901, "step": 9755 }, { "epoch": 1.492884468247896, "grad_norm": 2.1078551552877935, "learning_rate": 3.1870779760120818e-06, "loss": 0.3591, "step": 9756 }, { "epoch": 1.4930374904361132, "grad_norm": 2.277226366949766, "learning_rate": 3.1852641265751294e-06, "loss": 0.3308, "step": 9757 }, { "epoch": 1.4931905126243306, "grad_norm": 2.0104743672396794, "learning_rate": 3.1834506956577716e-06, "loss": 0.2591, "step": 9758 }, { "epoch": 1.493343534812548, "grad_norm": 2.018241900306442, "learning_rate": 3.181637683371367e-06, "loss": 0.2783, "step": 9759 }, { "epoch": 1.493496557000765, "grad_norm": 2.220811704079774, "learning_rate": 3.17982508982727e-06, "loss": 0.3125, "step": 9760 }, { "epoch": 1.4936495791889823, "grad_norm": 2.0207958924684144, "learning_rate": 3.1780129151367965e-06, "loss": 0.3498, "step": 9761 }, { "epoch": 1.4938026013771997, "grad_norm": 1.9336176704001233, "learning_rate": 3.176201159411232e-06, "loss": 0.2944, "step": 9762 }, { "epoch": 1.493955623565417, "grad_norm": 2.0518057739899347, "learning_rate": 3.174389822761853e-06, "loss": 0.3147, "step": 9763 }, { "epoch": 1.4941086457536343, "grad_norm": 2.0524245884405903, "learning_rate": 3.172578905299899e-06, "loss": 0.3182, "step": 9764 }, { "epoch": 1.4942616679418514, "grad_norm": 2.403753805984202, "learning_rate": 3.1707684071365786e-06, "loss": 0.3841, "step": 9765 }, { "epoch": 1.4944146901300688, "grad_norm": 2.057508983016502, "learning_rate": 3.168958328383088e-06, "loss": 0.3566, "step": 9766 }, { "epoch": 1.4945677123182861, "grad_norm": 1.9234820653276592, "learning_rate": 3.1671486691505906e-06, "loss": 0.2878, "step": 9767 }, { "epoch": 1.4947207345065034, "grad_norm": 2.063565746243901, "learning_rate": 3.1653394295502226e-06, "loss": 0.3536, "step": 9768 }, { "epoch": 1.4948737566947208, "grad_norm": 2.19824145243771, "learning_rate": 3.1635306096930985e-06, "loss": 0.3115, "step": 9769 }, { "epoch": 1.495026778882938, "grad_norm": 2.3743711638225085, "learning_rate": 3.1617222096903045e-06, "loss": 0.3565, "step": 9770 }, { "epoch": 1.4951798010711554, "grad_norm": 2.041310825172398, "learning_rate": 3.1599142296529005e-06, "loss": 0.3498, "step": 9771 }, { "epoch": 1.4953328232593726, "grad_norm": 2.103580031841215, "learning_rate": 3.1581066696919216e-06, "loss": 0.3418, "step": 9772 }, { "epoch": 1.49548584544759, "grad_norm": 1.6728540200328423, "learning_rate": 3.1562995299183786e-06, "loss": 0.293, "step": 9773 }, { "epoch": 1.4956388676358072, "grad_norm": 2.052812844688033, "learning_rate": 3.1544928104432536e-06, "loss": 0.3172, "step": 9774 }, { "epoch": 1.4957918898240246, "grad_norm": 1.99378419575446, "learning_rate": 3.1526865113775063e-06, "loss": 0.2531, "step": 9775 }, { "epoch": 1.495944912012242, "grad_norm": 2.206302011302261, "learning_rate": 3.1508806328320653e-06, "loss": 0.3198, "step": 9776 }, { "epoch": 1.496097934200459, "grad_norm": 2.375311640575206, "learning_rate": 3.1490751749178394e-06, "loss": 0.3092, "step": 9777 }, { "epoch": 1.4962509563886763, "grad_norm": 2.3242990738348412, "learning_rate": 3.1472701377457082e-06, "loss": 0.3788, "step": 9778 }, { "epoch": 1.4964039785768937, "grad_norm": 2.068015816110016, "learning_rate": 3.145465521426525e-06, "loss": 0.3278, "step": 9779 }, { "epoch": 1.496557000765111, "grad_norm": 2.089112948601707, "learning_rate": 3.1436613260711166e-06, "loss": 0.2761, "step": 9780 }, { "epoch": 1.4967100229533283, "grad_norm": 2.3392197952388565, "learning_rate": 3.141857551790295e-06, "loss": 0.3837, "step": 9781 }, { "epoch": 1.4968630451415454, "grad_norm": 2.4626352152836493, "learning_rate": 3.1400541986948274e-06, "loss": 0.3356, "step": 9782 }, { "epoch": 1.4970160673297628, "grad_norm": 1.943327658989901, "learning_rate": 3.1382512668954635e-06, "loss": 0.2664, "step": 9783 }, { "epoch": 1.4971690895179801, "grad_norm": 2.0015017364893657, "learning_rate": 3.13644875650294e-06, "loss": 0.277, "step": 9784 }, { "epoch": 1.4973221117061974, "grad_norm": 1.991863536015547, "learning_rate": 3.1346466676279453e-06, "loss": 0.3068, "step": 9785 }, { "epoch": 1.4974751338944148, "grad_norm": 2.1289913120304527, "learning_rate": 3.1328450003811526e-06, "loss": 0.2519, "step": 9786 }, { "epoch": 1.497628156082632, "grad_norm": 2.4980243023068014, "learning_rate": 3.131043754873219e-06, "loss": 0.3528, "step": 9787 }, { "epoch": 1.4977811782708492, "grad_norm": 2.2767558515508273, "learning_rate": 3.1292429312147542e-06, "loss": 0.368, "step": 9788 }, { "epoch": 1.4979342004590666, "grad_norm": 2.16350370343632, "learning_rate": 3.127442529516362e-06, "loss": 0.4144, "step": 9789 }, { "epoch": 1.498087222647284, "grad_norm": 2.3882333717742847, "learning_rate": 3.1256425498886132e-06, "loss": 0.3766, "step": 9790 }, { "epoch": 1.4982402448355012, "grad_norm": 2.1894069009771053, "learning_rate": 3.123842992442042e-06, "loss": 0.356, "step": 9791 }, { "epoch": 1.4983932670237183, "grad_norm": 2.1408227407508496, "learning_rate": 3.122043857287176e-06, "loss": 0.2882, "step": 9792 }, { "epoch": 1.4985462892119357, "grad_norm": 1.9419697702027892, "learning_rate": 3.120245144534505e-06, "loss": 0.2727, "step": 9793 }, { "epoch": 1.498699311400153, "grad_norm": 2.0701350644107066, "learning_rate": 3.118446854294488e-06, "loss": 0.2933, "step": 9794 }, { "epoch": 1.4988523335883703, "grad_norm": 1.7478356478182149, "learning_rate": 3.116648986677574e-06, "loss": 0.2833, "step": 9795 }, { "epoch": 1.4990053557765877, "grad_norm": 2.0429077290330424, "learning_rate": 3.114851541794173e-06, "loss": 0.2941, "step": 9796 }, { "epoch": 1.4991583779648048, "grad_norm": 2.2981563657573347, "learning_rate": 3.1130545197546735e-06, "loss": 0.3765, "step": 9797 }, { "epoch": 1.4993114001530221, "grad_norm": 2.1883567916548214, "learning_rate": 3.111257920669438e-06, "loss": 0.3353, "step": 9798 }, { "epoch": 1.4994644223412394, "grad_norm": 2.0688655353997576, "learning_rate": 3.109461744648803e-06, "loss": 0.2819, "step": 9799 }, { "epoch": 1.4996174445294568, "grad_norm": 2.1705196804072133, "learning_rate": 3.107665991803078e-06, "loss": 0.368, "step": 9800 }, { "epoch": 1.4997704667176741, "grad_norm": 2.2061746818166714, "learning_rate": 3.1058706622425473e-06, "loss": 0.3808, "step": 9801 }, { "epoch": 1.4999234889058912, "grad_norm": 2.054827111884937, "learning_rate": 3.1040757560774694e-06, "loss": 0.3585, "step": 9802 }, { "epoch": 1.5000765110941088, "grad_norm": 2.121121071144255, "learning_rate": 3.1022812734180765e-06, "loss": 0.274, "step": 9803 }, { "epoch": 1.5002295332823259, "grad_norm": 2.080239488774426, "learning_rate": 3.1004872143745734e-06, "loss": 0.2902, "step": 9804 }, { "epoch": 1.5003825554705432, "grad_norm": 2.049230990374885, "learning_rate": 3.0986935790571427e-06, "loss": 0.3381, "step": 9805 }, { "epoch": 1.5005355776587606, "grad_norm": 2.2067647222870383, "learning_rate": 3.0969003675759368e-06, "loss": 0.3135, "step": 9806 }, { "epoch": 1.5006885998469777, "grad_norm": 1.9066668807836955, "learning_rate": 3.0951075800410847e-06, "loss": 0.4619, "step": 9807 }, { "epoch": 1.5008416220351952, "grad_norm": 2.2749545171097, "learning_rate": 3.093315216562688e-06, "loss": 0.3464, "step": 9808 }, { "epoch": 1.5009946442234123, "grad_norm": 2.0411962491402984, "learning_rate": 3.0915232772508196e-06, "loss": 0.2835, "step": 9809 }, { "epoch": 1.5011476664116297, "grad_norm": 2.191472037931345, "learning_rate": 3.08973176221554e-06, "loss": 0.2974, "step": 9810 }, { "epoch": 1.501300688599847, "grad_norm": 2.1908312969139843, "learning_rate": 3.0879406715668623e-06, "loss": 0.3244, "step": 9811 }, { "epoch": 1.501453710788064, "grad_norm": 2.136787196257136, "learning_rate": 3.086150005414784e-06, "loss": 0.3084, "step": 9812 }, { "epoch": 1.5016067329762817, "grad_norm": 1.894927128405544, "learning_rate": 3.084359763869289e-06, "loss": 0.3001, "step": 9813 }, { "epoch": 1.5017597551644988, "grad_norm": 2.1745865419199646, "learning_rate": 3.0825699470403114e-06, "loss": 0.3858, "step": 9814 }, { "epoch": 1.501912777352716, "grad_norm": 1.9608382991991724, "learning_rate": 3.080780555037771e-06, "loss": 0.3078, "step": 9815 }, { "epoch": 1.5020657995409334, "grad_norm": 2.1040554688231343, "learning_rate": 3.0789915879715715e-06, "loss": 0.3482, "step": 9816 }, { "epoch": 1.5022188217291508, "grad_norm": 1.9905730959553891, "learning_rate": 3.077203045951567e-06, "loss": 0.277, "step": 9817 }, { "epoch": 1.502371843917368, "grad_norm": 2.1890814408914747, "learning_rate": 3.075414929087609e-06, "loss": 0.2968, "step": 9818 }, { "epoch": 1.5025248661055852, "grad_norm": 2.495997552639193, "learning_rate": 3.0736272374895115e-06, "loss": 0.2815, "step": 9819 }, { "epoch": 1.5026778882938026, "grad_norm": 2.0086629122191604, "learning_rate": 3.0718399712670566e-06, "loss": 0.2857, "step": 9820 }, { "epoch": 1.5028309104820199, "grad_norm": 2.2914428417288217, "learning_rate": 3.0700531305300153e-06, "loss": 0.3293, "step": 9821 }, { "epoch": 1.5029839326702372, "grad_norm": 2.1358875597800973, "learning_rate": 3.0682667153881228e-06, "loss": 0.273, "step": 9822 }, { "epoch": 1.5031369548584546, "grad_norm": 1.958831411297443, "learning_rate": 3.0664807259510842e-06, "loss": 0.3609, "step": 9823 }, { "epoch": 1.5032899770466717, "grad_norm": 2.0320226227226743, "learning_rate": 3.0646951623285904e-06, "loss": 0.2767, "step": 9824 }, { "epoch": 1.5034429992348892, "grad_norm": 2.2398279587072087, "learning_rate": 3.062910024630298e-06, "loss": 0.3316, "step": 9825 }, { "epoch": 1.5035960214231063, "grad_norm": 2.0793764036363735, "learning_rate": 3.061125312965838e-06, "loss": 0.2947, "step": 9826 }, { "epoch": 1.5037490436113237, "grad_norm": 2.350717021248169, "learning_rate": 3.059341027444819e-06, "loss": 0.3965, "step": 9827 }, { "epoch": 1.503902065799541, "grad_norm": 2.0080218668074705, "learning_rate": 3.057557168176819e-06, "loss": 0.4766, "step": 9828 }, { "epoch": 1.504055087987758, "grad_norm": 2.192980069727335, "learning_rate": 3.0557737352713925e-06, "loss": 0.3138, "step": 9829 }, { "epoch": 1.5042081101759757, "grad_norm": 2.277627119535517, "learning_rate": 3.0539907288380664e-06, "loss": 0.271, "step": 9830 }, { "epoch": 1.5043611323641928, "grad_norm": 2.060097566540218, "learning_rate": 3.0522081489863433e-06, "loss": 0.2744, "step": 9831 }, { "epoch": 1.50451415455241, "grad_norm": 2.1953025152712855, "learning_rate": 3.0504259958256966e-06, "loss": 0.2951, "step": 9832 }, { "epoch": 1.5046671767406274, "grad_norm": 2.0592054511449187, "learning_rate": 3.0486442694655737e-06, "loss": 0.3091, "step": 9833 }, { "epoch": 1.5048201989288446, "grad_norm": 2.3291429300803124, "learning_rate": 3.0468629700154072e-06, "loss": 0.3384, "step": 9834 }, { "epoch": 1.504973221117062, "grad_norm": 2.1766038192419943, "learning_rate": 3.045082097584583e-06, "loss": 0.3497, "step": 9835 }, { "epoch": 1.5051262433052792, "grad_norm": 2.269668303049232, "learning_rate": 3.043301652282471e-06, "loss": 0.3362, "step": 9836 }, { "epoch": 1.5052792654934966, "grad_norm": 2.488874865174472, "learning_rate": 3.041521634218426e-06, "loss": 0.3862, "step": 9837 }, { "epoch": 1.5054322876817139, "grad_norm": 1.9458045410505225, "learning_rate": 3.0397420435017565e-06, "loss": 0.2328, "step": 9838 }, { "epoch": 1.505585309869931, "grad_norm": 2.125888170369278, "learning_rate": 3.0379628802417525e-06, "loss": 0.2961, "step": 9839 }, { "epoch": 1.5057383320581486, "grad_norm": 2.210814431359663, "learning_rate": 3.0361841445476914e-06, "loss": 0.3397, "step": 9840 }, { "epoch": 1.5058913542463657, "grad_norm": 2.1884877256421937, "learning_rate": 3.0344058365287977e-06, "loss": 0.3376, "step": 9841 }, { "epoch": 1.506044376434583, "grad_norm": 2.419550996918986, "learning_rate": 3.032627956294294e-06, "loss": 0.3823, "step": 9842 }, { "epoch": 1.5061973986228003, "grad_norm": 2.299565953799042, "learning_rate": 3.0308505039533675e-06, "loss": 0.3659, "step": 9843 }, { "epoch": 1.5063504208110174, "grad_norm": 2.213477786477071, "learning_rate": 3.0290734796151687e-06, "loss": 0.3586, "step": 9844 }, { "epoch": 1.506503442999235, "grad_norm": 2.1247224716904185, "learning_rate": 3.0272968833888407e-06, "loss": 0.3304, "step": 9845 }, { "epoch": 1.506656465187452, "grad_norm": 2.1254476906607307, "learning_rate": 3.0255207153834886e-06, "loss": 0.2704, "step": 9846 }, { "epoch": 1.5068094873756694, "grad_norm": 2.436520978872859, "learning_rate": 3.023744975708194e-06, "loss": 0.3755, "step": 9847 }, { "epoch": 1.5069625095638868, "grad_norm": 2.2069707007141095, "learning_rate": 3.021969664472012e-06, "loss": 0.2866, "step": 9848 }, { "epoch": 1.507115531752104, "grad_norm": 1.9306592406380063, "learning_rate": 3.0201947817839704e-06, "loss": 0.3086, "step": 9849 }, { "epoch": 1.5072685539403214, "grad_norm": 2.26216940279369, "learning_rate": 3.0184203277530723e-06, "loss": 0.38, "step": 9850 }, { "epoch": 1.5074215761285386, "grad_norm": 2.0832013755845, "learning_rate": 3.0166463024882943e-06, "loss": 0.3438, "step": 9851 }, { "epoch": 1.5075745983167559, "grad_norm": 2.2765780863751166, "learning_rate": 3.014872706098586e-06, "loss": 0.3067, "step": 9852 }, { "epoch": 1.5077276205049732, "grad_norm": 2.5699472822824223, "learning_rate": 3.0130995386928706e-06, "loss": 0.3503, "step": 9853 }, { "epoch": 1.5078806426931906, "grad_norm": 2.005607019002461, "learning_rate": 3.0113268003800456e-06, "loss": 0.2863, "step": 9854 }, { "epoch": 1.5080336648814079, "grad_norm": 1.9684914601471286, "learning_rate": 3.0095544912689822e-06, "loss": 0.2837, "step": 9855 }, { "epoch": 1.508186687069625, "grad_norm": 1.7614254445638609, "learning_rate": 3.007782611468524e-06, "loss": 0.3154, "step": 9856 }, { "epoch": 1.5083397092578426, "grad_norm": 2.0535358999385505, "learning_rate": 3.0060111610874886e-06, "loss": 0.2978, "step": 9857 }, { "epoch": 1.5084927314460597, "grad_norm": 2.2628065523811456, "learning_rate": 3.0042401402346687e-06, "loss": 0.3182, "step": 9858 }, { "epoch": 1.508645753634277, "grad_norm": 1.9545007110492567, "learning_rate": 3.0024695490188296e-06, "loss": 0.3188, "step": 9859 }, { "epoch": 1.5087987758224943, "grad_norm": 2.1848377979596627, "learning_rate": 3.00069938754871e-06, "loss": 0.3695, "step": 9860 }, { "epoch": 1.5089517980107114, "grad_norm": 2.1635874874215633, "learning_rate": 2.9989296559330215e-06, "loss": 0.3139, "step": 9861 }, { "epoch": 1.509104820198929, "grad_norm": 2.338481731423221, "learning_rate": 2.9971603542804495e-06, "loss": 0.3425, "step": 9862 }, { "epoch": 1.509257842387146, "grad_norm": 2.2282324761992554, "learning_rate": 2.995391482699661e-06, "loss": 0.3796, "step": 9863 }, { "epoch": 1.5094108645753634, "grad_norm": 2.276627350922109, "learning_rate": 2.9936230412992805e-06, "loss": 0.2884, "step": 9864 }, { "epoch": 1.5095638867635808, "grad_norm": 1.9414960546579652, "learning_rate": 2.9918550301879145e-06, "loss": 0.336, "step": 9865 }, { "epoch": 1.5097169089517979, "grad_norm": 2.1301779835444377, "learning_rate": 2.990087449474154e-06, "loss": 0.3303, "step": 9866 }, { "epoch": 1.5098699311400154, "grad_norm": 1.9945124667754917, "learning_rate": 2.9883202992665438e-06, "loss": 0.2838, "step": 9867 }, { "epoch": 1.5100229533282326, "grad_norm": 2.2782006257588474, "learning_rate": 2.986553579673609e-06, "loss": 0.3139, "step": 9868 }, { "epoch": 1.5101759755164499, "grad_norm": 1.9001158252077175, "learning_rate": 2.984787290803863e-06, "loss": 0.279, "step": 9869 }, { "epoch": 1.5103289977046672, "grad_norm": 1.9890668141893428, "learning_rate": 2.983021432765767e-06, "loss": 0.3103, "step": 9870 }, { "epoch": 1.5104820198928843, "grad_norm": 1.9261758653878254, "learning_rate": 2.9812560056677785e-06, "loss": 0.2946, "step": 9871 }, { "epoch": 1.5106350420811019, "grad_norm": 2.2334108039049054, "learning_rate": 2.9794910096183183e-06, "loss": 0.3224, "step": 9872 }, { "epoch": 1.510788064269319, "grad_norm": 2.1237274490949276, "learning_rate": 2.9777264447257748e-06, "loss": 0.3449, "step": 9873 }, { "epoch": 1.5109410864575363, "grad_norm": 2.1770374643161667, "learning_rate": 2.9759623110985236e-06, "loss": 0.3317, "step": 9874 }, { "epoch": 1.5110941086457537, "grad_norm": 2.4761340756648473, "learning_rate": 2.974198608844906e-06, "loss": 0.3783, "step": 9875 }, { "epoch": 1.5112471308339708, "grad_norm": 2.249465172824092, "learning_rate": 2.9724353380732364e-06, "loss": 0.4108, "step": 9876 }, { "epoch": 1.5114001530221883, "grad_norm": 2.1262704705662143, "learning_rate": 2.9706724988918043e-06, "loss": 0.2787, "step": 9877 }, { "epoch": 1.5115531752104054, "grad_norm": 2.608599424875475, "learning_rate": 2.968910091408873e-06, "loss": 0.3657, "step": 9878 }, { "epoch": 1.5117061973986228, "grad_norm": 2.0937507208713497, "learning_rate": 2.9671481157326785e-06, "loss": 0.273, "step": 9879 }, { "epoch": 1.51185921958684, "grad_norm": 1.946594115741587, "learning_rate": 2.9653865719714326e-06, "loss": 0.2656, "step": 9880 }, { "epoch": 1.5120122417750572, "grad_norm": 1.962879276420753, "learning_rate": 2.9636254602333147e-06, "loss": 0.2833, "step": 9881 }, { "epoch": 1.5121652639632748, "grad_norm": 2.6116433052824033, "learning_rate": 2.9618647806264856e-06, "loss": 0.315, "step": 9882 }, { "epoch": 1.5123182861514919, "grad_norm": 2.350676279516989, "learning_rate": 2.9601045332590727e-06, "loss": 0.3421, "step": 9883 }, { "epoch": 1.5124713083397092, "grad_norm": 1.989790085557811, "learning_rate": 2.9583447182391813e-06, "loss": 0.3153, "step": 9884 }, { "epoch": 1.5126243305279266, "grad_norm": 1.7572196885042126, "learning_rate": 2.9565853356748863e-06, "loss": 0.2197, "step": 9885 }, { "epoch": 1.5127773527161439, "grad_norm": 2.297276054650986, "learning_rate": 2.954826385674241e-06, "loss": 0.3149, "step": 9886 }, { "epoch": 1.5129303749043612, "grad_norm": 2.0684649483413184, "learning_rate": 2.9530678683452685e-06, "loss": 0.2807, "step": 9887 }, { "epoch": 1.5130833970925783, "grad_norm": 2.083082465518758, "learning_rate": 2.951309783795965e-06, "loss": 0.3097, "step": 9888 }, { "epoch": 1.5132364192807957, "grad_norm": 2.232352667798138, "learning_rate": 2.9495521321343035e-06, "loss": 0.2664, "step": 9889 }, { "epoch": 1.513389441469013, "grad_norm": 2.0746473923615874, "learning_rate": 2.947794913468226e-06, "loss": 0.3053, "step": 9890 }, { "epoch": 1.5135424636572303, "grad_norm": 2.248018803212016, "learning_rate": 2.9460381279056482e-06, "loss": 0.285, "step": 9891 }, { "epoch": 1.5136954858454477, "grad_norm": 2.091935846460022, "learning_rate": 2.9442817755544707e-06, "loss": 0.2858, "step": 9892 }, { "epoch": 1.5138485080336648, "grad_norm": 1.9566609105598456, "learning_rate": 2.942525856522549e-06, "loss": 0.3297, "step": 9893 }, { "epoch": 1.5140015302218823, "grad_norm": 2.137727077428057, "learning_rate": 2.9407703709177193e-06, "loss": 0.2879, "step": 9894 }, { "epoch": 1.5141545524100994, "grad_norm": 2.2093864848262124, "learning_rate": 2.939015318847804e-06, "loss": 0.2882, "step": 9895 }, { "epoch": 1.5143075745983168, "grad_norm": 2.095268692569443, "learning_rate": 2.9372607004205777e-06, "loss": 0.3523, "step": 9896 }, { "epoch": 1.514460596786534, "grad_norm": 2.4814418845151143, "learning_rate": 2.935506515743798e-06, "loss": 0.3139, "step": 9897 }, { "epoch": 1.5146136189747512, "grad_norm": 1.987799327564212, "learning_rate": 2.933752764925205e-06, "loss": 0.2749, "step": 9898 }, { "epoch": 1.5147666411629688, "grad_norm": 2.2456331156220917, "learning_rate": 2.931999448072492e-06, "loss": 0.3214, "step": 9899 }, { "epoch": 1.5149196633511859, "grad_norm": 2.0249520005224007, "learning_rate": 2.9302465652933476e-06, "loss": 0.2876, "step": 9900 }, { "epoch": 1.5150726855394032, "grad_norm": 1.9927759375396923, "learning_rate": 2.9284941166954207e-06, "loss": 0.2752, "step": 9901 }, { "epoch": 1.5152257077276206, "grad_norm": 1.8998708876323396, "learning_rate": 2.9267421023863274e-06, "loss": 0.3092, "step": 9902 }, { "epoch": 1.5153787299158377, "grad_norm": 2.1212028678752803, "learning_rate": 2.924990522473676e-06, "loss": 0.3021, "step": 9903 }, { "epoch": 1.5155317521040552, "grad_norm": 1.9764806188380462, "learning_rate": 2.923239377065038e-06, "loss": 0.2792, "step": 9904 }, { "epoch": 1.5156847742922723, "grad_norm": 1.919797871945186, "learning_rate": 2.9214886662679467e-06, "loss": 0.2539, "step": 9905 }, { "epoch": 1.5158377964804897, "grad_norm": 1.7930166385110369, "learning_rate": 2.9197383901899314e-06, "loss": 0.27, "step": 9906 }, { "epoch": 1.515990818668707, "grad_norm": 1.9817109015167513, "learning_rate": 2.9179885489384797e-06, "loss": 0.2365, "step": 9907 }, { "epoch": 1.516143840856924, "grad_norm": 2.177398279153643, "learning_rate": 2.916239142621057e-06, "loss": 0.2703, "step": 9908 }, { "epoch": 1.5162968630451417, "grad_norm": 2.1794166380623454, "learning_rate": 2.914490171345099e-06, "loss": 0.3407, "step": 9909 }, { "epoch": 1.5164498852333588, "grad_norm": 2.40714069176058, "learning_rate": 2.9127416352180195e-06, "loss": 0.3685, "step": 9910 }, { "epoch": 1.516602907421576, "grad_norm": 1.8834924452301767, "learning_rate": 2.9109935343472007e-06, "loss": 0.3248, "step": 9911 }, { "epoch": 1.5167559296097934, "grad_norm": 2.1304268074488304, "learning_rate": 2.9092458688399983e-06, "loss": 0.2673, "step": 9912 }, { "epoch": 1.5169089517980106, "grad_norm": 2.299973146784591, "learning_rate": 2.907498638803753e-06, "loss": 0.3088, "step": 9913 }, { "epoch": 1.517061973986228, "grad_norm": 2.0890436295813135, "learning_rate": 2.905751844345759e-06, "loss": 0.2982, "step": 9914 }, { "epoch": 1.5172149961744452, "grad_norm": 2.243008873416047, "learning_rate": 2.9040054855732934e-06, "loss": 0.3665, "step": 9915 }, { "epoch": 1.5173680183626626, "grad_norm": 2.231300366616912, "learning_rate": 2.902259562593618e-06, "loss": 0.3656, "step": 9916 }, { "epoch": 1.5175210405508799, "grad_norm": 2.314260763210238, "learning_rate": 2.900514075513945e-06, "loss": 0.3373, "step": 9917 }, { "epoch": 1.5176740627390972, "grad_norm": 2.2686857788300676, "learning_rate": 2.8987690244414735e-06, "loss": 0.3182, "step": 9918 }, { "epoch": 1.5178270849273146, "grad_norm": 2.0332689531706256, "learning_rate": 2.8970244094833834e-06, "loss": 0.2877, "step": 9919 }, { "epoch": 1.5179801071155317, "grad_norm": 2.332225199392639, "learning_rate": 2.895280230746804e-06, "loss": 0.3246, "step": 9920 }, { "epoch": 1.518133129303749, "grad_norm": 2.213902332927608, "learning_rate": 2.893536488338863e-06, "loss": 0.2677, "step": 9921 }, { "epoch": 1.5182861514919663, "grad_norm": 2.058077346563243, "learning_rate": 2.89179318236665e-06, "loss": 0.3128, "step": 9922 }, { "epoch": 1.5184391736801837, "grad_norm": 2.1206458507799684, "learning_rate": 2.890050312937218e-06, "loss": 0.2985, "step": 9923 }, { "epoch": 1.518592195868401, "grad_norm": 2.5255851317820666, "learning_rate": 2.8883078801576148e-06, "loss": 0.3492, "step": 9924 }, { "epoch": 1.518745218056618, "grad_norm": 2.239478346754188, "learning_rate": 2.886565884134849e-06, "loss": 0.3186, "step": 9925 }, { "epoch": 1.5188982402448357, "grad_norm": 2.1589701070470655, "learning_rate": 2.8848243249758934e-06, "loss": 0.2861, "step": 9926 }, { "epoch": 1.5190512624330528, "grad_norm": 2.038957431232218, "learning_rate": 2.8830832027877134e-06, "loss": 0.3916, "step": 9927 }, { "epoch": 1.51920428462127, "grad_norm": 2.290410983887635, "learning_rate": 2.8813425176772357e-06, "loss": 0.3217, "step": 9928 }, { "epoch": 1.5193573068094874, "grad_norm": 2.1377659989683018, "learning_rate": 2.8796022697513627e-06, "loss": 0.3644, "step": 9929 }, { "epoch": 1.5195103289977046, "grad_norm": 2.1557974796471977, "learning_rate": 2.87786245911697e-06, "loss": 0.3259, "step": 9930 }, { "epoch": 1.519663351185922, "grad_norm": 2.197537522420924, "learning_rate": 2.876123085880904e-06, "loss": 0.3123, "step": 9931 }, { "epoch": 1.5198163733741392, "grad_norm": 1.8849797423696186, "learning_rate": 2.874384150149989e-06, "loss": 0.2838, "step": 9932 }, { "epoch": 1.5199693955623566, "grad_norm": 2.355866028332641, "learning_rate": 2.87264565203102e-06, "loss": 0.3106, "step": 9933 }, { "epoch": 1.5201224177505739, "grad_norm": 2.4298439369368086, "learning_rate": 2.8709075916307626e-06, "loss": 0.3564, "step": 9934 }, { "epoch": 1.520275439938791, "grad_norm": 2.138768183921702, "learning_rate": 2.8691699690559594e-06, "loss": 0.3101, "step": 9935 }, { "epoch": 1.5204284621270086, "grad_norm": 2.1084569658813064, "learning_rate": 2.8674327844133243e-06, "loss": 0.3014, "step": 9936 }, { "epoch": 1.5205814843152257, "grad_norm": 1.894295714318006, "learning_rate": 2.8656960378095444e-06, "loss": 0.2854, "step": 9937 }, { "epoch": 1.520734506503443, "grad_norm": 2.328776255293014, "learning_rate": 2.863959729351281e-06, "loss": 0.3072, "step": 9938 }, { "epoch": 1.5208875286916603, "grad_norm": 2.4823991572727184, "learning_rate": 2.8622238591451668e-06, "loss": 0.3271, "step": 9939 }, { "epoch": 1.5210405508798774, "grad_norm": 2.054904895081333, "learning_rate": 2.8604884272978083e-06, "loss": 0.2628, "step": 9940 }, { "epoch": 1.521193573068095, "grad_norm": 2.0507676713357705, "learning_rate": 2.8587534339157854e-06, "loss": 0.3012, "step": 9941 }, { "epoch": 1.521346595256312, "grad_norm": 2.174337355448324, "learning_rate": 2.8570188791056507e-06, "loss": 0.3171, "step": 9942 }, { "epoch": 1.5214996174445294, "grad_norm": 1.8669974265756857, "learning_rate": 2.855284762973931e-06, "loss": 0.2611, "step": 9943 }, { "epoch": 1.5216526396327468, "grad_norm": 2.063747456601812, "learning_rate": 2.8535510856271207e-06, "loss": 0.3354, "step": 9944 }, { "epoch": 1.5218056618209639, "grad_norm": 2.4098971885429474, "learning_rate": 2.8518178471717008e-06, "loss": 0.3642, "step": 9945 }, { "epoch": 1.5219586840091814, "grad_norm": 2.3625660024538857, "learning_rate": 2.8500850477141086e-06, "loss": 0.3025, "step": 9946 }, { "epoch": 1.5221117061973986, "grad_norm": 2.3558171194167774, "learning_rate": 2.8483526873607603e-06, "loss": 0.3086, "step": 9947 }, { "epoch": 1.5222647283856159, "grad_norm": 2.101587584902661, "learning_rate": 2.8466207662180578e-06, "loss": 0.2378, "step": 9948 }, { "epoch": 1.5224177505738332, "grad_norm": 2.147038880991046, "learning_rate": 2.8448892843923513e-06, "loss": 0.3382, "step": 9949 }, { "epoch": 1.5225707727620506, "grad_norm": 2.1708700692575142, "learning_rate": 2.8431582419899894e-06, "loss": 0.3196, "step": 9950 }, { "epoch": 1.5227237949502679, "grad_norm": 2.2319621602439548, "learning_rate": 2.841427639117279e-06, "loss": 0.2803, "step": 9951 }, { "epoch": 1.522876817138485, "grad_norm": 1.840557601791769, "learning_rate": 2.839697475880496e-06, "loss": 0.2953, "step": 9952 }, { "epoch": 1.5230298393267023, "grad_norm": 1.9537839576314175, "learning_rate": 2.837967752385906e-06, "loss": 0.3168, "step": 9953 }, { "epoch": 1.5231828615149197, "grad_norm": 1.9900787991732978, "learning_rate": 2.836238468739737e-06, "loss": 0.3043, "step": 9954 }, { "epoch": 1.523335883703137, "grad_norm": 2.0738800146254412, "learning_rate": 2.834509625048182e-06, "loss": 0.2901, "step": 9955 }, { "epoch": 1.5234889058913543, "grad_norm": 2.270382910441135, "learning_rate": 2.8327812214174265e-06, "loss": 0.301, "step": 9956 }, { "epoch": 1.5236419280795714, "grad_norm": 1.9818118098041126, "learning_rate": 2.831053257953613e-06, "loss": 0.2977, "step": 9957 }, { "epoch": 1.523794950267789, "grad_norm": 2.233450766833048, "learning_rate": 2.8293257347628655e-06, "loss": 0.3289, "step": 9958 }, { "epoch": 1.523947972456006, "grad_norm": 2.298257535897543, "learning_rate": 2.8275986519512753e-06, "loss": 0.3549, "step": 9959 }, { "epoch": 1.5241009946442234, "grad_norm": 2.2911297653055316, "learning_rate": 2.8258720096249116e-06, "loss": 0.3306, "step": 9960 }, { "epoch": 1.5242540168324408, "grad_norm": 2.0928809609607075, "learning_rate": 2.824145807889812e-06, "loss": 0.3591, "step": 9961 }, { "epoch": 1.5244070390206579, "grad_norm": 2.085267934794212, "learning_rate": 2.8224200468519914e-06, "loss": 0.2872, "step": 9962 }, { "epoch": 1.5245600612088754, "grad_norm": 2.441486160019589, "learning_rate": 2.8206947266174346e-06, "loss": 0.3302, "step": 9963 }, { "epoch": 1.5247130833970926, "grad_norm": 2.1910546788096297, "learning_rate": 2.818969847292099e-06, "loss": 0.2417, "step": 9964 }, { "epoch": 1.5248661055853099, "grad_norm": 2.224279279401526, "learning_rate": 2.817245408981919e-06, "loss": 0.2423, "step": 9965 }, { "epoch": 1.5250191277735272, "grad_norm": 2.1629445091645914, "learning_rate": 2.8155214117927964e-06, "loss": 0.3308, "step": 9966 }, { "epoch": 1.5251721499617443, "grad_norm": 1.8555635506479633, "learning_rate": 2.81379785583061e-06, "loss": 0.2878, "step": 9967 }, { "epoch": 1.5253251721499619, "grad_norm": 1.9671814004381067, "learning_rate": 2.8120747412012096e-06, "loss": 0.285, "step": 9968 }, { "epoch": 1.525478194338179, "grad_norm": 2.16675648292726, "learning_rate": 2.810352068010419e-06, "loss": 0.3092, "step": 9969 }, { "epoch": 1.5256312165263963, "grad_norm": 2.2905687791113416, "learning_rate": 2.808629836364034e-06, "loss": 0.3053, "step": 9970 }, { "epoch": 1.5257842387146137, "grad_norm": 1.9946227786642385, "learning_rate": 2.806908046367823e-06, "loss": 0.2843, "step": 9971 }, { "epoch": 1.5259372609028308, "grad_norm": 2.174909387644281, "learning_rate": 2.8051866981275298e-06, "loss": 0.3053, "step": 9972 }, { "epoch": 1.5260902830910483, "grad_norm": 2.359615508272599, "learning_rate": 2.803465791748864e-06, "loss": 0.281, "step": 9973 }, { "epoch": 1.5262433052792654, "grad_norm": 2.2931829206578898, "learning_rate": 2.8017453273375227e-06, "loss": 0.3119, "step": 9974 }, { "epoch": 1.5263963274674828, "grad_norm": 2.2215604585576694, "learning_rate": 2.8000253049991577e-06, "loss": 0.3205, "step": 9975 }, { "epoch": 1.5265493496557, "grad_norm": 2.576249260369154, "learning_rate": 2.798305724839402e-06, "loss": 0.351, "step": 9976 }, { "epoch": 1.5267023718439172, "grad_norm": 2.59817822012346, "learning_rate": 2.796586586963871e-06, "loss": 0.3171, "step": 9977 }, { "epoch": 1.5268553940321348, "grad_norm": 2.8692211062905124, "learning_rate": 2.7948678914781347e-06, "loss": 0.3653, "step": 9978 }, { "epoch": 1.5270084162203519, "grad_norm": 2.1262724881376536, "learning_rate": 2.7931496384877443e-06, "loss": 0.2916, "step": 9979 }, { "epoch": 1.5271614384085692, "grad_norm": 1.8299487915344057, "learning_rate": 2.7914318280982346e-06, "loss": 0.2591, "step": 9980 }, { "epoch": 1.5273144605967865, "grad_norm": 2.3353736019494318, "learning_rate": 2.7897144604150907e-06, "loss": 0.3522, "step": 9981 }, { "epoch": 1.5274674827850037, "grad_norm": 2.333490426475123, "learning_rate": 2.7879975355437904e-06, "loss": 0.3201, "step": 9982 }, { "epoch": 1.5276205049732212, "grad_norm": 2.195731537268738, "learning_rate": 2.786281053589779e-06, "loss": 0.3418, "step": 9983 }, { "epoch": 1.5277735271614383, "grad_norm": 2.755496101595831, "learning_rate": 2.7845650146584626e-06, "loss": 0.3516, "step": 9984 }, { "epoch": 1.5279265493496557, "grad_norm": 2.3936080171100325, "learning_rate": 2.782849418855238e-06, "loss": 0.3365, "step": 9985 }, { "epoch": 1.528079571537873, "grad_norm": 2.258254891109735, "learning_rate": 2.7811342662854636e-06, "loss": 0.3154, "step": 9986 }, { "epoch": 1.5282325937260903, "grad_norm": 2.3318371299600336, "learning_rate": 2.7794195570544745e-06, "loss": 0.2555, "step": 9987 }, { "epoch": 1.5283856159143077, "grad_norm": 2.022711659963486, "learning_rate": 2.7777052912675785e-06, "loss": 0.325, "step": 9988 }, { "epoch": 1.5285386381025248, "grad_norm": 2.2626567628611745, "learning_rate": 2.7759914690300536e-06, "loss": 0.3974, "step": 9989 }, { "epoch": 1.528691660290742, "grad_norm": 2.236395178465297, "learning_rate": 2.7742780904471536e-06, "loss": 0.3225, "step": 9990 }, { "epoch": 1.5288446824789594, "grad_norm": 2.183933947211389, "learning_rate": 2.772565155624103e-06, "loss": 0.3282, "step": 9991 }, { "epoch": 1.5289977046671768, "grad_norm": 2.3450557515589407, "learning_rate": 2.7708526646660993e-06, "loss": 0.3569, "step": 9992 }, { "epoch": 1.529150726855394, "grad_norm": 1.7888436151823426, "learning_rate": 2.769140617678315e-06, "loss": 0.2846, "step": 9993 }, { "epoch": 1.5293037490436112, "grad_norm": 2.0381056570029945, "learning_rate": 2.767429014765889e-06, "loss": 0.3703, "step": 9994 }, { "epoch": 1.5294567712318288, "grad_norm": 2.2819477215176645, "learning_rate": 2.7657178560339483e-06, "loss": 0.2884, "step": 9995 }, { "epoch": 1.5296097934200459, "grad_norm": 2.3538836527548694, "learning_rate": 2.7640071415875703e-06, "loss": 0.2908, "step": 9996 }, { "epoch": 1.5297628156082632, "grad_norm": 2.017525350368832, "learning_rate": 2.762296871531819e-06, "loss": 0.369, "step": 9997 }, { "epoch": 1.5299158377964805, "grad_norm": 2.146410794652233, "learning_rate": 2.7605870459717367e-06, "loss": 0.3486, "step": 9998 }, { "epoch": 1.5300688599846977, "grad_norm": 2.170593065314875, "learning_rate": 2.7588776650123215e-06, "loss": 0.312, "step": 9999 }, { "epoch": 1.5302218821729152, "grad_norm": 2.277094271624863, "learning_rate": 2.7571687287585524e-06, "loss": 0.3413, "step": 10000 }, { "epoch": 1.5303749043611323, "grad_norm": 2.468686944978311, "learning_rate": 2.7554602373153938e-06, "loss": 0.3502, "step": 10001 }, { "epoch": 1.5305279265493497, "grad_norm": 2.378155880261049, "learning_rate": 2.7537521907877553e-06, "loss": 0.3338, "step": 10002 }, { "epoch": 1.530680948737567, "grad_norm": 2.177448943408823, "learning_rate": 2.7520445892805457e-06, "loss": 0.3183, "step": 10003 }, { "epoch": 1.530833970925784, "grad_norm": 2.3647380308511594, "learning_rate": 2.7503374328986355e-06, "loss": 0.359, "step": 10004 }, { "epoch": 1.5309869931140017, "grad_norm": 2.137209652082463, "learning_rate": 2.748630721746858e-06, "loss": 0.324, "step": 10005 }, { "epoch": 1.5311400153022188, "grad_norm": 2.421214324585907, "learning_rate": 2.7469244559300377e-06, "loss": 0.3415, "step": 10006 }, { "epoch": 1.531293037490436, "grad_norm": 2.1578732014508675, "learning_rate": 2.7452186355529642e-06, "loss": 0.3266, "step": 10007 }, { "epoch": 1.5314460596786534, "grad_norm": 2.110705447832477, "learning_rate": 2.7435132607203896e-06, "loss": 0.3343, "step": 10008 }, { "epoch": 1.5315990818668705, "grad_norm": 2.1867109539775305, "learning_rate": 2.7418083315370557e-06, "loss": 0.2972, "step": 10009 }, { "epoch": 1.531752104055088, "grad_norm": 2.1025768332337798, "learning_rate": 2.740103848107667e-06, "loss": 0.3167, "step": 10010 }, { "epoch": 1.5319051262433052, "grad_norm": 1.956002678795125, "learning_rate": 2.7383998105369014e-06, "loss": 0.3239, "step": 10011 }, { "epoch": 1.5320581484315225, "grad_norm": 2.234365363174236, "learning_rate": 2.736696218929411e-06, "loss": 0.3692, "step": 10012 }, { "epoch": 1.5322111706197399, "grad_norm": 2.360613287989332, "learning_rate": 2.73499307338982e-06, "loss": 0.3553, "step": 10013 }, { "epoch": 1.532364192807957, "grad_norm": 2.0106390616987166, "learning_rate": 2.7332903740227256e-06, "loss": 0.2996, "step": 10014 }, { "epoch": 1.5325172149961745, "grad_norm": 1.772493142680713, "learning_rate": 2.7315881209326967e-06, "loss": 0.2977, "step": 10015 }, { "epoch": 1.5326702371843917, "grad_norm": 2.069526077229527, "learning_rate": 2.729886314224275e-06, "loss": 0.3248, "step": 10016 }, { "epoch": 1.532823259372609, "grad_norm": 2.1173816114427004, "learning_rate": 2.7281849540019755e-06, "loss": 0.2778, "step": 10017 }, { "epoch": 1.5329762815608263, "grad_norm": 2.4353246203851686, "learning_rate": 2.726484040370286e-06, "loss": 0.3159, "step": 10018 }, { "epoch": 1.5331293037490437, "grad_norm": 2.080059375980662, "learning_rate": 2.724783573433666e-06, "loss": 0.2722, "step": 10019 }, { "epoch": 1.533282325937261, "grad_norm": 2.231678350736275, "learning_rate": 2.7230835532965473e-06, "loss": 0.2778, "step": 10020 }, { "epoch": 1.533435348125478, "grad_norm": 2.077749797828933, "learning_rate": 2.7213839800633346e-06, "loss": 0.2966, "step": 10021 }, { "epoch": 1.5335883703136954, "grad_norm": 2.3270159778273998, "learning_rate": 2.719684853838406e-06, "loss": 0.3374, "step": 10022 }, { "epoch": 1.5337413925019128, "grad_norm": 2.210011146928926, "learning_rate": 2.717986174726108e-06, "loss": 0.3091, "step": 10023 }, { "epoch": 1.53389441469013, "grad_norm": 2.2217583618277064, "learning_rate": 2.716287942830773e-06, "loss": 0.3315, "step": 10024 }, { "epoch": 1.5340474368783474, "grad_norm": 2.041482697435787, "learning_rate": 2.714590158256687e-06, "loss": 0.2848, "step": 10025 }, { "epoch": 1.5342004590665645, "grad_norm": 2.1043763796800947, "learning_rate": 2.7128928211081153e-06, "loss": 0.2653, "step": 10026 }, { "epoch": 1.534353481254782, "grad_norm": 2.154308009610621, "learning_rate": 2.71119593148931e-06, "loss": 0.3251, "step": 10027 }, { "epoch": 1.5345065034429992, "grad_norm": 2.1032935774833383, "learning_rate": 2.7094994895044736e-06, "loss": 0.3703, "step": 10028 }, { "epoch": 1.5346595256312165, "grad_norm": 2.290845349239085, "learning_rate": 2.7078034952577905e-06, "loss": 0.3318, "step": 10029 }, { "epoch": 1.5348125478194339, "grad_norm": 2.1385725936478908, "learning_rate": 2.706107948853428e-06, "loss": 0.3057, "step": 10030 }, { "epoch": 1.534965570007651, "grad_norm": 2.129033724441874, "learning_rate": 2.704412850395505e-06, "loss": 0.2826, "step": 10031 }, { "epoch": 1.5351185921958685, "grad_norm": 2.1770308821410462, "learning_rate": 2.7027181999881326e-06, "loss": 0.4557, "step": 10032 }, { "epoch": 1.5352716143840857, "grad_norm": 1.7528694429046794, "learning_rate": 2.701023997735385e-06, "loss": 0.2957, "step": 10033 }, { "epoch": 1.535424636572303, "grad_norm": 2.167855205306468, "learning_rate": 2.6993302437413006e-06, "loss": 0.3485, "step": 10034 }, { "epoch": 1.5355776587605203, "grad_norm": 2.3501051359832354, "learning_rate": 2.697636938109911e-06, "loss": 0.3644, "step": 10035 }, { "epoch": 1.5357306809487374, "grad_norm": 1.9241981996516704, "learning_rate": 2.6959440809452055e-06, "loss": 0.2791, "step": 10036 }, { "epoch": 1.535883703136955, "grad_norm": 1.9695651915181258, "learning_rate": 2.6942516723511424e-06, "loss": 0.3818, "step": 10037 }, { "epoch": 1.536036725325172, "grad_norm": 1.8640844680454627, "learning_rate": 2.6925597124316673e-06, "loss": 0.29, "step": 10038 }, { "epoch": 1.5361897475133894, "grad_norm": 2.6503153964138466, "learning_rate": 2.6908682012906874e-06, "loss": 0.2854, "step": 10039 }, { "epoch": 1.5363427697016068, "grad_norm": 2.1630247633251076, "learning_rate": 2.689177139032084e-06, "loss": 0.2942, "step": 10040 }, { "epoch": 1.5364957918898239, "grad_norm": 2.031261244127211, "learning_rate": 2.6874865257597126e-06, "loss": 0.2848, "step": 10041 }, { "epoch": 1.5366488140780414, "grad_norm": 1.9718912748795485, "learning_rate": 2.685796361577402e-06, "loss": 0.2793, "step": 10042 }, { "epoch": 1.5368018362662585, "grad_norm": 2.306037088480719, "learning_rate": 2.684106646588949e-06, "loss": 0.32, "step": 10043 }, { "epoch": 1.5369548584544759, "grad_norm": 2.270018705118447, "learning_rate": 2.682417380898126e-06, "loss": 0.397, "step": 10044 }, { "epoch": 1.5371078806426932, "grad_norm": 1.9460779455142152, "learning_rate": 2.680728564608679e-06, "loss": 0.3066, "step": 10045 }, { "epoch": 1.5372609028309103, "grad_norm": 2.040083304159909, "learning_rate": 2.679040197824324e-06, "loss": 0.3022, "step": 10046 }, { "epoch": 1.5374139250191279, "grad_norm": 1.698927479185634, "learning_rate": 2.6773522806487494e-06, "loss": 0.2381, "step": 10047 }, { "epoch": 1.537566947207345, "grad_norm": 2.159105375673701, "learning_rate": 2.675664813185619e-06, "loss": 0.3544, "step": 10048 }, { "epoch": 1.5377199693955623, "grad_norm": 2.1797933921292647, "learning_rate": 2.673977795538565e-06, "loss": 0.3073, "step": 10049 }, { "epoch": 1.5378729915837797, "grad_norm": 2.1427002258465944, "learning_rate": 2.6722912278111936e-06, "loss": 0.3055, "step": 10050 }, { "epoch": 1.538026013771997, "grad_norm": 2.273707572216091, "learning_rate": 2.670605110107084e-06, "loss": 0.3434, "step": 10051 }, { "epoch": 1.5381790359602143, "grad_norm": 2.0306393620915353, "learning_rate": 2.668919442529785e-06, "loss": 0.2714, "step": 10052 }, { "epoch": 1.5383320581484314, "grad_norm": 2.1099140870250404, "learning_rate": 2.6672342251828274e-06, "loss": 0.2864, "step": 10053 }, { "epoch": 1.5384850803366488, "grad_norm": 2.9079589709237963, "learning_rate": 2.6655494581696994e-06, "loss": 0.4068, "step": 10054 }, { "epoch": 1.538638102524866, "grad_norm": 1.9689432719376831, "learning_rate": 2.6638651415938697e-06, "loss": 0.3411, "step": 10055 }, { "epoch": 1.5387911247130834, "grad_norm": 2.114549094429897, "learning_rate": 2.662181275558786e-06, "loss": 0.307, "step": 10056 }, { "epoch": 1.5389441469013008, "grad_norm": 2.057594249117251, "learning_rate": 2.6604978601678523e-06, "loss": 0.2666, "step": 10057 }, { "epoch": 1.5390971690895179, "grad_norm": 2.232955568610074, "learning_rate": 2.658814895524455e-06, "loss": 0.3322, "step": 10058 }, { "epoch": 1.5392501912777354, "grad_norm": 2.2654884542844154, "learning_rate": 2.6571323817319593e-06, "loss": 0.3371, "step": 10059 }, { "epoch": 1.5394032134659525, "grad_norm": 2.4057706843267748, "learning_rate": 2.6554503188936844e-06, "loss": 0.3654, "step": 10060 }, { "epoch": 1.5395562356541699, "grad_norm": 2.1902711910222266, "learning_rate": 2.6537687071129404e-06, "loss": 0.3098, "step": 10061 }, { "epoch": 1.5397092578423872, "grad_norm": 2.073188025318028, "learning_rate": 2.6520875464930006e-06, "loss": 0.2854, "step": 10062 }, { "epoch": 1.5398622800306043, "grad_norm": 2.0989366632201287, "learning_rate": 2.650406837137104e-06, "loss": 0.3067, "step": 10063 }, { "epoch": 1.5400153022188219, "grad_norm": 2.2188949698155147, "learning_rate": 2.6487265791484795e-06, "loss": 0.2936, "step": 10064 }, { "epoch": 1.540168324407039, "grad_norm": 2.263393577409095, "learning_rate": 2.6470467726303163e-06, "loss": 0.3334, "step": 10065 }, { "epoch": 1.5403213465952563, "grad_norm": 2.1229052913443023, "learning_rate": 2.6453674176857693e-06, "loss": 0.3446, "step": 10066 }, { "epoch": 1.5404743687834737, "grad_norm": 2.0820187078812995, "learning_rate": 2.6436885144179848e-06, "loss": 0.36, "step": 10067 }, { "epoch": 1.5406273909716908, "grad_norm": 2.036822752359246, "learning_rate": 2.6420100629300648e-06, "loss": 0.3523, "step": 10068 }, { "epoch": 1.5407804131599083, "grad_norm": 2.118531167022728, "learning_rate": 2.640332063325093e-06, "loss": 0.2695, "step": 10069 }, { "epoch": 1.5409334353481254, "grad_norm": 2.1807415957671457, "learning_rate": 2.6386545157061207e-06, "loss": 0.3469, "step": 10070 }, { "epoch": 1.5410864575363428, "grad_norm": 2.1707900822925947, "learning_rate": 2.636977420176171e-06, "loss": 0.2875, "step": 10071 }, { "epoch": 1.54123947972456, "grad_norm": 2.4577498652524254, "learning_rate": 2.635300776838243e-06, "loss": 0.3673, "step": 10072 }, { "epoch": 1.5413925019127772, "grad_norm": 2.0434551137883297, "learning_rate": 2.633624585795305e-06, "loss": 0.3203, "step": 10073 }, { "epoch": 1.5415455241009948, "grad_norm": 1.7411301508901227, "learning_rate": 2.6319488471502984e-06, "loss": 0.2563, "step": 10074 }, { "epoch": 1.5416985462892119, "grad_norm": 2.1478712253541206, "learning_rate": 2.630273561006138e-06, "loss": 0.295, "step": 10075 }, { "epoch": 1.5418515684774292, "grad_norm": 1.90429011588242, "learning_rate": 2.6285987274657045e-06, "loss": 0.2753, "step": 10076 }, { "epoch": 1.5420045906656465, "grad_norm": 2.1754047440005437, "learning_rate": 2.6269243466318673e-06, "loss": 0.3106, "step": 10077 }, { "epoch": 1.5421576128538637, "grad_norm": 2.2461452419660524, "learning_rate": 2.625250418607446e-06, "loss": 0.3741, "step": 10078 }, { "epoch": 1.5423106350420812, "grad_norm": 2.193766353742696, "learning_rate": 2.623576943495244e-06, "loss": 0.3146, "step": 10079 }, { "epoch": 1.5424636572302983, "grad_norm": 1.7423098728249427, "learning_rate": 2.6219039213980445e-06, "loss": 0.2811, "step": 10080 }, { "epoch": 1.5426166794185157, "grad_norm": 1.961978767244435, "learning_rate": 2.620231352418585e-06, "loss": 0.5452, "step": 10081 }, { "epoch": 1.542769701606733, "grad_norm": 2.062082207863998, "learning_rate": 2.618559236659586e-06, "loss": 0.3589, "step": 10082 }, { "epoch": 1.54292272379495, "grad_norm": 2.0213839010709878, "learning_rate": 2.6168875742237454e-06, "loss": 0.288, "step": 10083 }, { "epoch": 1.5430757459831677, "grad_norm": 2.0088016852578936, "learning_rate": 2.6152163652137165e-06, "loss": 0.2905, "step": 10084 }, { "epoch": 1.5432287681713848, "grad_norm": 1.8323811164141244, "learning_rate": 2.613545609732142e-06, "loss": 0.3337, "step": 10085 }, { "epoch": 1.543381790359602, "grad_norm": 2.0627168321060365, "learning_rate": 2.6118753078816315e-06, "loss": 0.3066, "step": 10086 }, { "epoch": 1.5435348125478194, "grad_norm": 2.2910285838159834, "learning_rate": 2.610205459764755e-06, "loss": 0.3516, "step": 10087 }, { "epoch": 1.5436878347360368, "grad_norm": 1.9985040435482564, "learning_rate": 2.608536065484073e-06, "loss": 0.3262, "step": 10088 }, { "epoch": 1.543840856924254, "grad_norm": 2.2496865624069855, "learning_rate": 2.606867125142107e-06, "loss": 0.3033, "step": 10089 }, { "epoch": 1.5439938791124712, "grad_norm": 2.241097444447969, "learning_rate": 2.605198638841353e-06, "loss": 0.3357, "step": 10090 }, { "epoch": 1.5441469013006885, "grad_norm": 2.1992885647227625, "learning_rate": 2.603530606684279e-06, "loss": 0.3333, "step": 10091 }, { "epoch": 1.5442999234889059, "grad_norm": 1.9012812969047344, "learning_rate": 2.601863028773326e-06, "loss": 0.2586, "step": 10092 }, { "epoch": 1.5444529456771232, "grad_norm": 1.8123047465528561, "learning_rate": 2.600195905210905e-06, "loss": 0.2823, "step": 10093 }, { "epoch": 1.5446059678653405, "grad_norm": 2.3152030446910787, "learning_rate": 2.598529236099403e-06, "loss": 0.3443, "step": 10094 }, { "epoch": 1.5447589900535577, "grad_norm": 2.1743439640613484, "learning_rate": 2.596863021541175e-06, "loss": 0.3019, "step": 10095 }, { "epoch": 1.5449120122417752, "grad_norm": 1.9172471293093292, "learning_rate": 2.595197261638549e-06, "loss": 0.2936, "step": 10096 }, { "epoch": 1.5450650344299923, "grad_norm": 1.9253862639636048, "learning_rate": 2.5935319564938275e-06, "loss": 0.2985, "step": 10097 }, { "epoch": 1.5452180566182097, "grad_norm": 1.9430570051191849, "learning_rate": 2.5918671062092836e-06, "loss": 0.2914, "step": 10098 }, { "epoch": 1.545371078806427, "grad_norm": 2.2577123270161428, "learning_rate": 2.590202710887161e-06, "loss": 0.3272, "step": 10099 }, { "epoch": 1.545524100994644, "grad_norm": 2.2632677012722895, "learning_rate": 2.588538770629677e-06, "loss": 0.2962, "step": 10100 }, { "epoch": 1.5456771231828617, "grad_norm": 1.9504870317490723, "learning_rate": 2.586875285539021e-06, "loss": 0.2772, "step": 10101 }, { "epoch": 1.5458301453710788, "grad_norm": 1.9342150897613446, "learning_rate": 2.5852122557173542e-06, "loss": 0.3467, "step": 10102 }, { "epoch": 1.545983167559296, "grad_norm": 1.951851157913786, "learning_rate": 2.5835496812668095e-06, "loss": 0.2735, "step": 10103 }, { "epoch": 1.5461361897475134, "grad_norm": 2.142026102977652, "learning_rate": 2.581887562289491e-06, "loss": 0.2972, "step": 10104 }, { "epoch": 1.5462892119357305, "grad_norm": 2.193269441158662, "learning_rate": 2.5802258988874762e-06, "loss": 0.3203, "step": 10105 }, { "epoch": 1.546442234123948, "grad_norm": 1.8676996400343906, "learning_rate": 2.5785646911628193e-06, "loss": 0.3037, "step": 10106 }, { "epoch": 1.5465952563121652, "grad_norm": 2.093923674004089, "learning_rate": 2.5769039392175353e-06, "loss": 0.3632, "step": 10107 }, { "epoch": 1.5467482785003825, "grad_norm": 1.9006140368770028, "learning_rate": 2.5752436431536174e-06, "loss": 0.2753, "step": 10108 }, { "epoch": 1.5469013006885999, "grad_norm": 2.2120676657834943, "learning_rate": 2.573583803073039e-06, "loss": 0.2726, "step": 10109 }, { "epoch": 1.547054322876817, "grad_norm": 2.1076883294477424, "learning_rate": 2.571924419077728e-06, "loss": 0.3216, "step": 10110 }, { "epoch": 1.5472073450650345, "grad_norm": 2.0232240951285716, "learning_rate": 2.5702654912695945e-06, "loss": 0.3267, "step": 10111 }, { "epoch": 1.5473603672532517, "grad_norm": 2.373217544040392, "learning_rate": 2.568607019750529e-06, "loss": 0.3187, "step": 10112 }, { "epoch": 1.547513389441469, "grad_norm": 2.182283013798894, "learning_rate": 2.5669490046223713e-06, "loss": 0.3073, "step": 10113 }, { "epoch": 1.5476664116296863, "grad_norm": 1.9217437998228828, "learning_rate": 2.5652914459869573e-06, "loss": 0.2509, "step": 10114 }, { "epoch": 1.5478194338179034, "grad_norm": 2.2608685850264014, "learning_rate": 2.563634343946082e-06, "loss": 0.3531, "step": 10115 }, { "epoch": 1.547972456006121, "grad_norm": 2.32705780663519, "learning_rate": 2.5619776986015077e-06, "loss": 0.3757, "step": 10116 }, { "epoch": 1.548125478194338, "grad_norm": 2.3217036817700647, "learning_rate": 2.560321510054984e-06, "loss": 0.3321, "step": 10117 }, { "epoch": 1.5482785003825554, "grad_norm": 2.1098728752447795, "learning_rate": 2.5586657784082236e-06, "loss": 0.3464, "step": 10118 }, { "epoch": 1.5484315225707728, "grad_norm": 2.1231402121495524, "learning_rate": 2.5570105037629013e-06, "loss": 0.2992, "step": 10119 }, { "epoch": 1.54858454475899, "grad_norm": 2.303456688738964, "learning_rate": 2.5553556862206852e-06, "loss": 0.3267, "step": 10120 }, { "epoch": 1.5487375669472074, "grad_norm": 2.12653714669132, "learning_rate": 2.5537013258832e-06, "loss": 0.2904, "step": 10121 }, { "epoch": 1.5488905891354245, "grad_norm": 2.3782425204672273, "learning_rate": 2.5520474228520454e-06, "loss": 0.3159, "step": 10122 }, { "epoch": 1.5490436113236419, "grad_norm": 2.0454415788829925, "learning_rate": 2.5503939772287957e-06, "loss": 0.283, "step": 10123 }, { "epoch": 1.5491966335118592, "grad_norm": 2.184366989942858, "learning_rate": 2.548740989114995e-06, "loss": 0.3246, "step": 10124 }, { "epoch": 1.5493496557000765, "grad_norm": 2.144316447092154, "learning_rate": 2.5470884586121604e-06, "loss": 0.3074, "step": 10125 }, { "epoch": 1.5495026778882939, "grad_norm": 2.292841312921272, "learning_rate": 2.5454363858217778e-06, "loss": 0.291, "step": 10126 }, { "epoch": 1.549655700076511, "grad_norm": 2.352491997320857, "learning_rate": 2.543784770845311e-06, "loss": 0.3496, "step": 10127 }, { "epoch": 1.5498087222647285, "grad_norm": 2.204005708224258, "learning_rate": 2.542133613784189e-06, "loss": 0.3373, "step": 10128 }, { "epoch": 1.5499617444529457, "grad_norm": 2.2452574362968023, "learning_rate": 2.540482914739818e-06, "loss": 0.3437, "step": 10129 }, { "epoch": 1.550114766641163, "grad_norm": 2.279324045464706, "learning_rate": 2.5388326738135726e-06, "loss": 0.3119, "step": 10130 }, { "epoch": 1.5502677888293803, "grad_norm": 2.256376745605677, "learning_rate": 2.537182891106801e-06, "loss": 0.346, "step": 10131 }, { "epoch": 1.5504208110175974, "grad_norm": 1.927680952087128, "learning_rate": 2.5355335667208226e-06, "loss": 0.2626, "step": 10132 }, { "epoch": 1.550573833205815, "grad_norm": 1.925711586506596, "learning_rate": 2.533884700756929e-06, "loss": 0.2571, "step": 10133 }, { "epoch": 1.550726855394032, "grad_norm": 1.9586609335259824, "learning_rate": 2.5322362933163803e-06, "loss": 0.2807, "step": 10134 }, { "epoch": 1.5508798775822494, "grad_norm": 2.305373905796804, "learning_rate": 2.5305883445004207e-06, "loss": 0.333, "step": 10135 }, { "epoch": 1.5510328997704668, "grad_norm": 1.9006313901845546, "learning_rate": 2.5289408544102488e-06, "loss": 0.2386, "step": 10136 }, { "epoch": 1.5511859219586839, "grad_norm": 1.9406112813516276, "learning_rate": 2.5272938231470433e-06, "loss": 0.3152, "step": 10137 }, { "epoch": 1.5513389441469014, "grad_norm": 2.021371847556138, "learning_rate": 2.5256472508119633e-06, "loss": 0.2632, "step": 10138 }, { "epoch": 1.5514919663351185, "grad_norm": 1.9119227896899973, "learning_rate": 2.5240011375061226e-06, "loss": 0.3115, "step": 10139 }, { "epoch": 1.5516449885233359, "grad_norm": 2.018861968257367, "learning_rate": 2.5223554833306153e-06, "loss": 0.316, "step": 10140 }, { "epoch": 1.5517980107115532, "grad_norm": 2.133098819530022, "learning_rate": 2.5207102883865163e-06, "loss": 0.2879, "step": 10141 }, { "epoch": 1.5519510328997703, "grad_norm": 1.928501157699594, "learning_rate": 2.519065552774851e-06, "loss": 0.219, "step": 10142 }, { "epoch": 1.5521040550879879, "grad_norm": 2.0714889924024713, "learning_rate": 2.5174212765966398e-06, "loss": 0.3353, "step": 10143 }, { "epoch": 1.552257077276205, "grad_norm": 1.9810537630101772, "learning_rate": 2.5157774599528627e-06, "loss": 0.316, "step": 10144 }, { "epoch": 1.5524100994644223, "grad_norm": 2.097340227564223, "learning_rate": 2.5141341029444634e-06, "loss": 0.243, "step": 10145 }, { "epoch": 1.5525631216526397, "grad_norm": 2.322435168108173, "learning_rate": 2.5124912056723784e-06, "loss": 0.3103, "step": 10146 }, { "epoch": 1.5527161438408568, "grad_norm": 2.3468231639208232, "learning_rate": 2.5108487682375017e-06, "loss": 0.3325, "step": 10147 }, { "epoch": 1.5528691660290743, "grad_norm": 2.497911269354406, "learning_rate": 2.509206790740694e-06, "loss": 0.3676, "step": 10148 }, { "epoch": 1.5530221882172914, "grad_norm": 1.8090734411304665, "learning_rate": 2.5075652732828036e-06, "loss": 0.2453, "step": 10149 }, { "epoch": 1.5531752104055088, "grad_norm": 2.403872379075245, "learning_rate": 2.5059242159646412e-06, "loss": 0.2632, "step": 10150 }, { "epoch": 1.553328232593726, "grad_norm": 2.430625917359335, "learning_rate": 2.504283618886989e-06, "loss": 0.38, "step": 10151 }, { "epoch": 1.5534812547819434, "grad_norm": 1.8545837232546376, "learning_rate": 2.502643482150604e-06, "loss": 0.2823, "step": 10152 }, { "epoch": 1.5536342769701608, "grad_norm": 1.9418639929290726, "learning_rate": 2.5010038058562127e-06, "loss": 0.3061, "step": 10153 }, { "epoch": 1.5537872991583779, "grad_norm": 1.8986203674410438, "learning_rate": 2.499364590104514e-06, "loss": 0.2776, "step": 10154 }, { "epoch": 1.5539403213465952, "grad_norm": 2.0270382335653707, "learning_rate": 2.4977258349961774e-06, "loss": 0.2981, "step": 10155 }, { "epoch": 1.5540933435348125, "grad_norm": 2.402283314200567, "learning_rate": 2.496087540631846e-06, "loss": 0.3866, "step": 10156 }, { "epoch": 1.5542463657230299, "grad_norm": 2.310733483309164, "learning_rate": 2.4944497071121355e-06, "loss": 0.3839, "step": 10157 }, { "epoch": 1.5543993879112472, "grad_norm": 1.8695856951851826, "learning_rate": 2.4928123345376276e-06, "loss": 0.249, "step": 10158 }, { "epoch": 1.5545524100994643, "grad_norm": 2.1169212162195827, "learning_rate": 2.491175423008888e-06, "loss": 0.2898, "step": 10159 }, { "epoch": 1.5547054322876819, "grad_norm": 2.161399766428231, "learning_rate": 2.4895389726264376e-06, "loss": 0.3391, "step": 10160 }, { "epoch": 1.554858454475899, "grad_norm": 2.391172981427365, "learning_rate": 2.4879029834907762e-06, "loss": 0.3253, "step": 10161 }, { "epoch": 1.5550114766641163, "grad_norm": 2.100970641331779, "learning_rate": 2.486267455702387e-06, "loss": 0.3261, "step": 10162 }, { "epoch": 1.5551644988523337, "grad_norm": 1.9156665737120335, "learning_rate": 2.4846323893616996e-06, "loss": 0.2877, "step": 10163 }, { "epoch": 1.5553175210405508, "grad_norm": 2.3248612982758057, "learning_rate": 2.4829977845691424e-06, "loss": 0.3276, "step": 10164 }, { "epoch": 1.5554705432287683, "grad_norm": 2.032228598566149, "learning_rate": 2.4813636414251e-06, "loss": 0.2805, "step": 10165 }, { "epoch": 1.5556235654169854, "grad_norm": 1.9488852461732409, "learning_rate": 2.479729960029924e-06, "loss": 0.281, "step": 10166 }, { "epoch": 1.5557765876052028, "grad_norm": 2.137426975125268, "learning_rate": 2.4780967404839528e-06, "loss": 0.3415, "step": 10167 }, { "epoch": 1.55592960979342, "grad_norm": 1.8447028628909974, "learning_rate": 2.4764639828874905e-06, "loss": 0.2041, "step": 10168 }, { "epoch": 1.5560826319816372, "grad_norm": 1.8215233612139778, "learning_rate": 2.4748316873408006e-06, "loss": 0.2766, "step": 10169 }, { "epoch": 1.5562356541698548, "grad_norm": 2.118588620302535, "learning_rate": 2.473199853944138e-06, "loss": 0.3702, "step": 10170 }, { "epoch": 1.5563886763580719, "grad_norm": 2.316013267038449, "learning_rate": 2.4715684827977183e-06, "loss": 0.3036, "step": 10171 }, { "epoch": 1.5565416985462892, "grad_norm": 2.30550773332787, "learning_rate": 2.4699375740017296e-06, "loss": 0.3842, "step": 10172 }, { "epoch": 1.5566947207345065, "grad_norm": 2.0189253349152816, "learning_rate": 2.468307127656331e-06, "loss": 0.2854, "step": 10173 }, { "epoch": 1.5568477429227237, "grad_norm": 1.8466817397772421, "learning_rate": 2.4666771438616568e-06, "loss": 0.2789, "step": 10174 }, { "epoch": 1.5570007651109412, "grad_norm": 2.06378417070918, "learning_rate": 2.46504762271781e-06, "loss": 0.2566, "step": 10175 }, { "epoch": 1.5571537872991583, "grad_norm": 2.0737045550933497, "learning_rate": 2.4634185643248642e-06, "loss": 0.3565, "step": 10176 }, { "epoch": 1.5573068094873757, "grad_norm": 1.941031063446645, "learning_rate": 2.4617899687828693e-06, "loss": 0.2481, "step": 10177 }, { "epoch": 1.557459831675593, "grad_norm": 2.225129231890635, "learning_rate": 2.4601618361918413e-06, "loss": 0.3176, "step": 10178 }, { "epoch": 1.55761285386381, "grad_norm": 2.016757570425307, "learning_rate": 2.45853416665177e-06, "loss": 0.3122, "step": 10179 }, { "epoch": 1.5577658760520277, "grad_norm": 2.17651730016683, "learning_rate": 2.4569069602626196e-06, "loss": 0.2992, "step": 10180 }, { "epoch": 1.5579188982402448, "grad_norm": 1.9707363895900585, "learning_rate": 2.4552802171243204e-06, "loss": 0.3621, "step": 10181 }, { "epoch": 1.558071920428462, "grad_norm": 2.2690988936611562, "learning_rate": 2.4536539373367786e-06, "loss": 0.32, "step": 10182 }, { "epoch": 1.5582249426166794, "grad_norm": 2.260127613008053, "learning_rate": 2.45202812099987e-06, "loss": 0.313, "step": 10183 }, { "epoch": 1.5583779648048968, "grad_norm": 2.092136322623526, "learning_rate": 2.4504027682134423e-06, "loss": 0.3156, "step": 10184 }, { "epoch": 1.558530986993114, "grad_norm": 1.9982530177796707, "learning_rate": 2.4487778790773155e-06, "loss": 0.2735, "step": 10185 }, { "epoch": 1.5586840091813312, "grad_norm": 2.1708897206182507, "learning_rate": 2.447153453691279e-06, "loss": 0.3133, "step": 10186 }, { "epoch": 1.5588370313695485, "grad_norm": 2.0960282856628942, "learning_rate": 2.445529492155092e-06, "loss": 0.3201, "step": 10187 }, { "epoch": 1.5589900535577659, "grad_norm": 2.1436054196035044, "learning_rate": 2.4439059945684997e-06, "loss": 0.3282, "step": 10188 }, { "epoch": 1.5591430757459832, "grad_norm": 1.9695191815716822, "learning_rate": 2.4422829610311948e-06, "loss": 0.3069, "step": 10189 }, { "epoch": 1.5592960979342005, "grad_norm": 2.1926718704756807, "learning_rate": 2.4406603916428584e-06, "loss": 0.3344, "step": 10190 }, { "epoch": 1.5594491201224177, "grad_norm": 2.3383201047133855, "learning_rate": 2.439038286503145e-06, "loss": 0.336, "step": 10191 }, { "epoch": 1.559602142310635, "grad_norm": 1.8372155716325524, "learning_rate": 2.437416645711662e-06, "loss": 0.2345, "step": 10192 }, { "epoch": 1.5597551644988523, "grad_norm": 2.495769900944102, "learning_rate": 2.435795469368012e-06, "loss": 0.3478, "step": 10193 }, { "epoch": 1.5599081866870697, "grad_norm": 2.1221966929936404, "learning_rate": 2.4341747575717554e-06, "loss": 0.3035, "step": 10194 }, { "epoch": 1.560061208875287, "grad_norm": 2.0135930107293802, "learning_rate": 2.43255451042242e-06, "loss": 0.299, "step": 10195 }, { "epoch": 1.560214231063504, "grad_norm": 2.1633570926567245, "learning_rate": 2.4309347280195183e-06, "loss": 0.3154, "step": 10196 }, { "epoch": 1.5603672532517217, "grad_norm": 1.9875556110795365, "learning_rate": 2.4293154104625282e-06, "loss": 0.2904, "step": 10197 }, { "epoch": 1.5605202754399388, "grad_norm": 2.340039983969419, "learning_rate": 2.4276965578508905e-06, "loss": 0.2656, "step": 10198 }, { "epoch": 1.560673297628156, "grad_norm": 2.221132754924476, "learning_rate": 2.426078170284032e-06, "loss": 0.3104, "step": 10199 }, { "epoch": 1.5608263198163734, "grad_norm": 2.073516952937971, "learning_rate": 2.4244602478613433e-06, "loss": 0.3356, "step": 10200 }, { "epoch": 1.5609793420045905, "grad_norm": 2.488583914365108, "learning_rate": 2.4228427906821863e-06, "loss": 0.3038, "step": 10201 }, { "epoch": 1.561132364192808, "grad_norm": 2.445490540566737, "learning_rate": 2.421225798845894e-06, "loss": 0.3105, "step": 10202 }, { "epoch": 1.5612853863810252, "grad_norm": 2.1391009903185396, "learning_rate": 2.4196092724517763e-06, "loss": 0.2995, "step": 10203 }, { "epoch": 1.5614384085692425, "grad_norm": 2.044543683226895, "learning_rate": 2.4179932115991056e-06, "loss": 0.2764, "step": 10204 }, { "epoch": 1.5615914307574599, "grad_norm": 2.046036413929888, "learning_rate": 2.4163776163871334e-06, "loss": 0.2786, "step": 10205 }, { "epoch": 1.561744452945677, "grad_norm": 2.2680242817870875, "learning_rate": 2.41476248691508e-06, "loss": 0.3337, "step": 10206 }, { "epoch": 1.5618974751338945, "grad_norm": 2.0380320601865853, "learning_rate": 2.413147823282135e-06, "loss": 0.33, "step": 10207 }, { "epoch": 1.5620504973221117, "grad_norm": 2.040382014149729, "learning_rate": 2.4115336255874623e-06, "loss": 0.2632, "step": 10208 }, { "epoch": 1.562203519510329, "grad_norm": 2.2044053792612632, "learning_rate": 2.409919893930196e-06, "loss": 0.2983, "step": 10209 }, { "epoch": 1.5623565416985463, "grad_norm": 2.277740664709357, "learning_rate": 2.4083066284094415e-06, "loss": 0.3382, "step": 10210 }, { "epoch": 1.5625095638867634, "grad_norm": 2.373251287168811, "learning_rate": 2.406693829124276e-06, "loss": 0.3217, "step": 10211 }, { "epoch": 1.562662586074981, "grad_norm": 2.196277994030039, "learning_rate": 2.4050814961737466e-06, "loss": 0.3034, "step": 10212 }, { "epoch": 1.562815608263198, "grad_norm": 1.998217282132098, "learning_rate": 2.403469629656875e-06, "loss": 0.2597, "step": 10213 }, { "epoch": 1.5629686304514154, "grad_norm": 1.9683414339934968, "learning_rate": 2.401858229672651e-06, "loss": 0.3089, "step": 10214 }, { "epoch": 1.5631216526396328, "grad_norm": 2.171320953900138, "learning_rate": 2.4002472963200374e-06, "loss": 0.3134, "step": 10215 }, { "epoch": 1.5632746748278499, "grad_norm": 1.9236456967890725, "learning_rate": 2.3986368296979643e-06, "loss": 0.2783, "step": 10216 }, { "epoch": 1.5634276970160674, "grad_norm": 2.1561594723846205, "learning_rate": 2.397026829905347e-06, "loss": 0.3362, "step": 10217 }, { "epoch": 1.5635807192042845, "grad_norm": 2.0638153028428294, "learning_rate": 2.395417297041052e-06, "loss": 0.2939, "step": 10218 }, { "epoch": 1.5637337413925019, "grad_norm": 2.075780646463065, "learning_rate": 2.3938082312039267e-06, "loss": 0.3216, "step": 10219 }, { "epoch": 1.5638867635807192, "grad_norm": 2.147494836310276, "learning_rate": 2.3921996324927988e-06, "loss": 0.3036, "step": 10220 }, { "epoch": 1.5640397857689365, "grad_norm": 2.494807687598165, "learning_rate": 2.390591501006452e-06, "loss": 0.4179, "step": 10221 }, { "epoch": 1.5641928079571539, "grad_norm": 2.2762875943975107, "learning_rate": 2.3889838368436445e-06, "loss": 0.331, "step": 10222 }, { "epoch": 1.564345830145371, "grad_norm": 2.0772333504973384, "learning_rate": 2.3873766401031205e-06, "loss": 0.2941, "step": 10223 }, { "epoch": 1.5644988523335883, "grad_norm": 2.1005853247996313, "learning_rate": 2.385769910883573e-06, "loss": 0.2845, "step": 10224 }, { "epoch": 1.5646518745218057, "grad_norm": 2.1720104480820885, "learning_rate": 2.3841636492836838e-06, "loss": 0.3295, "step": 10225 }, { "epoch": 1.564804896710023, "grad_norm": 2.0423208854212, "learning_rate": 2.382557855402102e-06, "loss": 0.2516, "step": 10226 }, { "epoch": 1.5649579188982403, "grad_norm": 2.0188972984497027, "learning_rate": 2.3809525293374357e-06, "loss": 0.2452, "step": 10227 }, { "epoch": 1.5651109410864574, "grad_norm": 2.0508236985553436, "learning_rate": 2.3793476711882836e-06, "loss": 0.2987, "step": 10228 }, { "epoch": 1.565263963274675, "grad_norm": 2.232455824019605, "learning_rate": 2.377743281053203e-06, "loss": 0.2617, "step": 10229 }, { "epoch": 1.565416985462892, "grad_norm": 2.2874276475622795, "learning_rate": 2.3761393590307267e-06, "loss": 0.3049, "step": 10230 }, { "epoch": 1.5655700076511094, "grad_norm": 2.0678605418290714, "learning_rate": 2.374535905219356e-06, "loss": 0.3362, "step": 10231 }, { "epoch": 1.5657230298393268, "grad_norm": 1.9479610053175493, "learning_rate": 2.3729329197175668e-06, "loss": 0.3345, "step": 10232 }, { "epoch": 1.5658760520275439, "grad_norm": 2.115965723564736, "learning_rate": 2.371330402623805e-06, "loss": 0.3229, "step": 10233 }, { "epoch": 1.5660290742157614, "grad_norm": 2.134915489865641, "learning_rate": 2.3697283540364856e-06, "loss": 0.3516, "step": 10234 }, { "epoch": 1.5661820964039785, "grad_norm": 2.4074271755888987, "learning_rate": 2.368126774053998e-06, "loss": 0.2972, "step": 10235 }, { "epoch": 1.5663351185921959, "grad_norm": 2.2511960150185435, "learning_rate": 2.3665256627747012e-06, "loss": 0.2815, "step": 10236 }, { "epoch": 1.5664881407804132, "grad_norm": 2.0602330321925106, "learning_rate": 2.3649250202969233e-06, "loss": 0.2911, "step": 10237 }, { "epoch": 1.5666411629686303, "grad_norm": 1.927634330204787, "learning_rate": 2.363324846718974e-06, "loss": 0.2757, "step": 10238 }, { "epoch": 1.5667941851568479, "grad_norm": 2.2909319046053906, "learning_rate": 2.3617251421391172e-06, "loss": 0.2859, "step": 10239 }, { "epoch": 1.566947207345065, "grad_norm": 1.8964456203975408, "learning_rate": 2.3601259066555982e-06, "loss": 0.2124, "step": 10240 }, { "epoch": 1.5671002295332823, "grad_norm": 1.9433830612323009, "learning_rate": 2.358527140366641e-06, "loss": 0.2759, "step": 10241 }, { "epoch": 1.5672532517214997, "grad_norm": 2.1053810325565174, "learning_rate": 2.356928843370422e-06, "loss": 0.3441, "step": 10242 }, { "epoch": 1.5674062739097168, "grad_norm": 2.1752026446050117, "learning_rate": 2.3553310157651e-06, "loss": 0.3235, "step": 10243 }, { "epoch": 1.5675592960979343, "grad_norm": 2.1045113271973603, "learning_rate": 2.3537336576488124e-06, "loss": 0.2883, "step": 10244 }, { "epoch": 1.5677123182861514, "grad_norm": 2.1417921242316855, "learning_rate": 2.3521367691196474e-06, "loss": 0.4201, "step": 10245 }, { "epoch": 1.5678653404743688, "grad_norm": 2.2202253891721924, "learning_rate": 2.350540350275684e-06, "loss": 0.344, "step": 10246 }, { "epoch": 1.568018362662586, "grad_norm": 2.051083146052781, "learning_rate": 2.3489444012149665e-06, "loss": 0.3307, "step": 10247 }, { "epoch": 1.5681713848508032, "grad_norm": 2.3576621657679113, "learning_rate": 2.3473489220354985e-06, "loss": 0.2906, "step": 10248 }, { "epoch": 1.5683244070390208, "grad_norm": 2.400771637167478, "learning_rate": 2.3457539128352737e-06, "loss": 0.3003, "step": 10249 }, { "epoch": 1.5684774292272379, "grad_norm": 2.042581493431355, "learning_rate": 2.344159373712247e-06, "loss": 0.2974, "step": 10250 }, { "epoch": 1.5686304514154552, "grad_norm": 2.346004763894137, "learning_rate": 2.3425653047643373e-06, "loss": 0.3402, "step": 10251 }, { "epoch": 1.5687834736036725, "grad_norm": 1.6993995033239386, "learning_rate": 2.340971706089451e-06, "loss": 0.2819, "step": 10252 }, { "epoch": 1.5689364957918899, "grad_norm": 2.3292922099877766, "learning_rate": 2.339378577785455e-06, "loss": 0.3639, "step": 10253 }, { "epoch": 1.5690895179801072, "grad_norm": 1.7548228300139943, "learning_rate": 2.3377859199501886e-06, "loss": 0.2774, "step": 10254 }, { "epoch": 1.5692425401683243, "grad_norm": 2.192534611433887, "learning_rate": 2.3361937326814633e-06, "loss": 0.3263, "step": 10255 }, { "epoch": 1.5693955623565417, "grad_norm": 2.262468415284445, "learning_rate": 2.3346020160770632e-06, "loss": 0.2805, "step": 10256 }, { "epoch": 1.569548584544759, "grad_norm": 2.10740520424962, "learning_rate": 2.3330107702347393e-06, "loss": 0.3117, "step": 10257 }, { "epoch": 1.5697016067329763, "grad_norm": 1.9254659842503297, "learning_rate": 2.3314199952522176e-06, "loss": 0.2384, "step": 10258 }, { "epoch": 1.5698546289211937, "grad_norm": 2.3472990294430174, "learning_rate": 2.3298296912271932e-06, "loss": 0.3014, "step": 10259 }, { "epoch": 1.5700076511094108, "grad_norm": 2.123204083750397, "learning_rate": 2.328239858257335e-06, "loss": 0.3593, "step": 10260 }, { "epoch": 1.5701606732976283, "grad_norm": 2.3856229695225633, "learning_rate": 2.326650496440278e-06, "loss": 0.3334, "step": 10261 }, { "epoch": 1.5703136954858454, "grad_norm": 2.033254452554158, "learning_rate": 2.325061605873632e-06, "loss": 0.3196, "step": 10262 }, { "epoch": 1.5704667176740628, "grad_norm": 2.384658331345606, "learning_rate": 2.3234731866549778e-06, "loss": 0.2796, "step": 10263 }, { "epoch": 1.57061973986228, "grad_norm": 1.9582659952780006, "learning_rate": 2.3218852388818657e-06, "loss": 0.2933, "step": 10264 }, { "epoch": 1.5707727620504972, "grad_norm": 2.1466251525953166, "learning_rate": 2.3202977626518187e-06, "loss": 0.2537, "step": 10265 }, { "epoch": 1.5709257842387148, "grad_norm": 2.072612975719027, "learning_rate": 2.3187107580623257e-06, "loss": 0.3466, "step": 10266 }, { "epoch": 1.5710788064269319, "grad_norm": 1.889187195089687, "learning_rate": 2.3171242252108607e-06, "loss": 0.3119, "step": 10267 }, { "epoch": 1.5712318286151492, "grad_norm": 2.0271223685590263, "learning_rate": 2.3155381641948494e-06, "loss": 0.3211, "step": 10268 }, { "epoch": 1.5713848508033665, "grad_norm": 2.0742514867228707, "learning_rate": 2.313952575111699e-06, "loss": 0.3236, "step": 10269 }, { "epoch": 1.5715378729915837, "grad_norm": 2.4083805585663085, "learning_rate": 2.3123674580587942e-06, "loss": 0.3223, "step": 10270 }, { "epoch": 1.5716908951798012, "grad_norm": 1.7294525210795593, "learning_rate": 2.3107828131334744e-06, "loss": 0.2704, "step": 10271 }, { "epoch": 1.5718439173680183, "grad_norm": 2.3189317261675297, "learning_rate": 2.30919864043306e-06, "loss": 0.3368, "step": 10272 }, { "epoch": 1.5719969395562357, "grad_norm": 2.194260512799246, "learning_rate": 2.3076149400548498e-06, "loss": 0.3096, "step": 10273 }, { "epoch": 1.572149961744453, "grad_norm": 1.8457396636416614, "learning_rate": 2.306031712096093e-06, "loss": 0.2729, "step": 10274 }, { "epoch": 1.57230298393267, "grad_norm": 2.084375191930075, "learning_rate": 2.3044489566540306e-06, "loss": 0.3001, "step": 10275 }, { "epoch": 1.5724560061208877, "grad_norm": 1.91765039031427, "learning_rate": 2.3028666738258653e-06, "loss": 0.2202, "step": 10276 }, { "epoch": 1.5726090283091048, "grad_norm": 2.1171884440790776, "learning_rate": 2.301284863708764e-06, "loss": 0.303, "step": 10277 }, { "epoch": 1.572762050497322, "grad_norm": 2.1586507600026392, "learning_rate": 2.2997035263998792e-06, "loss": 0.3683, "step": 10278 }, { "epoch": 1.5729150726855394, "grad_norm": 2.3001386303828015, "learning_rate": 2.298122661996328e-06, "loss": 0.3143, "step": 10279 }, { "epoch": 1.5730680948737565, "grad_norm": 2.340415887451529, "learning_rate": 2.296542270595188e-06, "loss": 0.3019, "step": 10280 }, { "epoch": 1.573221117061974, "grad_norm": 2.2975152673572947, "learning_rate": 2.294962352293526e-06, "loss": 0.3047, "step": 10281 }, { "epoch": 1.5733741392501912, "grad_norm": 1.7612950905997558, "learning_rate": 2.2933829071883673e-06, "loss": 0.231, "step": 10282 }, { "epoch": 1.5735271614384085, "grad_norm": 2.139252431583115, "learning_rate": 2.291803935376714e-06, "loss": 0.3253, "step": 10283 }, { "epoch": 1.5736801836266259, "grad_norm": 2.2296485507686836, "learning_rate": 2.2902254369555354e-06, "loss": 0.3236, "step": 10284 }, { "epoch": 1.5738332058148432, "grad_norm": 2.230439426279258, "learning_rate": 2.2886474120217726e-06, "loss": 0.3202, "step": 10285 }, { "epoch": 1.5739862280030605, "grad_norm": 2.194048175922532, "learning_rate": 2.287069860672341e-06, "loss": 0.3116, "step": 10286 }, { "epoch": 1.5741392501912777, "grad_norm": 2.104361793722446, "learning_rate": 2.2854927830041205e-06, "loss": 0.3093, "step": 10287 }, { "epoch": 1.574292272379495, "grad_norm": 2.190600186554258, "learning_rate": 2.2839161791139685e-06, "loss": 0.2952, "step": 10288 }, { "epoch": 1.5744452945677123, "grad_norm": 2.2380497722604566, "learning_rate": 2.2823400490987103e-06, "loss": 0.2993, "step": 10289 }, { "epoch": 1.5745983167559297, "grad_norm": 2.0662226158733907, "learning_rate": 2.2807643930551403e-06, "loss": 0.2838, "step": 10290 }, { "epoch": 1.574751338944147, "grad_norm": 1.8754728763422386, "learning_rate": 2.279189211080026e-06, "loss": 0.2747, "step": 10291 }, { "epoch": 1.574904361132364, "grad_norm": 2.239515204953391, "learning_rate": 2.277614503270108e-06, "loss": 0.3475, "step": 10292 }, { "epoch": 1.5750573833205816, "grad_norm": 1.9798738633771396, "learning_rate": 2.276040269722092e-06, "loss": 0.3008, "step": 10293 }, { "epoch": 1.5752104055087988, "grad_norm": 2.2716397492829454, "learning_rate": 2.2744665105326603e-06, "loss": 0.3376, "step": 10294 }, { "epoch": 1.575363427697016, "grad_norm": 2.2329279492017795, "learning_rate": 2.2728932257984613e-06, "loss": 0.3152, "step": 10295 }, { "epoch": 1.5755164498852334, "grad_norm": 1.999846969202547, "learning_rate": 2.2713204156161193e-06, "loss": 0.3223, "step": 10296 }, { "epoch": 1.5756694720734505, "grad_norm": 2.186381035023827, "learning_rate": 2.269748080082225e-06, "loss": 0.3592, "step": 10297 }, { "epoch": 1.575822494261668, "grad_norm": 2.1638727598497742, "learning_rate": 2.268176219293339e-06, "loss": 0.3635, "step": 10298 }, { "epoch": 1.5759755164498852, "grad_norm": 1.9826793258017292, "learning_rate": 2.2666048333460046e-06, "loss": 0.3428, "step": 10299 }, { "epoch": 1.5761285386381025, "grad_norm": 2.1235423137566536, "learning_rate": 2.2650339223367167e-06, "loss": 0.2813, "step": 10300 }, { "epoch": 1.5762815608263199, "grad_norm": 2.150327660568215, "learning_rate": 2.263463486361953e-06, "loss": 0.3257, "step": 10301 }, { "epoch": 1.576434583014537, "grad_norm": 2.1608998717546952, "learning_rate": 2.2618935255181673e-06, "loss": 0.3238, "step": 10302 }, { "epoch": 1.5765876052027545, "grad_norm": 2.163568899899916, "learning_rate": 2.2603240399017668e-06, "loss": 0.2742, "step": 10303 }, { "epoch": 1.5767406273909716, "grad_norm": 2.0529341198278206, "learning_rate": 2.2587550296091477e-06, "loss": 0.308, "step": 10304 }, { "epoch": 1.576893649579189, "grad_norm": 2.0794813595152464, "learning_rate": 2.2571864947366685e-06, "loss": 0.3075, "step": 10305 }, { "epoch": 1.5770466717674063, "grad_norm": 2.0384171796033796, "learning_rate": 2.255618435380651e-06, "loss": 0.3338, "step": 10306 }, { "epoch": 1.5771996939556234, "grad_norm": 2.226350908567524, "learning_rate": 2.2540508516374036e-06, "loss": 0.3045, "step": 10307 }, { "epoch": 1.577352716143841, "grad_norm": 2.0631861942769785, "learning_rate": 2.2524837436031997e-06, "loss": 0.2344, "step": 10308 }, { "epoch": 1.577505738332058, "grad_norm": 2.3266694287378527, "learning_rate": 2.2509171113742724e-06, "loss": 0.3964, "step": 10309 }, { "epoch": 1.5776587605202754, "grad_norm": 1.9944872958494284, "learning_rate": 2.249350955046842e-06, "loss": 0.2898, "step": 10310 }, { "epoch": 1.5778117827084928, "grad_norm": 1.9506141300360098, "learning_rate": 2.24778527471709e-06, "loss": 0.2996, "step": 10311 }, { "epoch": 1.5779648048967099, "grad_norm": 1.9074519577115954, "learning_rate": 2.246220070481171e-06, "loss": 0.2978, "step": 10312 }, { "epoch": 1.5781178270849274, "grad_norm": 1.9938550842973741, "learning_rate": 2.24465534243521e-06, "loss": 0.2799, "step": 10313 }, { "epoch": 1.5782708492731445, "grad_norm": 1.8507728107065444, "learning_rate": 2.2430910906753045e-06, "loss": 0.3082, "step": 10314 }, { "epoch": 1.5784238714613619, "grad_norm": 1.9282543196104605, "learning_rate": 2.2415273152975205e-06, "loss": 0.2698, "step": 10315 }, { "epoch": 1.5785768936495792, "grad_norm": 2.120689019338789, "learning_rate": 2.2399640163978942e-06, "loss": 0.316, "step": 10316 }, { "epoch": 1.5787299158377963, "grad_norm": 2.193624778585067, "learning_rate": 2.238401194072436e-06, "loss": 0.2874, "step": 10317 }, { "epoch": 1.5788829380260139, "grad_norm": 2.302872403542241, "learning_rate": 2.2368388484171246e-06, "loss": 0.3184, "step": 10318 }, { "epoch": 1.579035960214231, "grad_norm": 1.642833529579855, "learning_rate": 2.235276979527905e-06, "loss": 0.2235, "step": 10319 }, { "epoch": 1.5791889824024483, "grad_norm": 2.2481428159239334, "learning_rate": 2.2337155875007076e-06, "loss": 0.2923, "step": 10320 }, { "epoch": 1.5793420045906656, "grad_norm": 1.9982154763461255, "learning_rate": 2.232154672431416e-06, "loss": 0.2771, "step": 10321 }, { "epoch": 1.579495026778883, "grad_norm": 2.013720888615236, "learning_rate": 2.2305942344158906e-06, "loss": 0.276, "step": 10322 }, { "epoch": 1.5796480489671003, "grad_norm": 2.068002598244875, "learning_rate": 2.2290342735499724e-06, "loss": 0.2944, "step": 10323 }, { "epoch": 1.5798010711553174, "grad_norm": 1.9030134400531125, "learning_rate": 2.227474789929458e-06, "loss": 0.2344, "step": 10324 }, { "epoch": 1.5799540933435348, "grad_norm": 2.022291552569888, "learning_rate": 2.225915783650119e-06, "loss": 0.2472, "step": 10325 }, { "epoch": 1.580107115531752, "grad_norm": 2.24395570623936, "learning_rate": 2.2243572548077107e-06, "loss": 0.3125, "step": 10326 }, { "epoch": 1.5802601377199694, "grad_norm": 1.9891422743630058, "learning_rate": 2.2227992034979363e-06, "loss": 0.2601, "step": 10327 }, { "epoch": 1.5804131599081868, "grad_norm": 1.8983232454344225, "learning_rate": 2.2212416298164895e-06, "loss": 0.3107, "step": 10328 }, { "epoch": 1.5805661820964039, "grad_norm": 1.7352050344116838, "learning_rate": 2.219684533859028e-06, "loss": 0.2608, "step": 10329 }, { "epoch": 1.5807192042846214, "grad_norm": 2.0679773793018636, "learning_rate": 2.21812791572117e-06, "loss": 0.2791, "step": 10330 }, { "epoch": 1.5808722264728385, "grad_norm": 2.0633576159015488, "learning_rate": 2.216571775498523e-06, "loss": 0.3305, "step": 10331 }, { "epoch": 1.5810252486610559, "grad_norm": 2.0864993072287645, "learning_rate": 2.215016113286652e-06, "loss": 0.2381, "step": 10332 }, { "epoch": 1.5811782708492732, "grad_norm": 1.9834409122072287, "learning_rate": 2.213460929181097e-06, "loss": 0.3014, "step": 10333 }, { "epoch": 1.5813312930374903, "grad_norm": 2.2354577819111445, "learning_rate": 2.211906223277367e-06, "loss": 0.2997, "step": 10334 }, { "epoch": 1.5814843152257079, "grad_norm": 2.2939978595458363, "learning_rate": 2.210351995670943e-06, "loss": 0.318, "step": 10335 }, { "epoch": 1.581637337413925, "grad_norm": 2.019619517073732, "learning_rate": 2.2087982464572755e-06, "loss": 0.3068, "step": 10336 }, { "epoch": 1.5817903596021423, "grad_norm": 2.1592548402626175, "learning_rate": 2.207244975731788e-06, "loss": 0.255, "step": 10337 }, { "epoch": 1.5819433817903596, "grad_norm": 2.0626900857490402, "learning_rate": 2.2056921835898717e-06, "loss": 0.2745, "step": 10338 }, { "epoch": 1.5820964039785768, "grad_norm": 2.318142925818211, "learning_rate": 2.20413987012689e-06, "loss": 0.3219, "step": 10339 }, { "epoch": 1.5822494261667943, "grad_norm": 1.997112063237294, "learning_rate": 2.2025880354381767e-06, "loss": 0.3089, "step": 10340 }, { "epoch": 1.5824024483550114, "grad_norm": 2.284517687797388, "learning_rate": 2.2010366796190353e-06, "loss": 0.3464, "step": 10341 }, { "epoch": 1.5825554705432288, "grad_norm": 2.670105379186421, "learning_rate": 2.1994858027647414e-06, "loss": 0.3209, "step": 10342 }, { "epoch": 1.582708492731446, "grad_norm": 2.0680620302934094, "learning_rate": 2.197935404970539e-06, "loss": 0.2762, "step": 10343 }, { "epoch": 1.5828615149196632, "grad_norm": 2.0968791504944897, "learning_rate": 2.1963854863316468e-06, "loss": 0.2899, "step": 10344 }, { "epoch": 1.5830145371078808, "grad_norm": 2.0214319597523103, "learning_rate": 2.1948360469432494e-06, "loss": 0.3563, "step": 10345 }, { "epoch": 1.5831675592960979, "grad_norm": 2.131484039985648, "learning_rate": 2.1932870869005042e-06, "loss": 0.3074, "step": 10346 }, { "epoch": 1.5833205814843152, "grad_norm": 2.103344766881095, "learning_rate": 2.191738606298539e-06, "loss": 0.3058, "step": 10347 }, { "epoch": 1.5834736036725325, "grad_norm": 2.556940059879879, "learning_rate": 2.1901906052324495e-06, "loss": 0.4076, "step": 10348 }, { "epoch": 1.5836266258607496, "grad_norm": 2.2405335632188517, "learning_rate": 2.1886430837973115e-06, "loss": 0.3214, "step": 10349 }, { "epoch": 1.5837796480489672, "grad_norm": 2.14456826606491, "learning_rate": 2.1870960420881584e-06, "loss": 0.3017, "step": 10350 }, { "epoch": 1.5839326702371843, "grad_norm": 2.2376186462192096, "learning_rate": 2.185549480199999e-06, "loss": 0.2989, "step": 10351 }, { "epoch": 1.5840856924254016, "grad_norm": 2.05365198255844, "learning_rate": 2.184003398227821e-06, "loss": 0.2639, "step": 10352 }, { "epoch": 1.584238714613619, "grad_norm": 1.5879002011757888, "learning_rate": 2.182457796266568e-06, "loss": 0.2415, "step": 10353 }, { "epoch": 1.5843917368018363, "grad_norm": 2.2942998647330173, "learning_rate": 2.180912674411162e-06, "loss": 0.2657, "step": 10354 }, { "epoch": 1.5845447589900536, "grad_norm": 2.1415116546843294, "learning_rate": 2.1793680327565024e-06, "loss": 0.3094, "step": 10355 }, { "epoch": 1.5846977811782708, "grad_norm": 1.868653381216291, "learning_rate": 2.177823871397441e-06, "loss": 0.2852, "step": 10356 }, { "epoch": 1.584850803366488, "grad_norm": 2.309292831698454, "learning_rate": 2.176280190428819e-06, "loss": 0.3522, "step": 10357 }, { "epoch": 1.5850038255547054, "grad_norm": 2.1710738028332863, "learning_rate": 2.1747369899454386e-06, "loss": 0.3116, "step": 10358 }, { "epoch": 1.5851568477429228, "grad_norm": 2.0734244476848454, "learning_rate": 2.1731942700420683e-06, "loss": 0.2549, "step": 10359 }, { "epoch": 1.58530986993114, "grad_norm": 2.351848876345775, "learning_rate": 2.171652030813458e-06, "loss": 0.325, "step": 10360 }, { "epoch": 1.5854628921193572, "grad_norm": 2.257892033882186, "learning_rate": 2.1701102723543242e-06, "loss": 0.2825, "step": 10361 }, { "epoch": 1.5856159143075748, "grad_norm": 2.0733014553722517, "learning_rate": 2.1685689947593445e-06, "loss": 0.3186, "step": 10362 }, { "epoch": 1.5857689364957919, "grad_norm": 2.1921386004476706, "learning_rate": 2.167028198123182e-06, "loss": 0.332, "step": 10363 }, { "epoch": 1.5859219586840092, "grad_norm": 2.220723693801886, "learning_rate": 2.16548788254046e-06, "loss": 0.3319, "step": 10364 }, { "epoch": 1.5860749808722265, "grad_norm": 2.0745477917442696, "learning_rate": 2.1639480481057774e-06, "loss": 0.2772, "step": 10365 }, { "epoch": 1.5862280030604436, "grad_norm": 2.1995867088192798, "learning_rate": 2.162408694913699e-06, "loss": 0.322, "step": 10366 }, { "epoch": 1.5863810252486612, "grad_norm": 2.029684586828926, "learning_rate": 2.160869823058763e-06, "loss": 0.2683, "step": 10367 }, { "epoch": 1.5865340474368783, "grad_norm": 2.316636320242865, "learning_rate": 2.1593314326354787e-06, "loss": 0.3374, "step": 10368 }, { "epoch": 1.5866870696250956, "grad_norm": 2.2433828028014045, "learning_rate": 2.157793523738324e-06, "loss": 0.3329, "step": 10369 }, { "epoch": 1.586840091813313, "grad_norm": 1.9832856482770373, "learning_rate": 2.1562560964617473e-06, "loss": 0.2735, "step": 10370 }, { "epoch": 1.58699311400153, "grad_norm": 2.1848410328916987, "learning_rate": 2.1547191509001687e-06, "loss": 0.2742, "step": 10371 }, { "epoch": 1.5871461361897476, "grad_norm": 2.118690825527389, "learning_rate": 2.1531826871479787e-06, "loss": 0.3074, "step": 10372 }, { "epoch": 1.5872991583779648, "grad_norm": 2.3314417826081018, "learning_rate": 2.151646705299536e-06, "loss": 0.2814, "step": 10373 }, { "epoch": 1.587452180566182, "grad_norm": 2.402232417525519, "learning_rate": 2.1501112054491725e-06, "loss": 0.3494, "step": 10374 }, { "epoch": 1.5876052027543994, "grad_norm": 2.326820597715731, "learning_rate": 2.1485761876911892e-06, "loss": 0.3038, "step": 10375 }, { "epoch": 1.5877582249426165, "grad_norm": 2.1383455574598713, "learning_rate": 2.1470416521198567e-06, "loss": 0.352, "step": 10376 }, { "epoch": 1.587911247130834, "grad_norm": 2.2995939874004625, "learning_rate": 2.1455075988294137e-06, "loss": 0.3279, "step": 10377 }, { "epoch": 1.5880642693190512, "grad_norm": 1.9137006604044404, "learning_rate": 2.143974027914083e-06, "loss": 0.3033, "step": 10378 }, { "epoch": 1.5882172915072685, "grad_norm": 2.2833225627008074, "learning_rate": 2.142440939468037e-06, "loss": 0.3236, "step": 10379 }, { "epoch": 1.5883703136954859, "grad_norm": 1.9860651109123406, "learning_rate": 2.1409083335854287e-06, "loss": 0.2383, "step": 10380 }, { "epoch": 1.588523335883703, "grad_norm": 2.48380903765018, "learning_rate": 2.1393762103603898e-06, "loss": 0.3249, "step": 10381 }, { "epoch": 1.5886763580719205, "grad_norm": 2.4131936637247184, "learning_rate": 2.1378445698870064e-06, "loss": 0.3444, "step": 10382 }, { "epoch": 1.5888293802601376, "grad_norm": 2.1244925827371386, "learning_rate": 2.136313412259342e-06, "loss": 0.3347, "step": 10383 }, { "epoch": 1.588982402448355, "grad_norm": 2.1275395552021252, "learning_rate": 2.134782737571439e-06, "loss": 0.2968, "step": 10384 }, { "epoch": 1.5891354246365723, "grad_norm": 2.382525615883336, "learning_rate": 2.1332525459172927e-06, "loss": 0.3307, "step": 10385 }, { "epoch": 1.5892884468247896, "grad_norm": 2.193605115589541, "learning_rate": 2.131722837390885e-06, "loss": 0.2865, "step": 10386 }, { "epoch": 1.589441469013007, "grad_norm": 2.3407094562764215, "learning_rate": 2.130193612086161e-06, "loss": 0.3284, "step": 10387 }, { "epoch": 1.589594491201224, "grad_norm": 2.213402841205263, "learning_rate": 2.128664870097028e-06, "loss": 0.31, "step": 10388 }, { "epoch": 1.5897475133894414, "grad_norm": 2.0222607907984798, "learning_rate": 2.127136611517382e-06, "loss": 0.2872, "step": 10389 }, { "epoch": 1.5899005355776588, "grad_norm": 2.2442947647359714, "learning_rate": 2.1256088364410775e-06, "loss": 0.2878, "step": 10390 }, { "epoch": 1.590053557765876, "grad_norm": 2.240624536205986, "learning_rate": 2.1240815449619335e-06, "loss": 0.3405, "step": 10391 }, { "epoch": 1.5902065799540934, "grad_norm": 2.1876775626139024, "learning_rate": 2.1225547371737564e-06, "loss": 0.3478, "step": 10392 }, { "epoch": 1.5903596021423105, "grad_norm": 2.162284723354573, "learning_rate": 2.1210284131703084e-06, "loss": 0.3532, "step": 10393 }, { "epoch": 1.590512624330528, "grad_norm": 2.1405508919523593, "learning_rate": 2.119502573045329e-06, "loss": 0.3358, "step": 10394 }, { "epoch": 1.5906656465187452, "grad_norm": 1.9600807168130765, "learning_rate": 2.117977216892525e-06, "loss": 0.3341, "step": 10395 }, { "epoch": 1.5908186687069625, "grad_norm": 2.314495390443742, "learning_rate": 2.1164523448055752e-06, "loss": 0.2895, "step": 10396 }, { "epoch": 1.5909716908951799, "grad_norm": 2.201921905625236, "learning_rate": 2.114927956878128e-06, "loss": 0.2522, "step": 10397 }, { "epoch": 1.591124713083397, "grad_norm": 2.268191757936805, "learning_rate": 2.1134040532038e-06, "loss": 0.3256, "step": 10398 }, { "epoch": 1.5912777352716145, "grad_norm": 2.2869195645309968, "learning_rate": 2.1118806338761823e-06, "loss": 0.3375, "step": 10399 }, { "epoch": 1.5914307574598316, "grad_norm": 2.0769558928508727, "learning_rate": 2.110357698988834e-06, "loss": 0.2647, "step": 10400 }, { "epoch": 1.591583779648049, "grad_norm": 2.4711031090770286, "learning_rate": 2.1088352486352802e-06, "loss": 0.3858, "step": 10401 }, { "epoch": 1.5917368018362663, "grad_norm": 2.119268678002257, "learning_rate": 2.1073132829090305e-06, "loss": 0.2872, "step": 10402 }, { "epoch": 1.5918898240244834, "grad_norm": 1.9850108207669523, "learning_rate": 2.105791801903545e-06, "loss": 0.3069, "step": 10403 }, { "epoch": 1.592042846212701, "grad_norm": 2.224819324782824, "learning_rate": 2.104270805712265e-06, "loss": 0.3448, "step": 10404 }, { "epoch": 1.592195868400918, "grad_norm": 1.873089072531048, "learning_rate": 2.1027502944286083e-06, "loss": 0.2505, "step": 10405 }, { "epoch": 1.5923488905891354, "grad_norm": 1.86192926757717, "learning_rate": 2.101230268145944e-06, "loss": 0.3051, "step": 10406 }, { "epoch": 1.5925019127773528, "grad_norm": 1.9442480263553694, "learning_rate": 2.099710726957633e-06, "loss": 0.293, "step": 10407 }, { "epoch": 1.5926549349655699, "grad_norm": 2.358985236520384, "learning_rate": 2.098191670956995e-06, "loss": 0.3361, "step": 10408 }, { "epoch": 1.5928079571537874, "grad_norm": 2.114912121413648, "learning_rate": 2.0966731002373132e-06, "loss": 0.3205, "step": 10409 }, { "epoch": 1.5929609793420045, "grad_norm": 2.324773058036772, "learning_rate": 2.0951550148918566e-06, "loss": 0.3892, "step": 10410 }, { "epoch": 1.5931140015302219, "grad_norm": 1.8540830139440694, "learning_rate": 2.093637415013857e-06, "loss": 0.2622, "step": 10411 }, { "epoch": 1.5932670237184392, "grad_norm": 1.901825704329433, "learning_rate": 2.0921203006965086e-06, "loss": 0.2557, "step": 10412 }, { "epoch": 1.5934200459066563, "grad_norm": 2.1564336487480387, "learning_rate": 2.0906036720329904e-06, "loss": 0.3251, "step": 10413 }, { "epoch": 1.5935730680948739, "grad_norm": 2.0254893602452304, "learning_rate": 2.0890875291164425e-06, "loss": 0.2756, "step": 10414 }, { "epoch": 1.593726090283091, "grad_norm": 2.145515414045669, "learning_rate": 2.087571872039977e-06, "loss": 0.311, "step": 10415 }, { "epoch": 1.5938791124713083, "grad_norm": 2.2101983287594034, "learning_rate": 2.0860567008966783e-06, "loss": 0.3755, "step": 10416 }, { "epoch": 1.5940321346595256, "grad_norm": 2.5275320370762353, "learning_rate": 2.084542015779595e-06, "loss": 0.3384, "step": 10417 }, { "epoch": 1.5941851568477428, "grad_norm": 2.058712242376689, "learning_rate": 2.083027816781753e-06, "loss": 0.3568, "step": 10418 }, { "epoch": 1.5943381790359603, "grad_norm": 1.9173450107481087, "learning_rate": 2.081514103996144e-06, "loss": 0.2783, "step": 10419 }, { "epoch": 1.5944912012241774, "grad_norm": 2.0613941843193793, "learning_rate": 2.080000877515731e-06, "loss": 0.2779, "step": 10420 }, { "epoch": 1.5946442234123948, "grad_norm": 2.0254402953785022, "learning_rate": 2.0784881374334464e-06, "loss": 0.2917, "step": 10421 }, { "epoch": 1.594797245600612, "grad_norm": 2.200732027895814, "learning_rate": 2.076975883842196e-06, "loss": 0.3322, "step": 10422 }, { "epoch": 1.5949502677888294, "grad_norm": 2.0881731560154146, "learning_rate": 2.075464116834851e-06, "loss": 0.3173, "step": 10423 }, { "epoch": 1.5951032899770468, "grad_norm": 2.1056631422662258, "learning_rate": 2.0739528365042548e-06, "loss": 0.2872, "step": 10424 }, { "epoch": 1.5952563121652639, "grad_norm": 2.169035501067074, "learning_rate": 2.0724420429432234e-06, "loss": 0.3098, "step": 10425 }, { "epoch": 1.5954093343534812, "grad_norm": 2.0153248522385647, "learning_rate": 2.070931736244538e-06, "loss": 0.2683, "step": 10426 }, { "epoch": 1.5955623565416985, "grad_norm": 2.215459845696938, "learning_rate": 2.0694219165009534e-06, "loss": 0.3179, "step": 10427 }, { "epoch": 1.5957153787299159, "grad_norm": 2.1656297130536872, "learning_rate": 2.0679125838051926e-06, "loss": 0.3034, "step": 10428 }, { "epoch": 1.5958684009181332, "grad_norm": 1.9130239058279197, "learning_rate": 2.0664037382499514e-06, "loss": 0.2878, "step": 10429 }, { "epoch": 1.5960214231063503, "grad_norm": 2.121543196047268, "learning_rate": 2.0648953799278905e-06, "loss": 0.3468, "step": 10430 }, { "epoch": 1.5961744452945679, "grad_norm": 1.9881410386253346, "learning_rate": 2.0633875089316524e-06, "loss": 0.3357, "step": 10431 }, { "epoch": 1.596327467482785, "grad_norm": 1.692514957785927, "learning_rate": 2.0618801253538323e-06, "loss": 0.2143, "step": 10432 }, { "epoch": 1.5964804896710023, "grad_norm": 2.1161285582341782, "learning_rate": 2.060373229287005e-06, "loss": 0.2955, "step": 10433 }, { "epoch": 1.5966335118592196, "grad_norm": 2.1256187952301437, "learning_rate": 2.058866820823723e-06, "loss": 0.3186, "step": 10434 }, { "epoch": 1.5967865340474368, "grad_norm": 1.8594388357448477, "learning_rate": 2.0573609000564933e-06, "loss": 0.2979, "step": 10435 }, { "epoch": 1.5969395562356543, "grad_norm": 2.0752161038423074, "learning_rate": 2.0558554670777987e-06, "loss": 0.2839, "step": 10436 }, { "epoch": 1.5970925784238714, "grad_norm": 1.8719102046704, "learning_rate": 2.054350521980104e-06, "loss": 0.2496, "step": 10437 }, { "epoch": 1.5972456006120888, "grad_norm": 2.2235478483167306, "learning_rate": 2.052846064855821e-06, "loss": 0.3171, "step": 10438 }, { "epoch": 1.597398622800306, "grad_norm": 2.0084905451553143, "learning_rate": 2.051342095797354e-06, "loss": 0.2823, "step": 10439 }, { "epoch": 1.5975516449885232, "grad_norm": 2.183017243080984, "learning_rate": 2.049838614897067e-06, "loss": 0.2913, "step": 10440 }, { "epoch": 1.5977046671767408, "grad_norm": 2.010797629418803, "learning_rate": 2.048335622247286e-06, "loss": 0.3119, "step": 10441 }, { "epoch": 1.5978576893649579, "grad_norm": 1.6995369473672757, "learning_rate": 2.0468331179403245e-06, "loss": 0.233, "step": 10442 }, { "epoch": 1.5980107115531752, "grad_norm": 2.253316101580062, "learning_rate": 2.045331102068454e-06, "loss": 0.3347, "step": 10443 }, { "epoch": 1.5981637337413925, "grad_norm": 2.064383868203437, "learning_rate": 2.0438295747239203e-06, "loss": 0.2746, "step": 10444 }, { "epoch": 1.5983167559296096, "grad_norm": 2.2317677875246336, "learning_rate": 2.0423285359989366e-06, "loss": 0.2718, "step": 10445 }, { "epoch": 1.5984697781178272, "grad_norm": 2.0477416135369517, "learning_rate": 2.0408279859856874e-06, "loss": 0.2805, "step": 10446 }, { "epoch": 1.5986228003060443, "grad_norm": 1.957177005619269, "learning_rate": 2.0393279247763287e-06, "loss": 0.2795, "step": 10447 }, { "epoch": 1.5987758224942616, "grad_norm": 2.3488538209818706, "learning_rate": 2.0378283524629837e-06, "loss": 0.3327, "step": 10448 }, { "epoch": 1.598928844682479, "grad_norm": 1.9422180459270484, "learning_rate": 2.036329269137749e-06, "loss": 0.297, "step": 10449 }, { "epoch": 1.599081866870696, "grad_norm": 2.2860116966764186, "learning_rate": 2.0348306748926873e-06, "loss": 0.3003, "step": 10450 }, { "epoch": 1.5992348890589136, "grad_norm": 2.24122516023989, "learning_rate": 2.033332569819834e-06, "loss": 0.3337, "step": 10451 }, { "epoch": 1.5993879112471308, "grad_norm": 2.1824835991354146, "learning_rate": 2.0318349540111924e-06, "loss": 0.2964, "step": 10452 }, { "epoch": 1.599540933435348, "grad_norm": 2.3276355546648695, "learning_rate": 2.030337827558738e-06, "loss": 0.3569, "step": 10453 }, { "epoch": 1.5996939556235654, "grad_norm": 2.0981843178955915, "learning_rate": 2.0288411905544158e-06, "loss": 0.3176, "step": 10454 }, { "epoch": 1.5998469778117828, "grad_norm": 2.2062338938417296, "learning_rate": 2.0273450430901396e-06, "loss": 0.2734, "step": 10455 }, { "epoch": 1.6, "grad_norm": 2.643436905327408, "learning_rate": 2.0258493852577933e-06, "loss": 0.329, "step": 10456 }, { "epoch": 1.6001530221882172, "grad_norm": 1.964606132543307, "learning_rate": 2.0243542171492314e-06, "loss": 0.2629, "step": 10457 }, { "epoch": 1.6003060443764345, "grad_norm": 2.0319908205624424, "learning_rate": 2.0228595388562776e-06, "loss": 0.2485, "step": 10458 }, { "epoch": 1.6004590665646519, "grad_norm": 2.157070383503065, "learning_rate": 2.0213653504707243e-06, "loss": 0.2879, "step": 10459 }, { "epoch": 1.6006120887528692, "grad_norm": 2.0638106425645106, "learning_rate": 2.019871652084342e-06, "loss": 0.3038, "step": 10460 }, { "epoch": 1.6007651109410865, "grad_norm": 2.413922284247371, "learning_rate": 2.0183784437888577e-06, "loss": 0.42, "step": 10461 }, { "epoch": 1.6009181331293036, "grad_norm": 2.1985129144555353, "learning_rate": 2.0168857256759745e-06, "loss": 0.3118, "step": 10462 }, { "epoch": 1.6010711553175212, "grad_norm": 2.2967862222343305, "learning_rate": 2.0153934978373745e-06, "loss": 0.3459, "step": 10463 }, { "epoch": 1.6012241775057383, "grad_norm": 2.1976923086941325, "learning_rate": 2.0139017603646925e-06, "loss": 0.3005, "step": 10464 }, { "epoch": 1.6013771996939556, "grad_norm": 1.9323315456919672, "learning_rate": 2.012410513349543e-06, "loss": 0.2619, "step": 10465 }, { "epoch": 1.601530221882173, "grad_norm": 2.7793989397342838, "learning_rate": 2.010919756883517e-06, "loss": 0.3693, "step": 10466 }, { "epoch": 1.60168324407039, "grad_norm": 2.1105341449495194, "learning_rate": 2.009429491058157e-06, "loss": 0.291, "step": 10467 }, { "epoch": 1.6018362662586076, "grad_norm": 2.212110475794262, "learning_rate": 2.0079397159649938e-06, "loss": 0.3009, "step": 10468 }, { "epoch": 1.6019892884468248, "grad_norm": 2.2904429553853576, "learning_rate": 2.0064504316955204e-06, "loss": 0.2809, "step": 10469 }, { "epoch": 1.602142310635042, "grad_norm": 2.2952348089169443, "learning_rate": 2.0049616383411906e-06, "loss": 0.3095, "step": 10470 }, { "epoch": 1.6022953328232594, "grad_norm": 2.0420024184363377, "learning_rate": 2.0034733359934476e-06, "loss": 0.3069, "step": 10471 }, { "epoch": 1.6024483550114765, "grad_norm": 2.087684753510197, "learning_rate": 2.001985524743689e-06, "loss": 0.2769, "step": 10472 }, { "epoch": 1.602601377199694, "grad_norm": 2.3146003021955575, "learning_rate": 2.0004982046832875e-06, "loss": 0.3284, "step": 10473 }, { "epoch": 1.6027543993879112, "grad_norm": 2.3341543365189037, "learning_rate": 1.9990113759035856e-06, "loss": 0.2941, "step": 10474 }, { "epoch": 1.6029074215761285, "grad_norm": 2.42343083766534, "learning_rate": 1.9975250384958954e-06, "loss": 0.3841, "step": 10475 }, { "epoch": 1.6030604437643459, "grad_norm": 2.0000642760572793, "learning_rate": 1.9960391925514975e-06, "loss": 0.2387, "step": 10476 }, { "epoch": 1.603213465952563, "grad_norm": 2.106691781994404, "learning_rate": 1.9945538381616456e-06, "loss": 0.2961, "step": 10477 }, { "epoch": 1.6033664881407805, "grad_norm": 2.0613737551747042, "learning_rate": 1.9930689754175604e-06, "loss": 0.3309, "step": 10478 }, { "epoch": 1.6035195103289976, "grad_norm": 2.4398182852589105, "learning_rate": 1.9915846044104313e-06, "loss": 0.3037, "step": 10479 }, { "epoch": 1.603672532517215, "grad_norm": 2.1090739150374, "learning_rate": 1.9901007252314185e-06, "loss": 0.2903, "step": 10480 }, { "epoch": 1.6038255547054323, "grad_norm": 2.144499019468979, "learning_rate": 1.988617337971661e-06, "loss": 0.31, "step": 10481 }, { "epoch": 1.6039785768936494, "grad_norm": 2.026883977576024, "learning_rate": 1.9871344427222504e-06, "loss": 0.3783, "step": 10482 }, { "epoch": 1.604131599081867, "grad_norm": 2.411293659913249, "learning_rate": 1.9856520395742575e-06, "loss": 0.3794, "step": 10483 }, { "epoch": 1.604284621270084, "grad_norm": 2.0058040325716755, "learning_rate": 1.9841701286187297e-06, "loss": 0.2296, "step": 10484 }, { "epoch": 1.6044376434583014, "grad_norm": 2.0780361687783278, "learning_rate": 1.982688709946671e-06, "loss": 0.2714, "step": 10485 }, { "epoch": 1.6045906656465188, "grad_norm": 2.0576034473812266, "learning_rate": 1.9812077836490595e-06, "loss": 0.3636, "step": 10486 }, { "epoch": 1.604743687834736, "grad_norm": 1.9414393221409934, "learning_rate": 1.979727349816852e-06, "loss": 0.3066, "step": 10487 }, { "epoch": 1.6048967100229534, "grad_norm": 2.4956241751854003, "learning_rate": 1.9782474085409597e-06, "loss": 0.3205, "step": 10488 }, { "epoch": 1.6050497322111705, "grad_norm": 2.1620399546925717, "learning_rate": 1.9767679599122767e-06, "loss": 0.3238, "step": 10489 }, { "epoch": 1.6052027543993879, "grad_norm": 2.3001838451511665, "learning_rate": 1.9752890040216644e-06, "loss": 0.3707, "step": 10490 }, { "epoch": 1.6053557765876052, "grad_norm": 2.1205275312048237, "learning_rate": 1.9738105409599405e-06, "loss": 0.3203, "step": 10491 }, { "epoch": 1.6055087987758225, "grad_norm": 2.3261888986198023, "learning_rate": 1.972332570817913e-06, "loss": 0.3607, "step": 10492 }, { "epoch": 1.6056618209640399, "grad_norm": 2.2311365973332733, "learning_rate": 1.97085509368635e-06, "loss": 0.2721, "step": 10493 }, { "epoch": 1.605814843152257, "grad_norm": 2.0157940050326655, "learning_rate": 1.9693781096559794e-06, "loss": 0.2747, "step": 10494 }, { "epoch": 1.6059678653404745, "grad_norm": 1.9952768637916527, "learning_rate": 1.9679016188175193e-06, "loss": 0.2393, "step": 10495 }, { "epoch": 1.6061208875286916, "grad_norm": 2.3814074053620207, "learning_rate": 1.9664256212616417e-06, "loss": 0.3275, "step": 10496 }, { "epoch": 1.606273909716909, "grad_norm": 2.105466025822687, "learning_rate": 1.9649501170789943e-06, "loss": 0.2923, "step": 10497 }, { "epoch": 1.6064269319051263, "grad_norm": 1.8103329861441821, "learning_rate": 1.963475106360193e-06, "loss": 0.2546, "step": 10498 }, { "epoch": 1.6065799540933434, "grad_norm": 1.981345827940942, "learning_rate": 1.962000589195825e-06, "loss": 0.2861, "step": 10499 }, { "epoch": 1.606732976281561, "grad_norm": 2.031188520976353, "learning_rate": 1.960526565676445e-06, "loss": 0.3322, "step": 10500 }, { "epoch": 1.606885998469778, "grad_norm": 2.073844327814389, "learning_rate": 1.9590530358925796e-06, "loss": 0.3111, "step": 10501 }, { "epoch": 1.6070390206579954, "grad_norm": 2.224144695075106, "learning_rate": 1.9575799999347247e-06, "loss": 0.3317, "step": 10502 }, { "epoch": 1.6071920428462128, "grad_norm": 2.2658544063551456, "learning_rate": 1.9561074578933424e-06, "loss": 0.4216, "step": 10503 }, { "epoch": 1.6073450650344299, "grad_norm": 2.3868575362093907, "learning_rate": 1.9546354098588693e-06, "loss": 0.3201, "step": 10504 }, { "epoch": 1.6074980872226474, "grad_norm": 2.052778963043461, "learning_rate": 1.9531638559217104e-06, "loss": 0.3202, "step": 10505 }, { "epoch": 1.6076511094108645, "grad_norm": 2.027431317380972, "learning_rate": 1.951692796172238e-06, "loss": 0.3444, "step": 10506 }, { "epoch": 1.6078041315990819, "grad_norm": 1.9896009671346127, "learning_rate": 1.9502222307007956e-06, "loss": 0.2569, "step": 10507 }, { "epoch": 1.6079571537872992, "grad_norm": 2.3888546056899918, "learning_rate": 1.948752159597698e-06, "loss": 0.3455, "step": 10508 }, { "epoch": 1.6081101759755163, "grad_norm": 2.3012402578354036, "learning_rate": 1.9472825829532226e-06, "loss": 0.3532, "step": 10509 }, { "epoch": 1.6082631981637339, "grad_norm": 2.214180934529892, "learning_rate": 1.945813500857633e-06, "loss": 0.3095, "step": 10510 }, { "epoch": 1.608416220351951, "grad_norm": 2.406222516683202, "learning_rate": 1.9443449134011416e-06, "loss": 0.3571, "step": 10511 }, { "epoch": 1.6085692425401683, "grad_norm": 1.972700407695371, "learning_rate": 1.9428768206739403e-06, "loss": 0.3136, "step": 10512 }, { "epoch": 1.6087222647283856, "grad_norm": 2.103254444351623, "learning_rate": 1.9414092227661976e-06, "loss": 0.2693, "step": 10513 }, { "epoch": 1.6088752869166028, "grad_norm": 2.0014590653047346, "learning_rate": 1.939942119768038e-06, "loss": 0.2823, "step": 10514 }, { "epoch": 1.6090283091048203, "grad_norm": 2.558600077343717, "learning_rate": 1.9384755117695607e-06, "loss": 0.4164, "step": 10515 }, { "epoch": 1.6091813312930374, "grad_norm": 1.9254914747715335, "learning_rate": 1.9370093988608453e-06, "loss": 0.2698, "step": 10516 }, { "epoch": 1.6093343534812548, "grad_norm": 2.3909848914674203, "learning_rate": 1.935543781131919e-06, "loss": 0.3229, "step": 10517 }, { "epoch": 1.609487375669472, "grad_norm": 2.024294601203651, "learning_rate": 1.934078658672801e-06, "loss": 0.2838, "step": 10518 }, { "epoch": 1.6096403978576894, "grad_norm": 2.5391493263896154, "learning_rate": 1.9326140315734685e-06, "loss": 0.418, "step": 10519 }, { "epoch": 1.6097934200459068, "grad_norm": 2.267744207696907, "learning_rate": 1.931149899923863e-06, "loss": 0.2905, "step": 10520 }, { "epoch": 1.6099464422341239, "grad_norm": 1.9815819153211391, "learning_rate": 1.9296862638139103e-06, "loss": 0.2529, "step": 10521 }, { "epoch": 1.6100994644223412, "grad_norm": 1.8905960118777305, "learning_rate": 1.928223123333498e-06, "loss": 0.2605, "step": 10522 }, { "epoch": 1.6102524866105585, "grad_norm": 2.1424726322335386, "learning_rate": 1.926760478572476e-06, "loss": 0.322, "step": 10523 }, { "epoch": 1.6104055087987759, "grad_norm": 2.2819861093889036, "learning_rate": 1.9252983296206784e-06, "loss": 0.2884, "step": 10524 }, { "epoch": 1.6105585309869932, "grad_norm": 2.0572968938301877, "learning_rate": 1.9238366765678972e-06, "loss": 0.3299, "step": 10525 }, { "epoch": 1.6107115531752103, "grad_norm": 1.969348700776263, "learning_rate": 1.922375519503902e-06, "loss": 0.2684, "step": 10526 }, { "epoch": 1.6108645753634276, "grad_norm": 2.202527841852906, "learning_rate": 1.9209148585184244e-06, "loss": 0.3269, "step": 10527 }, { "epoch": 1.611017597551645, "grad_norm": 1.940551357103671, "learning_rate": 1.9194546937011716e-06, "loss": 0.2926, "step": 10528 }, { "epoch": 1.6111706197398623, "grad_norm": 2.4080905545015825, "learning_rate": 1.9179950251418168e-06, "loss": 0.3119, "step": 10529 }, { "epoch": 1.6113236419280796, "grad_norm": 2.1771118306491495, "learning_rate": 1.9165358529300047e-06, "loss": 0.2689, "step": 10530 }, { "epoch": 1.6114766641162968, "grad_norm": 2.2555773274378237, "learning_rate": 1.915077177155349e-06, "loss": 0.319, "step": 10531 }, { "epoch": 1.6116296863045143, "grad_norm": 2.0593411592193664, "learning_rate": 1.9136189979074314e-06, "loss": 0.3501, "step": 10532 }, { "epoch": 1.6117827084927314, "grad_norm": 1.712499352328928, "learning_rate": 1.9121613152758067e-06, "loss": 0.2723, "step": 10533 }, { "epoch": 1.6119357306809488, "grad_norm": 2.229661991037623, "learning_rate": 1.910704129349994e-06, "loss": 0.3884, "step": 10534 }, { "epoch": 1.612088752869166, "grad_norm": 2.44309670412781, "learning_rate": 1.909247440219487e-06, "loss": 0.3624, "step": 10535 }, { "epoch": 1.6122417750573832, "grad_norm": 2.0981658994548904, "learning_rate": 1.907791247973746e-06, "loss": 0.3294, "step": 10536 }, { "epoch": 1.6123947972456008, "grad_norm": 2.2664865350975956, "learning_rate": 1.906335552702201e-06, "loss": 0.3435, "step": 10537 }, { "epoch": 1.6125478194338179, "grad_norm": 2.166203828647484, "learning_rate": 1.9048803544942518e-06, "loss": 0.3217, "step": 10538 }, { "epoch": 1.6127008416220352, "grad_norm": 2.1129488376182204, "learning_rate": 1.9034256534392692e-06, "loss": 0.2719, "step": 10539 }, { "epoch": 1.6128538638102525, "grad_norm": 1.9130793331631273, "learning_rate": 1.9019714496265906e-06, "loss": 0.2827, "step": 10540 }, { "epoch": 1.6130068859984696, "grad_norm": 1.9783507773097913, "learning_rate": 1.9005177431455223e-06, "loss": 0.3302, "step": 10541 }, { "epoch": 1.6131599081866872, "grad_norm": 2.266976194472374, "learning_rate": 1.8990645340853498e-06, "loss": 0.3105, "step": 10542 }, { "epoch": 1.6133129303749043, "grad_norm": 2.447531198231755, "learning_rate": 1.8976118225353135e-06, "loss": 0.337, "step": 10543 }, { "epoch": 1.6134659525631216, "grad_norm": 1.9466005333498533, "learning_rate": 1.896159608584629e-06, "loss": 0.2737, "step": 10544 }, { "epoch": 1.613618974751339, "grad_norm": 2.270820493266619, "learning_rate": 1.8947078923224905e-06, "loss": 0.3383, "step": 10545 }, { "epoch": 1.613771996939556, "grad_norm": 2.175481962547, "learning_rate": 1.8932566738380432e-06, "loss": 0.2804, "step": 10546 }, { "epoch": 1.6139250191277736, "grad_norm": 2.3916081886581337, "learning_rate": 1.891805953220419e-06, "loss": 0.3189, "step": 10547 }, { "epoch": 1.6140780413159908, "grad_norm": 2.199411097369911, "learning_rate": 1.890355730558715e-06, "loss": 0.3678, "step": 10548 }, { "epoch": 1.614231063504208, "grad_norm": 2.186939011479005, "learning_rate": 1.888906005941984e-06, "loss": 0.3018, "step": 10549 }, { "epoch": 1.6143840856924254, "grad_norm": 2.1798549473165565, "learning_rate": 1.887456779459269e-06, "loss": 0.2894, "step": 10550 }, { "epoch": 1.6145371078806425, "grad_norm": 2.273224534202083, "learning_rate": 1.8860080511995727e-06, "loss": 0.3557, "step": 10551 }, { "epoch": 1.61469013006886, "grad_norm": 2.1166615630191603, "learning_rate": 1.884559821251859e-06, "loss": 0.3081, "step": 10552 }, { "epoch": 1.6148431522570772, "grad_norm": 1.9044929221608777, "learning_rate": 1.8831120897050759e-06, "loss": 0.2839, "step": 10553 }, { "epoch": 1.6149961744452945, "grad_norm": 2.1024143177946377, "learning_rate": 1.8816648566481343e-06, "loss": 0.2778, "step": 10554 }, { "epoch": 1.6151491966335119, "grad_norm": 2.1048915778898163, "learning_rate": 1.8802181221699124e-06, "loss": 0.3376, "step": 10555 }, { "epoch": 1.6153022188217292, "grad_norm": 2.187077064266125, "learning_rate": 1.8787718863592597e-06, "loss": 0.3192, "step": 10556 }, { "epoch": 1.6154552410099465, "grad_norm": 1.9487619847012356, "learning_rate": 1.8773261493049965e-06, "loss": 0.2797, "step": 10557 }, { "epoch": 1.6156082631981636, "grad_norm": 2.1489496371148378, "learning_rate": 1.8758809110959098e-06, "loss": 0.3082, "step": 10558 }, { "epoch": 1.615761285386381, "grad_norm": 1.877140278626581, "learning_rate": 1.8744361718207593e-06, "loss": 0.3091, "step": 10559 }, { "epoch": 1.6159143075745983, "grad_norm": 2.4222748132697367, "learning_rate": 1.87299193156827e-06, "loss": 0.321, "step": 10560 }, { "epoch": 1.6160673297628156, "grad_norm": 2.402037261974164, "learning_rate": 1.8715481904271393e-06, "loss": 0.3186, "step": 10561 }, { "epoch": 1.616220351951033, "grad_norm": 1.842256510948127, "learning_rate": 1.8701049484860312e-06, "loss": 0.274, "step": 10562 }, { "epoch": 1.61637337413925, "grad_norm": 2.1205220163507255, "learning_rate": 1.8686622058335867e-06, "loss": 0.2792, "step": 10563 }, { "epoch": 1.6165263963274676, "grad_norm": 2.161144153109064, "learning_rate": 1.8672199625584042e-06, "loss": 0.3228, "step": 10564 }, { "epoch": 1.6166794185156848, "grad_norm": 2.288586202317026, "learning_rate": 1.8657782187490558e-06, "loss": 0.3729, "step": 10565 }, { "epoch": 1.616832440703902, "grad_norm": 2.110696657148073, "learning_rate": 1.8643369744940942e-06, "loss": 0.312, "step": 10566 }, { "epoch": 1.6169854628921194, "grad_norm": 2.0382296277093466, "learning_rate": 1.862896229882023e-06, "loss": 0.2529, "step": 10567 }, { "epoch": 1.6171384850803365, "grad_norm": 2.040328917756369, "learning_rate": 1.8614559850013247e-06, "loss": 0.2704, "step": 10568 }, { "epoch": 1.617291507268554, "grad_norm": 2.0325733210458905, "learning_rate": 1.8600162399404576e-06, "loss": 0.2916, "step": 10569 }, { "epoch": 1.6174445294567712, "grad_norm": 1.8749942403353452, "learning_rate": 1.8585769947878318e-06, "loss": 0.2755, "step": 10570 }, { "epoch": 1.6175975516449885, "grad_norm": 1.9462632302198313, "learning_rate": 1.8571382496318446e-06, "loss": 0.2756, "step": 10571 }, { "epoch": 1.6177505738332059, "grad_norm": 2.0840998504651407, "learning_rate": 1.8557000045608553e-06, "loss": 0.2953, "step": 10572 }, { "epoch": 1.617903596021423, "grad_norm": 2.3883921872480305, "learning_rate": 1.8542622596631831e-06, "loss": 0.3609, "step": 10573 }, { "epoch": 1.6180566182096405, "grad_norm": 2.256482256007678, "learning_rate": 1.8528250150271365e-06, "loss": 0.342, "step": 10574 }, { "epoch": 1.6182096403978576, "grad_norm": 2.3045778230254768, "learning_rate": 1.8513882707409791e-06, "loss": 0.3323, "step": 10575 }, { "epoch": 1.618362662586075, "grad_norm": 2.3223204819305505, "learning_rate": 1.8499520268929406e-06, "loss": 0.3327, "step": 10576 }, { "epoch": 1.6185156847742923, "grad_norm": 2.1041983316848696, "learning_rate": 1.8485162835712334e-06, "loss": 0.327, "step": 10577 }, { "epoch": 1.6186687069625094, "grad_norm": 1.9349892975599834, "learning_rate": 1.8470810408640305e-06, "loss": 0.2856, "step": 10578 }, { "epoch": 1.618821729150727, "grad_norm": 2.1178930440291057, "learning_rate": 1.8456462988594748e-06, "loss": 0.3653, "step": 10579 }, { "epoch": 1.618974751338944, "grad_norm": 2.1031709608253695, "learning_rate": 1.8442120576456802e-06, "loss": 0.2744, "step": 10580 }, { "epoch": 1.6191277735271614, "grad_norm": 2.478206559028707, "learning_rate": 1.8427783173107283e-06, "loss": 0.3079, "step": 10581 }, { "epoch": 1.6192807957153788, "grad_norm": 1.8838818778239124, "learning_rate": 1.8413450779426723e-06, "loss": 0.2817, "step": 10582 }, { "epoch": 1.6194338179035959, "grad_norm": 1.912703665415085, "learning_rate": 1.8399123396295305e-06, "loss": 0.3116, "step": 10583 }, { "epoch": 1.6195868400918134, "grad_norm": 2.2889397537082345, "learning_rate": 1.8384801024592957e-06, "loss": 0.3419, "step": 10584 }, { "epoch": 1.6197398622800305, "grad_norm": 2.012218099198962, "learning_rate": 1.8370483665199246e-06, "loss": 0.247, "step": 10585 }, { "epoch": 1.6198928844682479, "grad_norm": 2.6818823744928593, "learning_rate": 1.8356171318993477e-06, "loss": 0.3282, "step": 10586 }, { "epoch": 1.6200459066564652, "grad_norm": 2.2120147507952304, "learning_rate": 1.8341863986854624e-06, "loss": 0.3462, "step": 10587 }, { "epoch": 1.6201989288446825, "grad_norm": 2.052125974963057, "learning_rate": 1.8327561669661343e-06, "loss": 0.3083, "step": 10588 }, { "epoch": 1.6203519510328999, "grad_norm": 1.9840591498263909, "learning_rate": 1.8313264368292005e-06, "loss": 0.2652, "step": 10589 }, { "epoch": 1.620504973221117, "grad_norm": 2.4214798106894975, "learning_rate": 1.8298972083624667e-06, "loss": 0.348, "step": 10590 }, { "epoch": 1.6206579954093343, "grad_norm": 1.8997166893876836, "learning_rate": 1.8284684816537045e-06, "loss": 0.3332, "step": 10591 }, { "epoch": 1.6208110175975516, "grad_norm": 2.369531878415244, "learning_rate": 1.8270402567906654e-06, "loss": 0.3397, "step": 10592 }, { "epoch": 1.620964039785769, "grad_norm": 2.170929041681945, "learning_rate": 1.825612533861053e-06, "loss": 0.2398, "step": 10593 }, { "epoch": 1.6211170619739863, "grad_norm": 1.9795611289660413, "learning_rate": 1.8241853129525522e-06, "loss": 0.2896, "step": 10594 }, { "epoch": 1.6212700841622034, "grad_norm": 2.155517297499104, "learning_rate": 1.8227585941528192e-06, "loss": 0.2697, "step": 10595 }, { "epoch": 1.621423106350421, "grad_norm": 2.228766404040802, "learning_rate": 1.8213323775494684e-06, "loss": 0.2509, "step": 10596 }, { "epoch": 1.621576128538638, "grad_norm": 2.187183161177902, "learning_rate": 1.81990666323009e-06, "loss": 0.3133, "step": 10597 }, { "epoch": 1.6217291507268554, "grad_norm": 1.8590016521156796, "learning_rate": 1.8184814512822479e-06, "loss": 0.2984, "step": 10598 }, { "epoch": 1.6218821729150728, "grad_norm": 2.134756508469202, "learning_rate": 1.8170567417934615e-06, "loss": 0.3212, "step": 10599 }, { "epoch": 1.6220351951032899, "grad_norm": 1.975640103428586, "learning_rate": 1.815632534851235e-06, "loss": 0.344, "step": 10600 }, { "epoch": 1.6221882172915074, "grad_norm": 1.9895208241922568, "learning_rate": 1.8142088305430339e-06, "loss": 0.3345, "step": 10601 }, { "epoch": 1.6223412394797245, "grad_norm": 2.0141924746839344, "learning_rate": 1.8127856289562873e-06, "loss": 0.2954, "step": 10602 }, { "epoch": 1.6224942616679419, "grad_norm": 2.392543783330661, "learning_rate": 1.8113629301784052e-06, "loss": 0.3212, "step": 10603 }, { "epoch": 1.6226472838561592, "grad_norm": 2.2038116103794976, "learning_rate": 1.8099407342967635e-06, "loss": 0.2888, "step": 10604 }, { "epoch": 1.6228003060443763, "grad_norm": 2.0600551971159806, "learning_rate": 1.8085190413986953e-06, "loss": 0.3045, "step": 10605 }, { "epoch": 1.6229533282325939, "grad_norm": 1.9894692711668345, "learning_rate": 1.807097851571521e-06, "loss": 0.3068, "step": 10606 }, { "epoch": 1.623106350420811, "grad_norm": 2.2340507250131147, "learning_rate": 1.8056771649025173e-06, "loss": 0.3865, "step": 10607 }, { "epoch": 1.6232593726090283, "grad_norm": 1.8290540957109105, "learning_rate": 1.8042569814789367e-06, "loss": 0.245, "step": 10608 }, { "epoch": 1.6234123947972456, "grad_norm": 2.3069059295735155, "learning_rate": 1.8028373013879964e-06, "loss": 0.3339, "step": 10609 }, { "epoch": 1.6235654169854628, "grad_norm": 2.1424771142080723, "learning_rate": 1.801418124716884e-06, "loss": 0.2891, "step": 10610 }, { "epoch": 1.6237184391736803, "grad_norm": 2.4355336467958035, "learning_rate": 1.7999994515527586e-06, "loss": 0.3452, "step": 10611 }, { "epoch": 1.6238714613618974, "grad_norm": 2.112129584337102, "learning_rate": 1.7985812819827452e-06, "loss": 0.2674, "step": 10612 }, { "epoch": 1.6240244835501148, "grad_norm": 1.8521996070155238, "learning_rate": 1.7971636160939388e-06, "loss": 0.263, "step": 10613 }, { "epoch": 1.624177505738332, "grad_norm": 2.216065359992707, "learning_rate": 1.7957464539734048e-06, "loss": 0.3601, "step": 10614 }, { "epoch": 1.6243305279265492, "grad_norm": 1.7979366955047242, "learning_rate": 1.7943297957081762e-06, "loss": 0.2903, "step": 10615 }, { "epoch": 1.6244835501147668, "grad_norm": 2.39989702737361, "learning_rate": 1.7929136413852567e-06, "loss": 0.428, "step": 10616 }, { "epoch": 1.6246365723029839, "grad_norm": 2.2140572585003557, "learning_rate": 1.7914979910916152e-06, "loss": 0.31, "step": 10617 }, { "epoch": 1.6247895944912012, "grad_norm": 2.4071733588786715, "learning_rate": 1.7900828449141949e-06, "loss": 0.3316, "step": 10618 }, { "epoch": 1.6249426166794185, "grad_norm": 2.1107088272132812, "learning_rate": 1.788668202939904e-06, "loss": 0.3253, "step": 10619 }, { "epoch": 1.6250956388676359, "grad_norm": 2.166748268503527, "learning_rate": 1.7872540652556192e-06, "loss": 0.3268, "step": 10620 }, { "epoch": 1.6252486610558532, "grad_norm": 2.0274159462473382, "learning_rate": 1.785840431948196e-06, "loss": 0.2423, "step": 10621 }, { "epoch": 1.6254016832440703, "grad_norm": 2.2267309500004413, "learning_rate": 1.7844273031044435e-06, "loss": 0.2898, "step": 10622 }, { "epoch": 1.6255547054322876, "grad_norm": 2.1327367988472776, "learning_rate": 1.783014678811147e-06, "loss": 0.2974, "step": 10623 }, { "epoch": 1.625707727620505, "grad_norm": 2.054036296303469, "learning_rate": 1.781602559155069e-06, "loss": 0.303, "step": 10624 }, { "epoch": 1.6258607498087223, "grad_norm": 1.9841387608214618, "learning_rate": 1.7801909442229258e-06, "loss": 0.2797, "step": 10625 }, { "epoch": 1.6260137719969396, "grad_norm": 1.964548815517046, "learning_rate": 1.7787798341014107e-06, "loss": 0.2808, "step": 10626 }, { "epoch": 1.6261667941851567, "grad_norm": 2.1037655301339906, "learning_rate": 1.7773692288771927e-06, "loss": 0.2973, "step": 10627 }, { "epoch": 1.6263198163733743, "grad_norm": 2.024239937854239, "learning_rate": 1.7759591286368915e-06, "loss": 0.2694, "step": 10628 }, { "epoch": 1.6264728385615914, "grad_norm": 2.094301076866119, "learning_rate": 1.774549533467116e-06, "loss": 0.2871, "step": 10629 }, { "epoch": 1.6266258607498087, "grad_norm": 1.9001204144586141, "learning_rate": 1.773140443454434e-06, "loss": 0.2899, "step": 10630 }, { "epoch": 1.626778882938026, "grad_norm": 2.0305966737469117, "learning_rate": 1.7717318586853772e-06, "loss": 0.3259, "step": 10631 }, { "epoch": 1.6269319051262432, "grad_norm": 2.132174412861507, "learning_rate": 1.7703237792464567e-06, "loss": 0.3068, "step": 10632 }, { "epoch": 1.6270849273144607, "grad_norm": 2.267922670169627, "learning_rate": 1.7689162052241515e-06, "loss": 0.3581, "step": 10633 }, { "epoch": 1.6272379495026779, "grad_norm": 2.0521196074770414, "learning_rate": 1.767509136704897e-06, "loss": 0.2833, "step": 10634 }, { "epoch": 1.6273909716908952, "grad_norm": 1.95384614994354, "learning_rate": 1.7661025737751148e-06, "loss": 0.2801, "step": 10635 }, { "epoch": 1.6275439938791125, "grad_norm": 2.073529431779787, "learning_rate": 1.7646965165211837e-06, "loss": 0.2925, "step": 10636 }, { "epoch": 1.6276970160673296, "grad_norm": 1.681491994873035, "learning_rate": 1.7632909650294571e-06, "loss": 0.2322, "step": 10637 }, { "epoch": 1.6278500382555472, "grad_norm": 2.013831593099779, "learning_rate": 1.7618859193862547e-06, "loss": 0.246, "step": 10638 }, { "epoch": 1.6280030604437643, "grad_norm": 2.222352955584193, "learning_rate": 1.7604813796778652e-06, "loss": 0.3341, "step": 10639 }, { "epoch": 1.6281560826319816, "grad_norm": 2.0854233819651253, "learning_rate": 1.7590773459905475e-06, "loss": 0.2933, "step": 10640 }, { "epoch": 1.628309104820199, "grad_norm": 2.1521488694164495, "learning_rate": 1.7576738184105292e-06, "loss": 0.3917, "step": 10641 }, { "epoch": 1.628462127008416, "grad_norm": 2.0691690612534135, "learning_rate": 1.7562707970240046e-06, "loss": 0.2824, "step": 10642 }, { "epoch": 1.6286151491966336, "grad_norm": 2.4170362752448065, "learning_rate": 1.7548682819171414e-06, "loss": 0.4363, "step": 10643 }, { "epoch": 1.6287681713848507, "grad_norm": 2.362068837715601, "learning_rate": 1.7534662731760687e-06, "loss": 0.3469, "step": 10644 }, { "epoch": 1.628921193573068, "grad_norm": 2.3419814264762713, "learning_rate": 1.7520647708868977e-06, "loss": 0.3054, "step": 10645 }, { "epoch": 1.6290742157612854, "grad_norm": 2.180124099872857, "learning_rate": 1.7506637751356936e-06, "loss": 0.309, "step": 10646 }, { "epoch": 1.6292272379495025, "grad_norm": 2.1503373897058373, "learning_rate": 1.7492632860084957e-06, "loss": 0.3065, "step": 10647 }, { "epoch": 1.62938026013772, "grad_norm": 1.7921363423401993, "learning_rate": 1.7478633035913217e-06, "loss": 0.275, "step": 10648 }, { "epoch": 1.6295332823259372, "grad_norm": 2.052364874867607, "learning_rate": 1.7464638279701385e-06, "loss": 0.3333, "step": 10649 }, { "epoch": 1.6296863045141545, "grad_norm": 2.027999984299312, "learning_rate": 1.7450648592309039e-06, "loss": 0.2747, "step": 10650 }, { "epoch": 1.6298393267023719, "grad_norm": 1.9461536696532593, "learning_rate": 1.7436663974595314e-06, "loss": 0.2983, "step": 10651 }, { "epoch": 1.629992348890589, "grad_norm": 2.1762010864690153, "learning_rate": 1.742268442741899e-06, "loss": 0.3249, "step": 10652 }, { "epoch": 1.6301453710788065, "grad_norm": 1.848456650261626, "learning_rate": 1.7408709951638692e-06, "loss": 0.2466, "step": 10653 }, { "epoch": 1.6302983932670236, "grad_norm": 2.1106357287709314, "learning_rate": 1.7394740548112644e-06, "loss": 0.2985, "step": 10654 }, { "epoch": 1.630451415455241, "grad_norm": 2.2833521016179117, "learning_rate": 1.738077621769867e-06, "loss": 0.2894, "step": 10655 }, { "epoch": 1.6306044376434583, "grad_norm": 2.0606386731477, "learning_rate": 1.7366816961254463e-06, "loss": 0.2897, "step": 10656 }, { "epoch": 1.6307574598316756, "grad_norm": 1.9264653530292764, "learning_rate": 1.7352862779637302e-06, "loss": 0.3205, "step": 10657 }, { "epoch": 1.630910482019893, "grad_norm": 2.0009189375636045, "learning_rate": 1.7338913673704138e-06, "loss": 0.2725, "step": 10658 }, { "epoch": 1.63106350420811, "grad_norm": 2.1654583638020024, "learning_rate": 1.7324969644311672e-06, "loss": 0.2803, "step": 10659 }, { "epoch": 1.6312165263963274, "grad_norm": 1.6338114436592361, "learning_rate": 1.7311030692316244e-06, "loss": 0.2143, "step": 10660 }, { "epoch": 1.6313695485845447, "grad_norm": 2.2733662110358415, "learning_rate": 1.72970968185739e-06, "loss": 0.295, "step": 10661 }, { "epoch": 1.631522570772762, "grad_norm": 2.1242848843223703, "learning_rate": 1.7283168023940366e-06, "loss": 0.3347, "step": 10662 }, { "epoch": 1.6316755929609794, "grad_norm": 2.0020305645316747, "learning_rate": 1.7269244309271083e-06, "loss": 0.263, "step": 10663 }, { "epoch": 1.6318286151491965, "grad_norm": 2.4292633335719582, "learning_rate": 1.7255325675421154e-06, "loss": 0.3645, "step": 10664 }, { "epoch": 1.631981637337414, "grad_norm": 2.2540963885960483, "learning_rate": 1.7241412123245372e-06, "loss": 0.3293, "step": 10665 }, { "epoch": 1.6321346595256312, "grad_norm": 1.7674776566718227, "learning_rate": 1.722750365359822e-06, "loss": 0.2674, "step": 10666 }, { "epoch": 1.6322876817138485, "grad_norm": 2.0795766976304413, "learning_rate": 1.7213600267333884e-06, "loss": 0.351, "step": 10667 }, { "epoch": 1.6324407039020659, "grad_norm": 2.539896141459289, "learning_rate": 1.7199701965306214e-06, "loss": 0.3219, "step": 10668 }, { "epoch": 1.632593726090283, "grad_norm": 2.151179350291576, "learning_rate": 1.718580874836877e-06, "loss": 0.299, "step": 10669 }, { "epoch": 1.6327467482785005, "grad_norm": 2.59810278830666, "learning_rate": 1.717192061737477e-06, "loss": 0.3523, "step": 10670 }, { "epoch": 1.6328997704667176, "grad_norm": 1.9421911778296748, "learning_rate": 1.7158037573177156e-06, "loss": 0.2264, "step": 10671 }, { "epoch": 1.633052792654935, "grad_norm": 2.4319925226909413, "learning_rate": 1.714415961662853e-06, "loss": 0.3519, "step": 10672 }, { "epoch": 1.6332058148431523, "grad_norm": 2.196304094029701, "learning_rate": 1.7130286748581183e-06, "loss": 0.2792, "step": 10673 }, { "epoch": 1.6333588370313694, "grad_norm": 1.8814963990285223, "learning_rate": 1.7116418969887149e-06, "loss": 0.254, "step": 10674 }, { "epoch": 1.633511859219587, "grad_norm": 2.163967544140257, "learning_rate": 1.7102556281398053e-06, "loss": 0.3448, "step": 10675 }, { "epoch": 1.633664881407804, "grad_norm": 2.3130978412784096, "learning_rate": 1.7088698683965243e-06, "loss": 0.3146, "step": 10676 }, { "epoch": 1.6338179035960214, "grad_norm": 1.9372326688003392, "learning_rate": 1.7074846178439853e-06, "loss": 0.2683, "step": 10677 }, { "epoch": 1.6339709257842387, "grad_norm": 2.136531609644606, "learning_rate": 1.7060998765672531e-06, "loss": 0.2711, "step": 10678 }, { "epoch": 1.6341239479724559, "grad_norm": 2.2288583019578354, "learning_rate": 1.7047156446513723e-06, "loss": 0.3449, "step": 10679 }, { "epoch": 1.6342769701606734, "grad_norm": 1.9287035630634422, "learning_rate": 1.7033319221813593e-06, "loss": 0.279, "step": 10680 }, { "epoch": 1.6344299923488905, "grad_norm": 1.9373764849343669, "learning_rate": 1.7019487092421838e-06, "loss": 0.311, "step": 10681 }, { "epoch": 1.6345830145371079, "grad_norm": 2.1086222036034785, "learning_rate": 1.7005660059188034e-06, "loss": 0.3082, "step": 10682 }, { "epoch": 1.6347360367253252, "grad_norm": 1.992491784010115, "learning_rate": 1.6991838122961358e-06, "loss": 0.2944, "step": 10683 }, { "epoch": 1.6348890589135423, "grad_norm": 1.948514553292706, "learning_rate": 1.6978021284590562e-06, "loss": 0.3036, "step": 10684 }, { "epoch": 1.6350420811017599, "grad_norm": 2.3051469233814905, "learning_rate": 1.6964209544924304e-06, "loss": 0.302, "step": 10685 }, { "epoch": 1.635195103289977, "grad_norm": 2.0805217135051053, "learning_rate": 1.695040290481077e-06, "loss": 0.2704, "step": 10686 }, { "epoch": 1.6353481254781943, "grad_norm": 2.130342610927485, "learning_rate": 1.6936601365097883e-06, "loss": 0.3191, "step": 10687 }, { "epoch": 1.6355011476664116, "grad_norm": 2.1838518937601763, "learning_rate": 1.692280492663325e-06, "loss": 0.2934, "step": 10688 }, { "epoch": 1.635654169854629, "grad_norm": 1.6849116928654915, "learning_rate": 1.6909013590264178e-06, "loss": 0.1704, "step": 10689 }, { "epoch": 1.6358071920428463, "grad_norm": 2.2691614114392835, "learning_rate": 1.689522735683763e-06, "loss": 0.285, "step": 10690 }, { "epoch": 1.6359602142310634, "grad_norm": 2.0610432754287547, "learning_rate": 1.6881446227200272e-06, "loss": 0.2869, "step": 10691 }, { "epoch": 1.6361132364192807, "grad_norm": 2.220578815693314, "learning_rate": 1.686767020219846e-06, "loss": 0.263, "step": 10692 }, { "epoch": 1.636266258607498, "grad_norm": 2.237510475416876, "learning_rate": 1.6853899282678243e-06, "loss": 0.3804, "step": 10693 }, { "epoch": 1.6364192807957154, "grad_norm": 2.3743835475077018, "learning_rate": 1.6840133469485342e-06, "loss": 0.3615, "step": 10694 }, { "epoch": 1.6365723029839327, "grad_norm": 2.0430532081213353, "learning_rate": 1.682637276346517e-06, "loss": 0.3135, "step": 10695 }, { "epoch": 1.6367253251721499, "grad_norm": 2.203751506438757, "learning_rate": 1.681261716546282e-06, "loss": 0.3146, "step": 10696 }, { "epoch": 1.6368783473603674, "grad_norm": 2.203912076629452, "learning_rate": 1.6798866676323078e-06, "loss": 0.2949, "step": 10697 }, { "epoch": 1.6370313695485845, "grad_norm": 2.1840574837792697, "learning_rate": 1.6785121296890416e-06, "loss": 0.2636, "step": 10698 }, { "epoch": 1.6371843917368019, "grad_norm": 1.9430320860711479, "learning_rate": 1.6771381028009003e-06, "loss": 0.2496, "step": 10699 }, { "epoch": 1.6373374139250192, "grad_norm": 2.8639026682037336, "learning_rate": 1.6757645870522665e-06, "loss": 0.2489, "step": 10700 }, { "epoch": 1.6374904361132363, "grad_norm": 1.9705293616477655, "learning_rate": 1.6743915825274948e-06, "loss": 0.3078, "step": 10701 }, { "epoch": 1.6376434583014539, "grad_norm": 2.1633250353549585, "learning_rate": 1.6730190893109032e-06, "loss": 0.303, "step": 10702 }, { "epoch": 1.637796480489671, "grad_norm": 2.1591337070007635, "learning_rate": 1.671647107486789e-06, "loss": 0.2965, "step": 10703 }, { "epoch": 1.6379495026778883, "grad_norm": 2.228367036532636, "learning_rate": 1.6702756371394046e-06, "loss": 0.3568, "step": 10704 }, { "epoch": 1.6381025248661056, "grad_norm": 2.035251795089624, "learning_rate": 1.668904678352977e-06, "loss": 0.2833, "step": 10705 }, { "epoch": 1.6382555470543227, "grad_norm": 2.1454705760944965, "learning_rate": 1.6675342312117094e-06, "loss": 0.3327, "step": 10706 }, { "epoch": 1.6384085692425403, "grad_norm": 1.845039677873378, "learning_rate": 1.6661642957997592e-06, "loss": 0.2323, "step": 10707 }, { "epoch": 1.6385615914307574, "grad_norm": 2.128005587177012, "learning_rate": 1.6647948722012586e-06, "loss": 0.3273, "step": 10708 }, { "epoch": 1.6387146136189747, "grad_norm": 2.483060924862472, "learning_rate": 1.6634259605003178e-06, "loss": 0.3343, "step": 10709 }, { "epoch": 1.638867635807192, "grad_norm": 1.8476913614331738, "learning_rate": 1.6620575607809963e-06, "loss": 0.241, "step": 10710 }, { "epoch": 1.6390206579954092, "grad_norm": 1.873225854131616, "learning_rate": 1.6606896731273414e-06, "loss": 0.2667, "step": 10711 }, { "epoch": 1.6391736801836267, "grad_norm": 2.0053587873486225, "learning_rate": 1.6593222976233591e-06, "loss": 0.3382, "step": 10712 }, { "epoch": 1.6393267023718439, "grad_norm": 2.381386238829408, "learning_rate": 1.6579554343530192e-06, "loss": 0.3578, "step": 10713 }, { "epoch": 1.6394797245600612, "grad_norm": 2.0143214868755015, "learning_rate": 1.6565890834002718e-06, "loss": 0.2752, "step": 10714 }, { "epoch": 1.6396327467482785, "grad_norm": 2.0593276322342104, "learning_rate": 1.6552232448490314e-06, "loss": 0.3165, "step": 10715 }, { "epoch": 1.6397857689364956, "grad_norm": 2.1151191992071348, "learning_rate": 1.6538579187831715e-06, "loss": 0.3537, "step": 10716 }, { "epoch": 1.6399387911247132, "grad_norm": 2.2127685314861774, "learning_rate": 1.6524931052865501e-06, "loss": 0.2841, "step": 10717 }, { "epoch": 1.6400918133129303, "grad_norm": 2.193174920060312, "learning_rate": 1.6511288044429818e-06, "loss": 0.2926, "step": 10718 }, { "epoch": 1.6402448355011476, "grad_norm": 2.010676753287093, "learning_rate": 1.6497650163362556e-06, "loss": 0.2297, "step": 10719 }, { "epoch": 1.640397857689365, "grad_norm": 2.2276182035195373, "learning_rate": 1.6484017410501251e-06, "loss": 0.3368, "step": 10720 }, { "epoch": 1.6405508798775823, "grad_norm": 2.036294692508882, "learning_rate": 1.647038978668316e-06, "loss": 0.2753, "step": 10721 }, { "epoch": 1.6407039020657996, "grad_norm": 1.9936948700499255, "learning_rate": 1.6456767292745201e-06, "loss": 0.2486, "step": 10722 }, { "epoch": 1.6408569242540167, "grad_norm": 2.0439149869667412, "learning_rate": 1.644314992952395e-06, "loss": 0.27, "step": 10723 }, { "epoch": 1.641009946442234, "grad_norm": 2.1513599382318205, "learning_rate": 1.6429537697855802e-06, "loss": 0.3388, "step": 10724 }, { "epoch": 1.6411629686304514, "grad_norm": 2.5542635210630666, "learning_rate": 1.6415930598576647e-06, "loss": 0.3008, "step": 10725 }, { "epoch": 1.6413159908186687, "grad_norm": 2.1803917893956997, "learning_rate": 1.6402328632522147e-06, "loss": 0.3298, "step": 10726 }, { "epoch": 1.641469013006886, "grad_norm": 2.0606574091597727, "learning_rate": 1.6388731800527725e-06, "loss": 0.2659, "step": 10727 }, { "epoch": 1.6416220351951032, "grad_norm": 2.5470074979530706, "learning_rate": 1.6375140103428355e-06, "loss": 0.3826, "step": 10728 }, { "epoch": 1.6417750573833207, "grad_norm": 1.9157050495130552, "learning_rate": 1.636155354205875e-06, "loss": 0.2514, "step": 10729 }, { "epoch": 1.6419280795715379, "grad_norm": 2.312294859804467, "learning_rate": 1.6347972117253386e-06, "loss": 0.3057, "step": 10730 }, { "epoch": 1.6420811017597552, "grad_norm": 2.2051790746379005, "learning_rate": 1.633439582984625e-06, "loss": 0.3248, "step": 10731 }, { "epoch": 1.6422341239479725, "grad_norm": 2.1339386560489295, "learning_rate": 1.6320824680671199e-06, "loss": 0.2573, "step": 10732 }, { "epoch": 1.6423871461361896, "grad_norm": 2.2289140984293496, "learning_rate": 1.6307258670561676e-06, "loss": 0.2824, "step": 10733 }, { "epoch": 1.6425401683244072, "grad_norm": 1.9393319401090998, "learning_rate": 1.6293697800350761e-06, "loss": 0.238, "step": 10734 }, { "epoch": 1.6426931905126243, "grad_norm": 2.1686155278174, "learning_rate": 1.6280142070871352e-06, "loss": 0.2616, "step": 10735 }, { "epoch": 1.6428462127008416, "grad_norm": 1.712168777957528, "learning_rate": 1.626659148295595e-06, "loss": 0.2245, "step": 10736 }, { "epoch": 1.642999234889059, "grad_norm": 2.2094323994725866, "learning_rate": 1.6253046037436693e-06, "loss": 0.314, "step": 10737 }, { "epoch": 1.643152257077276, "grad_norm": 1.836421884614005, "learning_rate": 1.6239505735145511e-06, "loss": 0.2484, "step": 10738 }, { "epoch": 1.6433052792654936, "grad_norm": 2.1152088410801375, "learning_rate": 1.6225970576913963e-06, "loss": 0.3049, "step": 10739 }, { "epoch": 1.6434583014537107, "grad_norm": 2.0485816420127, "learning_rate": 1.6212440563573284e-06, "loss": 0.2677, "step": 10740 }, { "epoch": 1.643611323641928, "grad_norm": 2.003129581012719, "learning_rate": 1.6198915695954408e-06, "loss": 0.2557, "step": 10741 }, { "epoch": 1.6437643458301454, "grad_norm": 1.7381668985451146, "learning_rate": 1.6185395974887952e-06, "loss": 0.2338, "step": 10742 }, { "epoch": 1.6439173680183625, "grad_norm": 2.0542402267329876, "learning_rate": 1.6171881401204215e-06, "loss": 0.2303, "step": 10743 }, { "epoch": 1.64407039020658, "grad_norm": 2.116449407226027, "learning_rate": 1.6158371975733178e-06, "loss": 0.2836, "step": 10744 }, { "epoch": 1.6442234123947972, "grad_norm": 2.3970780751820944, "learning_rate": 1.6144867699304512e-06, "loss": 0.3241, "step": 10745 }, { "epoch": 1.6443764345830145, "grad_norm": 1.993119568552427, "learning_rate": 1.6131368572747564e-06, "loss": 0.2613, "step": 10746 }, { "epoch": 1.6445294567712319, "grad_norm": 2.109118877668952, "learning_rate": 1.611787459689136e-06, "loss": 0.2621, "step": 10747 }, { "epoch": 1.644682478959449, "grad_norm": 2.1555885076869563, "learning_rate": 1.6104385772564625e-06, "loss": 0.2521, "step": 10748 }, { "epoch": 1.6448355011476665, "grad_norm": 2.425326814567966, "learning_rate": 1.6090902100595774e-06, "loss": 0.3719, "step": 10749 }, { "epoch": 1.6449885233358836, "grad_norm": 2.1643534965090483, "learning_rate": 1.6077423581812869e-06, "loss": 0.3239, "step": 10750 }, { "epoch": 1.645141545524101, "grad_norm": 1.9450833734330513, "learning_rate": 1.606395021704369e-06, "loss": 0.2902, "step": 10751 }, { "epoch": 1.6452945677123183, "grad_norm": 2.1136401370200053, "learning_rate": 1.6050482007115687e-06, "loss": 0.3279, "step": 10752 }, { "epoch": 1.6454475899005354, "grad_norm": 2.065969679238715, "learning_rate": 1.6037018952855998e-06, "loss": 0.2373, "step": 10753 }, { "epoch": 1.645600612088753, "grad_norm": 2.377631789499768, "learning_rate": 1.6023561055091441e-06, "loss": 0.2707, "step": 10754 }, { "epoch": 1.64575363427697, "grad_norm": 1.8720142905869108, "learning_rate": 1.6010108314648498e-06, "loss": 0.2948, "step": 10755 }, { "epoch": 1.6459066564651874, "grad_norm": 2.0211503929517196, "learning_rate": 1.5996660732353409e-06, "loss": 0.2744, "step": 10756 }, { "epoch": 1.6460596786534047, "grad_norm": 1.8960009862980085, "learning_rate": 1.5983218309031989e-06, "loss": 0.296, "step": 10757 }, { "epoch": 1.646212700841622, "grad_norm": 2.3533780465336545, "learning_rate": 1.5969781045509792e-06, "loss": 0.3347, "step": 10758 }, { "epoch": 1.6463657230298394, "grad_norm": 1.8612954689985926, "learning_rate": 1.595634894261211e-06, "loss": 0.2747, "step": 10759 }, { "epoch": 1.6465187452180565, "grad_norm": 2.2717995170800385, "learning_rate": 1.5942922001163775e-06, "loss": 0.3192, "step": 10760 }, { "epoch": 1.6466717674062739, "grad_norm": 2.157957154061846, "learning_rate": 1.5929500221989448e-06, "loss": 0.3263, "step": 10761 }, { "epoch": 1.6468247895944912, "grad_norm": 2.285376952361559, "learning_rate": 1.5916083605913435e-06, "loss": 0.312, "step": 10762 }, { "epoch": 1.6469778117827085, "grad_norm": 2.137538885073889, "learning_rate": 1.5902672153759613e-06, "loss": 0.273, "step": 10763 }, { "epoch": 1.6471308339709259, "grad_norm": 2.1379714062032216, "learning_rate": 1.58892658663517e-06, "loss": 0.3213, "step": 10764 }, { "epoch": 1.647283856159143, "grad_norm": 1.8926610476687151, "learning_rate": 1.5875864744513048e-06, "loss": 0.2281, "step": 10765 }, { "epoch": 1.6474368783473605, "grad_norm": 2.3552325347343146, "learning_rate": 1.5862468789066587e-06, "loss": 0.2999, "step": 10766 }, { "epoch": 1.6475899005355776, "grad_norm": 2.0190965406363457, "learning_rate": 1.5849078000835083e-06, "loss": 0.2834, "step": 10767 }, { "epoch": 1.647742922723795, "grad_norm": 1.8876247283806757, "learning_rate": 1.583569238064091e-06, "loss": 0.251, "step": 10768 }, { "epoch": 1.6478959449120123, "grad_norm": 2.0416052122157824, "learning_rate": 1.582231192930611e-06, "loss": 0.3513, "step": 10769 }, { "epoch": 1.6480489671002294, "grad_norm": 1.949524372805771, "learning_rate": 1.580893664765245e-06, "loss": 0.2473, "step": 10770 }, { "epoch": 1.648201989288447, "grad_norm": 2.0017501920680085, "learning_rate": 1.5795566536501339e-06, "loss": 0.3034, "step": 10771 }, { "epoch": 1.648355011476664, "grad_norm": 2.3663624170073243, "learning_rate": 1.5782201596673908e-06, "loss": 0.3085, "step": 10772 }, { "epoch": 1.6485080336648814, "grad_norm": 1.9866391576555056, "learning_rate": 1.5768841828990934e-06, "loss": 0.2308, "step": 10773 }, { "epoch": 1.6486610558530987, "grad_norm": 1.9819239180672437, "learning_rate": 1.5755487234272892e-06, "loss": 0.3391, "step": 10774 }, { "epoch": 1.6488140780413159, "grad_norm": 2.210194731319105, "learning_rate": 1.5742137813339942e-06, "loss": 0.2937, "step": 10775 }, { "epoch": 1.6489671002295334, "grad_norm": 2.347026374742834, "learning_rate": 1.5728793567011934e-06, "loss": 0.3295, "step": 10776 }, { "epoch": 1.6491201224177505, "grad_norm": 2.005811317294377, "learning_rate": 1.5715454496108384e-06, "loss": 0.2935, "step": 10777 }, { "epoch": 1.6492731446059679, "grad_norm": 2.153939130371352, "learning_rate": 1.57021206014485e-06, "loss": 0.3447, "step": 10778 }, { "epoch": 1.6494261667941852, "grad_norm": 2.067837021033556, "learning_rate": 1.5688791883851152e-06, "loss": 0.273, "step": 10779 }, { "epoch": 1.6495791889824023, "grad_norm": 2.0123842804250747, "learning_rate": 1.5675468344134936e-06, "loss": 0.2966, "step": 10780 }, { "epoch": 1.6497322111706199, "grad_norm": 2.142451040406505, "learning_rate": 1.566214998311808e-06, "loss": 0.2641, "step": 10781 }, { "epoch": 1.649885233358837, "grad_norm": 2.073097588130826, "learning_rate": 1.5648836801618527e-06, "loss": 0.27, "step": 10782 }, { "epoch": 1.6500382555470543, "grad_norm": 2.483413467106734, "learning_rate": 1.5635528800453892e-06, "loss": 0.3521, "step": 10783 }, { "epoch": 1.6501912777352716, "grad_norm": 2.1582050153024945, "learning_rate": 1.562222598044144e-06, "loss": 0.2844, "step": 10784 }, { "epoch": 1.6503442999234887, "grad_norm": 2.176784737134013, "learning_rate": 1.5608928342398232e-06, "loss": 0.2599, "step": 10785 }, { "epoch": 1.6504973221117063, "grad_norm": 2.5654567717802217, "learning_rate": 1.5595635887140847e-06, "loss": 0.3485, "step": 10786 }, { "epoch": 1.6506503442999234, "grad_norm": 2.190836740951483, "learning_rate": 1.5582348615485632e-06, "loss": 0.2981, "step": 10787 }, { "epoch": 1.6508033664881407, "grad_norm": 1.7488115769482173, "learning_rate": 1.5569066528248676e-06, "loss": 0.2574, "step": 10788 }, { "epoch": 1.650956388676358, "grad_norm": 1.919495686623538, "learning_rate": 1.5555789626245599e-06, "loss": 0.2921, "step": 10789 }, { "epoch": 1.6511094108645754, "grad_norm": 2.268063202918329, "learning_rate": 1.5542517910291843e-06, "loss": 0.3139, "step": 10790 }, { "epoch": 1.6512624330527927, "grad_norm": 2.049890249473881, "learning_rate": 1.5529251381202492e-06, "loss": 0.301, "step": 10791 }, { "epoch": 1.6514154552410099, "grad_norm": 2.2210056204592146, "learning_rate": 1.5515990039792217e-06, "loss": 0.2149, "step": 10792 }, { "epoch": 1.6515684774292272, "grad_norm": 1.8970844610815365, "learning_rate": 1.5502733886875521e-06, "loss": 0.2351, "step": 10793 }, { "epoch": 1.6517214996174445, "grad_norm": 1.987111276832506, "learning_rate": 1.5489482923266519e-06, "loss": 0.2797, "step": 10794 }, { "epoch": 1.6518745218056619, "grad_norm": 2.1966652245652547, "learning_rate": 1.5476237149778928e-06, "loss": 0.3025, "step": 10795 }, { "epoch": 1.6520275439938792, "grad_norm": 1.7725625214287049, "learning_rate": 1.5462996567226296e-06, "loss": 0.2398, "step": 10796 }, { "epoch": 1.6521805661820963, "grad_norm": 2.095813332604148, "learning_rate": 1.5449761176421752e-06, "loss": 0.3259, "step": 10797 }, { "epoch": 1.6523335883703139, "grad_norm": 2.1249341291903736, "learning_rate": 1.5436530978178132e-06, "loss": 0.3051, "step": 10798 }, { "epoch": 1.652486610558531, "grad_norm": 2.414881397404002, "learning_rate": 1.5423305973307966e-06, "loss": 0.355, "step": 10799 }, { "epoch": 1.6526396327467483, "grad_norm": 2.101162420996157, "learning_rate": 1.541008616262345e-06, "loss": 0.3226, "step": 10800 }, { "epoch": 1.6527926549349656, "grad_norm": 2.1531043110040433, "learning_rate": 1.5396871546936453e-06, "loss": 0.2871, "step": 10801 }, { "epoch": 1.6529456771231827, "grad_norm": 2.235894912765083, "learning_rate": 1.5383662127058552e-06, "loss": 0.2943, "step": 10802 }, { "epoch": 1.6530986993114003, "grad_norm": 2.3541968452917006, "learning_rate": 1.5370457903800973e-06, "loss": 0.3262, "step": 10803 }, { "epoch": 1.6532517214996174, "grad_norm": 1.954556761150049, "learning_rate": 1.5357258877974645e-06, "loss": 0.2851, "step": 10804 }, { "epoch": 1.6534047436878347, "grad_norm": 2.526920108336962, "learning_rate": 1.5344065050390155e-06, "loss": 0.3662, "step": 10805 }, { "epoch": 1.653557765876052, "grad_norm": 2.179565146522856, "learning_rate": 1.5330876421857855e-06, "loss": 0.3278, "step": 10806 }, { "epoch": 1.6537107880642692, "grad_norm": 2.074137636510761, "learning_rate": 1.531769299318764e-06, "loss": 0.3092, "step": 10807 }, { "epoch": 1.6538638102524867, "grad_norm": 2.525170469543575, "learning_rate": 1.530451476518915e-06, "loss": 0.3664, "step": 10808 }, { "epoch": 1.6540168324407039, "grad_norm": 2.3703063766098875, "learning_rate": 1.5291341738671782e-06, "loss": 0.3352, "step": 10809 }, { "epoch": 1.6541698546289212, "grad_norm": 2.3707433160320037, "learning_rate": 1.5278173914444472e-06, "loss": 0.3119, "step": 10810 }, { "epoch": 1.6543228768171385, "grad_norm": 2.281609337415562, "learning_rate": 1.5265011293315923e-06, "loss": 0.2825, "step": 10811 }, { "epoch": 1.6544758990053556, "grad_norm": 2.3882911358515, "learning_rate": 1.5251853876094557e-06, "loss": 0.2972, "step": 10812 }, { "epoch": 1.6546289211935732, "grad_norm": 2.0222330062798695, "learning_rate": 1.523870166358834e-06, "loss": 0.3287, "step": 10813 }, { "epoch": 1.6547819433817903, "grad_norm": 2.1272748346654566, "learning_rate": 1.5225554656605057e-06, "loss": 0.2728, "step": 10814 }, { "epoch": 1.6549349655700076, "grad_norm": 2.120587577042761, "learning_rate": 1.5212412855952131e-06, "loss": 0.3192, "step": 10815 }, { "epoch": 1.655087987758225, "grad_norm": 2.259649605135267, "learning_rate": 1.519927626243658e-06, "loss": 0.3069, "step": 10816 }, { "epoch": 1.655241009946442, "grad_norm": 1.7862660695016823, "learning_rate": 1.5186144876865228e-06, "loss": 0.2747, "step": 10817 }, { "epoch": 1.6553940321346596, "grad_norm": 2.1200121696415875, "learning_rate": 1.517301870004454e-06, "loss": 0.2835, "step": 10818 }, { "epoch": 1.6555470543228767, "grad_norm": 2.0400460017927635, "learning_rate": 1.515989773278058e-06, "loss": 0.28, "step": 10819 }, { "epoch": 1.655700076511094, "grad_norm": 1.8889820609466452, "learning_rate": 1.514678197587921e-06, "loss": 0.2624, "step": 10820 }, { "epoch": 1.6558530986993114, "grad_norm": 1.9934048791905552, "learning_rate": 1.5133671430145913e-06, "loss": 0.2727, "step": 10821 }, { "epoch": 1.6560061208875287, "grad_norm": 1.9503046841502563, "learning_rate": 1.5120566096385846e-06, "loss": 0.302, "step": 10822 }, { "epoch": 1.656159143075746, "grad_norm": 1.8025401263324794, "learning_rate": 1.5107465975403868e-06, "loss": 0.2487, "step": 10823 }, { "epoch": 1.6563121652639632, "grad_norm": 2.2381643970107246, "learning_rate": 1.509437106800451e-06, "loss": 0.3447, "step": 10824 }, { "epoch": 1.6564651874521805, "grad_norm": 2.213589525316061, "learning_rate": 1.5081281374991975e-06, "loss": 0.3024, "step": 10825 }, { "epoch": 1.6566182096403979, "grad_norm": 2.1435019979757364, "learning_rate": 1.5068196897170152e-06, "loss": 0.3324, "step": 10826 }, { "epoch": 1.6567712318286152, "grad_norm": 2.666594275663528, "learning_rate": 1.505511763534262e-06, "loss": 0.3843, "step": 10827 }, { "epoch": 1.6569242540168325, "grad_norm": 2.273364682415013, "learning_rate": 1.5042043590312616e-06, "loss": 0.2817, "step": 10828 }, { "epoch": 1.6570772762050496, "grad_norm": 1.972543812149342, "learning_rate": 1.5028974762883065e-06, "loss": 0.3084, "step": 10829 }, { "epoch": 1.6572302983932672, "grad_norm": 1.9225873877071888, "learning_rate": 1.5015911153856588e-06, "loss": 0.2399, "step": 10830 }, { "epoch": 1.6573833205814843, "grad_norm": 1.9259450792736108, "learning_rate": 1.5002852764035468e-06, "loss": 0.2644, "step": 10831 }, { "epoch": 1.6575363427697016, "grad_norm": 2.25980625558073, "learning_rate": 1.4989799594221676e-06, "loss": 0.2851, "step": 10832 }, { "epoch": 1.657689364957919, "grad_norm": 1.9883780300425007, "learning_rate": 1.4976751645216846e-06, "loss": 0.2178, "step": 10833 }, { "epoch": 1.657842387146136, "grad_norm": 2.194469310656908, "learning_rate": 1.4963708917822283e-06, "loss": 0.2982, "step": 10834 }, { "epoch": 1.6579954093343536, "grad_norm": 2.216289901686091, "learning_rate": 1.4950671412839068e-06, "loss": 0.3195, "step": 10835 }, { "epoch": 1.6581484315225707, "grad_norm": 2.5717793300199836, "learning_rate": 1.4937639131067817e-06, "loss": 0.3651, "step": 10836 }, { "epoch": 1.658301453710788, "grad_norm": 2.137589206573856, "learning_rate": 1.4924612073308887e-06, "loss": 0.3486, "step": 10837 }, { "epoch": 1.6584544758990054, "grad_norm": 2.154092460386197, "learning_rate": 1.4911590240362395e-06, "loss": 0.2565, "step": 10838 }, { "epoch": 1.6586074980872225, "grad_norm": 2.0526315485709814, "learning_rate": 1.489857363302799e-06, "loss": 0.2636, "step": 10839 }, { "epoch": 1.65876052027544, "grad_norm": 2.214596346525897, "learning_rate": 1.488556225210508e-06, "loss": 0.3014, "step": 10840 }, { "epoch": 1.6589135424636572, "grad_norm": 2.0302186159970566, "learning_rate": 1.4872556098392798e-06, "loss": 0.306, "step": 10841 }, { "epoch": 1.6590665646518745, "grad_norm": 1.9744823847536939, "learning_rate": 1.485955517268982e-06, "loss": 0.2682, "step": 10842 }, { "epoch": 1.6592195868400919, "grad_norm": 1.970294762784387, "learning_rate": 1.4846559475794653e-06, "loss": 0.3153, "step": 10843 }, { "epoch": 1.659372609028309, "grad_norm": 2.0777672103857747, "learning_rate": 1.4833569008505422e-06, "loss": 0.2852, "step": 10844 }, { "epoch": 1.6595256312165265, "grad_norm": 2.187430071032332, "learning_rate": 1.4820583771619845e-06, "loss": 0.2573, "step": 10845 }, { "epoch": 1.6596786534047436, "grad_norm": 2.263106010448733, "learning_rate": 1.4807603765935451e-06, "loss": 0.2328, "step": 10846 }, { "epoch": 1.659831675592961, "grad_norm": 2.2069090905920876, "learning_rate": 1.4794628992249427e-06, "loss": 0.3041, "step": 10847 }, { "epoch": 1.6599846977811783, "grad_norm": 2.2081482884286685, "learning_rate": 1.4781659451358498e-06, "loss": 0.3292, "step": 10848 }, { "epoch": 1.6601377199693954, "grad_norm": 2.1366601858632275, "learning_rate": 1.4768695144059276e-06, "loss": 0.2761, "step": 10849 }, { "epoch": 1.660290742157613, "grad_norm": 2.106645047341696, "learning_rate": 1.4755736071147897e-06, "loss": 0.3162, "step": 10850 }, { "epoch": 1.66044376434583, "grad_norm": 2.3656997479027413, "learning_rate": 1.474278223342026e-06, "loss": 0.3084, "step": 10851 }, { "epoch": 1.6605967865340474, "grad_norm": 2.192837488975101, "learning_rate": 1.4729833631671887e-06, "loss": 0.2746, "step": 10852 }, { "epoch": 1.6607498087222647, "grad_norm": 2.139378747762477, "learning_rate": 1.4716890266698002e-06, "loss": 0.325, "step": 10853 }, { "epoch": 1.660902830910482, "grad_norm": 2.3221793138150773, "learning_rate": 1.470395213929352e-06, "loss": 0.3499, "step": 10854 }, { "epoch": 1.6610558530986994, "grad_norm": 1.8374502022826251, "learning_rate": 1.4691019250253025e-06, "loss": 0.2696, "step": 10855 }, { "epoch": 1.6612088752869165, "grad_norm": 2.166262856949546, "learning_rate": 1.4678091600370758e-06, "loss": 0.2904, "step": 10856 }, { "epoch": 1.6613618974751339, "grad_norm": 2.2757916046025533, "learning_rate": 1.4665169190440664e-06, "loss": 0.2943, "step": 10857 }, { "epoch": 1.6615149196633512, "grad_norm": 1.9638029604739888, "learning_rate": 1.4652252021256375e-06, "loss": 0.2033, "step": 10858 }, { "epoch": 1.6616679418515685, "grad_norm": 2.235095501903873, "learning_rate": 1.4639340093611165e-06, "loss": 0.3472, "step": 10859 }, { "epoch": 1.6618209640397859, "grad_norm": 2.1923690145521393, "learning_rate": 1.4626433408298014e-06, "loss": 0.3573, "step": 10860 }, { "epoch": 1.661973986228003, "grad_norm": 1.927484721477276, "learning_rate": 1.4613531966109561e-06, "loss": 0.2985, "step": 10861 }, { "epoch": 1.6621270084162203, "grad_norm": 2.0199650132275404, "learning_rate": 1.4600635767838155e-06, "loss": 0.2646, "step": 10862 }, { "epoch": 1.6622800306044376, "grad_norm": 2.2089164022086027, "learning_rate": 1.458774481427576e-06, "loss": 0.3142, "step": 10863 }, { "epoch": 1.662433052792655, "grad_norm": 2.0269913480817245, "learning_rate": 1.4574859106214144e-06, "loss": 0.2498, "step": 10864 }, { "epoch": 1.6625860749808723, "grad_norm": 2.3356780814963773, "learning_rate": 1.4561978644444596e-06, "loss": 0.2692, "step": 10865 }, { "epoch": 1.6627390971690894, "grad_norm": 2.055522130474578, "learning_rate": 1.4549103429758138e-06, "loss": 0.2867, "step": 10866 }, { "epoch": 1.662892119357307, "grad_norm": 1.9009789290007288, "learning_rate": 1.4536233462945582e-06, "loss": 0.266, "step": 10867 }, { "epoch": 1.663045141545524, "grad_norm": 1.8610570330440441, "learning_rate": 1.4523368744797239e-06, "loss": 0.2756, "step": 10868 }, { "epoch": 1.6631981637337414, "grad_norm": 2.0347304270079927, "learning_rate": 1.4510509276103179e-06, "loss": 0.2777, "step": 10869 }, { "epoch": 1.6633511859219587, "grad_norm": 2.0341956783646835, "learning_rate": 1.4497655057653237e-06, "loss": 0.2307, "step": 10870 }, { "epoch": 1.6635042081101759, "grad_norm": 2.103395181152377, "learning_rate": 1.4484806090236736e-06, "loss": 0.2885, "step": 10871 }, { "epoch": 1.6636572302983934, "grad_norm": 2.4448504211780824, "learning_rate": 1.4471962374642845e-06, "loss": 0.3329, "step": 10872 }, { "epoch": 1.6638102524866105, "grad_norm": 2.098333249073084, "learning_rate": 1.4459123911660366e-06, "loss": 0.2922, "step": 10873 }, { "epoch": 1.6639632746748279, "grad_norm": 2.0247796530057687, "learning_rate": 1.444629070207767e-06, "loss": 0.2942, "step": 10874 }, { "epoch": 1.6641162968630452, "grad_norm": 2.1636751847253457, "learning_rate": 1.443346274668298e-06, "loss": 0.3873, "step": 10875 }, { "epoch": 1.6642693190512623, "grad_norm": 2.231073006220205, "learning_rate": 1.44206400462641e-06, "loss": 0.3651, "step": 10876 }, { "epoch": 1.6644223412394799, "grad_norm": 1.656595975610177, "learning_rate": 1.4407822601608457e-06, "loss": 0.2371, "step": 10877 }, { "epoch": 1.664575363427697, "grad_norm": 1.969119693034, "learning_rate": 1.4395010413503297e-06, "loss": 0.235, "step": 10878 }, { "epoch": 1.6647283856159143, "grad_norm": 1.8045403787736842, "learning_rate": 1.4382203482735425e-06, "loss": 0.2718, "step": 10879 }, { "epoch": 1.6648814078041316, "grad_norm": 1.9410478871577124, "learning_rate": 1.4369401810091377e-06, "loss": 0.3118, "step": 10880 }, { "epoch": 1.6650344299923487, "grad_norm": 2.1581941318540356, "learning_rate": 1.435660539635736e-06, "loss": 0.2823, "step": 10881 }, { "epoch": 1.6651874521805663, "grad_norm": 2.3185022235247192, "learning_rate": 1.4343814242319243e-06, "loss": 0.4697, "step": 10882 }, { "epoch": 1.6653404743687834, "grad_norm": 1.7032804581381125, "learning_rate": 1.4331028348762577e-06, "loss": 0.2148, "step": 10883 }, { "epoch": 1.6654934965570007, "grad_norm": 2.0705076500454287, "learning_rate": 1.43182477164726e-06, "loss": 0.2949, "step": 10884 }, { "epoch": 1.665646518745218, "grad_norm": 2.0861927158849665, "learning_rate": 1.4305472346234227e-06, "loss": 0.3027, "step": 10885 }, { "epoch": 1.6657995409334352, "grad_norm": 2.204515921292248, "learning_rate": 1.4292702238832035e-06, "loss": 0.3218, "step": 10886 }, { "epoch": 1.6659525631216527, "grad_norm": 2.1139004154729792, "learning_rate": 1.4279937395050258e-06, "loss": 0.2753, "step": 10887 }, { "epoch": 1.6661055853098699, "grad_norm": 1.9279461931220945, "learning_rate": 1.4267177815672906e-06, "loss": 0.2917, "step": 10888 }, { "epoch": 1.6662586074980872, "grad_norm": 1.877735560651574, "learning_rate": 1.4254423501483538e-06, "loss": 0.2467, "step": 10889 }, { "epoch": 1.6664116296863045, "grad_norm": 2.3057435678936398, "learning_rate": 1.4241674453265441e-06, "loss": 0.289, "step": 10890 }, { "epoch": 1.6665646518745219, "grad_norm": 2.1293163309803123, "learning_rate": 1.4228930671801645e-06, "loss": 0.2736, "step": 10891 }, { "epoch": 1.6667176740627392, "grad_norm": 2.3130308684961656, "learning_rate": 1.421619215787473e-06, "loss": 0.3645, "step": 10892 }, { "epoch": 1.6668706962509563, "grad_norm": 1.9476549249402393, "learning_rate": 1.4203458912267032e-06, "loss": 0.1831, "step": 10893 }, { "epoch": 1.6670237184391736, "grad_norm": 1.5150591864711775, "learning_rate": 1.4190730935760589e-06, "loss": 0.2004, "step": 10894 }, { "epoch": 1.667176740627391, "grad_norm": 2.1337667385478625, "learning_rate": 1.4178008229137008e-06, "loss": 0.3594, "step": 10895 }, { "epoch": 1.6673297628156083, "grad_norm": 2.0534710712272726, "learning_rate": 1.416529079317771e-06, "loss": 0.2937, "step": 10896 }, { "epoch": 1.6674827850038256, "grad_norm": 2.0695438008325646, "learning_rate": 1.4152578628663706e-06, "loss": 0.3173, "step": 10897 }, { "epoch": 1.6676358071920427, "grad_norm": 1.994886244529576, "learning_rate": 1.4139871736375643e-06, "loss": 0.3141, "step": 10898 }, { "epoch": 1.6677888293802603, "grad_norm": 2.387752398099085, "learning_rate": 1.4127170117093958e-06, "loss": 0.375, "step": 10899 }, { "epoch": 1.6679418515684774, "grad_norm": 2.190527276810829, "learning_rate": 1.4114473771598702e-06, "loss": 0.2836, "step": 10900 }, { "epoch": 1.6680948737566947, "grad_norm": 2.670201984085481, "learning_rate": 1.4101782700669597e-06, "loss": 0.3888, "step": 10901 }, { "epoch": 1.668247895944912, "grad_norm": 2.1367696600688304, "learning_rate": 1.4089096905086053e-06, "loss": 0.3103, "step": 10902 }, { "epoch": 1.6684009181331292, "grad_norm": 1.7876575207949825, "learning_rate": 1.4076416385627146e-06, "loss": 0.2303, "step": 10903 }, { "epoch": 1.6685539403213467, "grad_norm": 2.411778716322336, "learning_rate": 1.4063741143071652e-06, "loss": 0.2621, "step": 10904 }, { "epoch": 1.6687069625095639, "grad_norm": 2.160323074264381, "learning_rate": 1.4051071178197996e-06, "loss": 0.2919, "step": 10905 }, { "epoch": 1.6688599846977812, "grad_norm": 2.0159113695934696, "learning_rate": 1.4038406491784285e-06, "loss": 0.2663, "step": 10906 }, { "epoch": 1.6690130068859985, "grad_norm": 1.8991370060184651, "learning_rate": 1.4025747084608322e-06, "loss": 0.2995, "step": 10907 }, { "epoch": 1.6691660290742156, "grad_norm": 1.9419152106094528, "learning_rate": 1.401309295744756e-06, "loss": 0.2903, "step": 10908 }, { "epoch": 1.6693190512624332, "grad_norm": 1.7963355100642329, "learning_rate": 1.4000444111079147e-06, "loss": 0.2973, "step": 10909 }, { "epoch": 1.6694720734506503, "grad_norm": 1.8830633868108124, "learning_rate": 1.3987800546279895e-06, "loss": 0.2105, "step": 10910 }, { "epoch": 1.6696250956388676, "grad_norm": 2.2856483161991092, "learning_rate": 1.397516226382629e-06, "loss": 0.2671, "step": 10911 }, { "epoch": 1.669778117827085, "grad_norm": 1.771961190145075, "learning_rate": 1.3962529264494507e-06, "loss": 0.1899, "step": 10912 }, { "epoch": 1.669931140015302, "grad_norm": 1.8975790556274168, "learning_rate": 1.3949901549060373e-06, "loss": 0.2446, "step": 10913 }, { "epoch": 1.6700841622035196, "grad_norm": 1.596883540271513, "learning_rate": 1.3937279118299417e-06, "loss": 0.1948, "step": 10914 }, { "epoch": 1.6702371843917367, "grad_norm": 2.1678140693547583, "learning_rate": 1.392466197298683e-06, "loss": 0.2797, "step": 10915 }, { "epoch": 1.670390206579954, "grad_norm": 2.5530718585428835, "learning_rate": 1.3912050113897457e-06, "loss": 0.3456, "step": 10916 }, { "epoch": 1.6705432287681714, "grad_norm": 2.055834230738391, "learning_rate": 1.3899443541805912e-06, "loss": 0.272, "step": 10917 }, { "epoch": 1.6706962509563885, "grad_norm": 2.2459829700144107, "learning_rate": 1.3886842257486344e-06, "loss": 0.3651, "step": 10918 }, { "epoch": 1.670849273144606, "grad_norm": 2.1164224680206662, "learning_rate": 1.3874246261712644e-06, "loss": 0.2782, "step": 10919 }, { "epoch": 1.6710022953328232, "grad_norm": 1.8515975103873454, "learning_rate": 1.3861655555258457e-06, "loss": 0.2176, "step": 10920 }, { "epoch": 1.6711553175210405, "grad_norm": 2.025232486640509, "learning_rate": 1.3849070138896948e-06, "loss": 0.2984, "step": 10921 }, { "epoch": 1.6713083397092579, "grad_norm": 2.182041754809988, "learning_rate": 1.3836490013401049e-06, "loss": 0.3218, "step": 10922 }, { "epoch": 1.6714613618974752, "grad_norm": 2.104884022879343, "learning_rate": 1.3823915179543411e-06, "loss": 0.3369, "step": 10923 }, { "epoch": 1.6716143840856925, "grad_norm": 2.259995500180198, "learning_rate": 1.381134563809623e-06, "loss": 0.3001, "step": 10924 }, { "epoch": 1.6717674062739096, "grad_norm": 1.9805033206567582, "learning_rate": 1.3798781389831505e-06, "loss": 0.245, "step": 10925 }, { "epoch": 1.671920428462127, "grad_norm": 2.108247142452472, "learning_rate": 1.3786222435520846e-06, "loss": 0.2678, "step": 10926 }, { "epoch": 1.6720734506503443, "grad_norm": 2.2290200835316747, "learning_rate": 1.377366877593551e-06, "loss": 0.2508, "step": 10927 }, { "epoch": 1.6722264728385616, "grad_norm": 2.2448037125405973, "learning_rate": 1.3761120411846506e-06, "loss": 0.3183, "step": 10928 }, { "epoch": 1.672379495026779, "grad_norm": 2.168758056304277, "learning_rate": 1.374857734402446e-06, "loss": 0.3377, "step": 10929 }, { "epoch": 1.672532517214996, "grad_norm": 1.8955123459943806, "learning_rate": 1.3736039573239712e-06, "loss": 0.2873, "step": 10930 }, { "epoch": 1.6726855394032136, "grad_norm": 2.1207109975957437, "learning_rate": 1.3723507100262234e-06, "loss": 0.2861, "step": 10931 }, { "epoch": 1.6728385615914307, "grad_norm": 1.9763204898257254, "learning_rate": 1.37109799258617e-06, "loss": 0.2584, "step": 10932 }, { "epoch": 1.672991583779648, "grad_norm": 2.475408425541011, "learning_rate": 1.3698458050807451e-06, "loss": 0.3446, "step": 10933 }, { "epoch": 1.6731446059678654, "grad_norm": 2.092829234547134, "learning_rate": 1.3685941475868502e-06, "loss": 0.3435, "step": 10934 }, { "epoch": 1.6732976281560825, "grad_norm": 2.40580483114001, "learning_rate": 1.3673430201813553e-06, "loss": 0.3217, "step": 10935 }, { "epoch": 1.6734506503443, "grad_norm": 2.17627753965489, "learning_rate": 1.3660924229410976e-06, "loss": 0.2959, "step": 10936 }, { "epoch": 1.6736036725325172, "grad_norm": 2.753710906264951, "learning_rate": 1.3648423559428792e-06, "loss": 0.3364, "step": 10937 }, { "epoch": 1.6737566947207345, "grad_norm": 2.4017439533471885, "learning_rate": 1.3635928192634728e-06, "loss": 0.3363, "step": 10938 }, { "epoch": 1.6739097169089519, "grad_norm": 2.0006436609312455, "learning_rate": 1.3623438129796173e-06, "loss": 0.3522, "step": 10939 }, { "epoch": 1.674062739097169, "grad_norm": 2.0945862255681624, "learning_rate": 1.361095337168019e-06, "loss": 0.283, "step": 10940 }, { "epoch": 1.6742157612853865, "grad_norm": 1.9868929754295699, "learning_rate": 1.3598473919053524e-06, "loss": 0.2842, "step": 10941 }, { "epoch": 1.6743687834736036, "grad_norm": 1.7594827525908399, "learning_rate": 1.3585999772682568e-06, "loss": 0.2647, "step": 10942 }, { "epoch": 1.674521805661821, "grad_norm": 2.061653215879647, "learning_rate": 1.3573530933333423e-06, "loss": 0.2889, "step": 10943 }, { "epoch": 1.6746748278500383, "grad_norm": 2.0181626743821615, "learning_rate": 1.3561067401771854e-06, "loss": 0.3083, "step": 10944 }, { "epoch": 1.6748278500382554, "grad_norm": 2.2322001593250973, "learning_rate": 1.354860917876325e-06, "loss": 0.3274, "step": 10945 }, { "epoch": 1.674980872226473, "grad_norm": 2.033061742018054, "learning_rate": 1.3536156265072808e-06, "loss": 0.2822, "step": 10946 }, { "epoch": 1.67513389441469, "grad_norm": 2.0820930216656577, "learning_rate": 1.352370866146524e-06, "loss": 0.2432, "step": 10947 }, { "epoch": 1.6752869166029074, "grad_norm": 2.2752337340265045, "learning_rate": 1.3511266368704989e-06, "loss": 0.2551, "step": 10948 }, { "epoch": 1.6754399387911247, "grad_norm": 2.2610051373444007, "learning_rate": 1.3498829387556268e-06, "loss": 0.3106, "step": 10949 }, { "epoch": 1.6755929609793418, "grad_norm": 1.7493537756146116, "learning_rate": 1.3486397718782796e-06, "loss": 0.2036, "step": 10950 }, { "epoch": 1.6757459831675594, "grad_norm": 2.3185756256801606, "learning_rate": 1.3473971363148074e-06, "loss": 0.3175, "step": 10951 }, { "epoch": 1.6758990053557765, "grad_norm": 2.425341043817578, "learning_rate": 1.3461550321415295e-06, "loss": 0.3125, "step": 10952 }, { "epoch": 1.6760520275439938, "grad_norm": 2.432317825748467, "learning_rate": 1.3449134594347213e-06, "loss": 0.3109, "step": 10953 }, { "epoch": 1.6762050497322112, "grad_norm": 2.5743220175885297, "learning_rate": 1.3436724182706396e-06, "loss": 0.305, "step": 10954 }, { "epoch": 1.6763580719204285, "grad_norm": 1.9770393582495958, "learning_rate": 1.342431908725499e-06, "loss": 0.2984, "step": 10955 }, { "epoch": 1.6765110941086458, "grad_norm": 2.1248514001509116, "learning_rate": 1.3411919308754794e-06, "loss": 0.3008, "step": 10956 }, { "epoch": 1.676664116296863, "grad_norm": 2.0468319850996872, "learning_rate": 1.3399524847967405e-06, "loss": 0.2727, "step": 10957 }, { "epoch": 1.6768171384850803, "grad_norm": 2.036751420201803, "learning_rate": 1.3387135705653986e-06, "loss": 0.274, "step": 10958 }, { "epoch": 1.6769701606732976, "grad_norm": 1.9180774151723226, "learning_rate": 1.3374751882575354e-06, "loss": 0.2751, "step": 10959 }, { "epoch": 1.677123182861515, "grad_norm": 2.4520454556972746, "learning_rate": 1.336237337949211e-06, "loss": 0.3382, "step": 10960 }, { "epoch": 1.6772762050497323, "grad_norm": 1.761323796608222, "learning_rate": 1.3350000197164436e-06, "loss": 0.2501, "step": 10961 }, { "epoch": 1.6774292272379494, "grad_norm": 2.062316820272847, "learning_rate": 1.3337632336352234e-06, "loss": 0.2693, "step": 10962 }, { "epoch": 1.677582249426167, "grad_norm": 2.0720852517253214, "learning_rate": 1.3325269797815066e-06, "loss": 0.3164, "step": 10963 }, { "epoch": 1.677735271614384, "grad_norm": 2.27703499150179, "learning_rate": 1.3312912582312143e-06, "loss": 0.3367, "step": 10964 }, { "epoch": 1.6778882938026014, "grad_norm": 1.960915018382373, "learning_rate": 1.3300560690602382e-06, "loss": 0.2368, "step": 10965 }, { "epoch": 1.6780413159908187, "grad_norm": 1.9766829049595693, "learning_rate": 1.328821412344433e-06, "loss": 0.2469, "step": 10966 }, { "epoch": 1.6781943381790358, "grad_norm": 1.9250997866626893, "learning_rate": 1.3275872881596319e-06, "loss": 0.2875, "step": 10967 }, { "epoch": 1.6783473603672534, "grad_norm": 1.549837070098533, "learning_rate": 1.3263536965816203e-06, "loss": 0.2024, "step": 10968 }, { "epoch": 1.6785003825554705, "grad_norm": 1.7642085141734583, "learning_rate": 1.3251206376861569e-06, "loss": 0.248, "step": 10969 }, { "epoch": 1.6786534047436878, "grad_norm": 2.0836891565839486, "learning_rate": 1.3238881115489755e-06, "loss": 0.2646, "step": 10970 }, { "epoch": 1.6788064269319052, "grad_norm": 2.008808530117174, "learning_rate": 1.3226561182457642e-06, "loss": 0.3069, "step": 10971 }, { "epoch": 1.6789594491201223, "grad_norm": 2.25601227776761, "learning_rate": 1.321424657852185e-06, "loss": 0.3359, "step": 10972 }, { "epoch": 1.6791124713083398, "grad_norm": 2.028677462966108, "learning_rate": 1.3201937304438728e-06, "loss": 0.3192, "step": 10973 }, { "epoch": 1.679265493496557, "grad_norm": 1.9466678524250804, "learning_rate": 1.3189633360964137e-06, "loss": 0.3153, "step": 10974 }, { "epoch": 1.6794185156847743, "grad_norm": 2.045679294294252, "learning_rate": 1.3177334748853798e-06, "loss": 0.3224, "step": 10975 }, { "epoch": 1.6795715378729916, "grad_norm": 1.902697425925076, "learning_rate": 1.3165041468863004e-06, "loss": 0.2409, "step": 10976 }, { "epoch": 1.6797245600612087, "grad_norm": 1.9497283206798874, "learning_rate": 1.3152753521746676e-06, "loss": 0.2643, "step": 10977 }, { "epoch": 1.6798775822494263, "grad_norm": 2.1252780328224996, "learning_rate": 1.3140470908259517e-06, "loss": 0.327, "step": 10978 }, { "epoch": 1.6800306044376434, "grad_norm": 2.0928532425925597, "learning_rate": 1.312819362915585e-06, "loss": 0.2725, "step": 10979 }, { "epoch": 1.6801836266258607, "grad_norm": 2.2119579928711413, "learning_rate": 1.3115921685189625e-06, "loss": 0.2886, "step": 10980 }, { "epoch": 1.680336648814078, "grad_norm": 1.7936452479086349, "learning_rate": 1.3103655077114563e-06, "loss": 0.2175, "step": 10981 }, { "epoch": 1.6804896710022952, "grad_norm": 2.2272606564737445, "learning_rate": 1.3091393805683972e-06, "loss": 0.3237, "step": 10982 }, { "epoch": 1.6806426931905127, "grad_norm": 2.356171938700306, "learning_rate": 1.3079137871650894e-06, "loss": 0.2936, "step": 10983 }, { "epoch": 1.6807957153787298, "grad_norm": 2.232352653535342, "learning_rate": 1.306688727576798e-06, "loss": 0.3052, "step": 10984 }, { "epoch": 1.6809487375669472, "grad_norm": 2.1782319417814153, "learning_rate": 1.3054642018787612e-06, "loss": 0.3149, "step": 10985 }, { "epoch": 1.6811017597551645, "grad_norm": 2.2406002945095738, "learning_rate": 1.3042402101461804e-06, "loss": 0.2902, "step": 10986 }, { "epoch": 1.6812547819433816, "grad_norm": 1.9244081104162174, "learning_rate": 1.303016752454226e-06, "loss": 0.2905, "step": 10987 }, { "epoch": 1.6814078041315992, "grad_norm": 1.797026733705302, "learning_rate": 1.3017938288780363e-06, "loss": 0.3086, "step": 10988 }, { "epoch": 1.6815608263198163, "grad_norm": 1.9509682390662428, "learning_rate": 1.300571439492715e-06, "loss": 0.2575, "step": 10989 }, { "epoch": 1.6817138485080336, "grad_norm": 2.45570148479363, "learning_rate": 1.2993495843733339e-06, "loss": 0.3735, "step": 10990 }, { "epoch": 1.681866870696251, "grad_norm": 1.949368989482816, "learning_rate": 1.298128263594932e-06, "loss": 0.3218, "step": 10991 }, { "epoch": 1.6820198928844683, "grad_norm": 2.067653056965363, "learning_rate": 1.2969074772325151e-06, "loss": 0.3221, "step": 10992 }, { "epoch": 1.6821729150726856, "grad_norm": 2.311033815856632, "learning_rate": 1.2956872253610576e-06, "loss": 0.3396, "step": 10993 }, { "epoch": 1.6823259372609027, "grad_norm": 1.9032585435009628, "learning_rate": 1.294467508055498e-06, "loss": 0.2379, "step": 10994 }, { "epoch": 1.68247895944912, "grad_norm": 2.2597516199257406, "learning_rate": 1.2932483253907446e-06, "loss": 0.2719, "step": 10995 }, { "epoch": 1.6826319816373374, "grad_norm": 2.196069288251133, "learning_rate": 1.2920296774416742e-06, "loss": 0.3325, "step": 10996 }, { "epoch": 1.6827850038255547, "grad_norm": 2.089356908329131, "learning_rate": 1.2908115642831254e-06, "loss": 0.2846, "step": 10997 }, { "epoch": 1.682938026013772, "grad_norm": 2.500008607734077, "learning_rate": 1.2895939859899075e-06, "loss": 0.3552, "step": 10998 }, { "epoch": 1.6830910482019892, "grad_norm": 2.214635489148657, "learning_rate": 1.2883769426368032e-06, "loss": 0.3502, "step": 10999 }, { "epoch": 1.6832440703902067, "grad_norm": 1.915744367799281, "learning_rate": 1.2871604342985478e-06, "loss": 0.2487, "step": 11000 }, { "epoch": 1.6833970925784238, "grad_norm": 2.2836247432485948, "learning_rate": 1.2859444610498539e-06, "loss": 0.293, "step": 11001 }, { "epoch": 1.6835501147666412, "grad_norm": 1.904948231183335, "learning_rate": 1.284729022965403e-06, "loss": 0.2842, "step": 11002 }, { "epoch": 1.6837031369548585, "grad_norm": 2.108321600771051, "learning_rate": 1.2835141201198343e-06, "loss": 0.2656, "step": 11003 }, { "epoch": 1.6838561591430756, "grad_norm": 2.093179930514413, "learning_rate": 1.2822997525877633e-06, "loss": 0.2816, "step": 11004 }, { "epoch": 1.6840091813312932, "grad_norm": 2.377483764466178, "learning_rate": 1.281085920443772e-06, "loss": 0.3617, "step": 11005 }, { "epoch": 1.6841622035195103, "grad_norm": 2.167029247250692, "learning_rate": 1.2798726237623971e-06, "loss": 0.2789, "step": 11006 }, { "epoch": 1.6843152257077276, "grad_norm": 2.182866340215894, "learning_rate": 1.2786598626181613e-06, "loss": 0.2687, "step": 11007 }, { "epoch": 1.684468247895945, "grad_norm": 1.9979186288108053, "learning_rate": 1.2774476370855426e-06, "loss": 0.2822, "step": 11008 }, { "epoch": 1.684621270084162, "grad_norm": 2.1936633301966597, "learning_rate": 1.276235947238983e-06, "loss": 0.2928, "step": 11009 }, { "epoch": 1.6847742922723796, "grad_norm": 2.104500943307297, "learning_rate": 1.2750247931529035e-06, "loss": 0.2523, "step": 11010 }, { "epoch": 1.6849273144605967, "grad_norm": 1.920693123555798, "learning_rate": 1.273814174901684e-06, "loss": 0.3069, "step": 11011 }, { "epoch": 1.685080336648814, "grad_norm": 2.299114587660011, "learning_rate": 1.272604092559674e-06, "loss": 0.295, "step": 11012 }, { "epoch": 1.6852333588370314, "grad_norm": 2.1949856289025647, "learning_rate": 1.2713945462011868e-06, "loss": 0.2994, "step": 11013 }, { "epoch": 1.6853863810252485, "grad_norm": 1.9841400340099198, "learning_rate": 1.2701855359005077e-06, "loss": 0.2852, "step": 11014 }, { "epoch": 1.685539403213466, "grad_norm": 2.4095612568256866, "learning_rate": 1.2689770617318864e-06, "loss": 0.2926, "step": 11015 }, { "epoch": 1.6856924254016832, "grad_norm": 2.067956237774031, "learning_rate": 1.2677691237695399e-06, "loss": 0.3413, "step": 11016 }, { "epoch": 1.6858454475899005, "grad_norm": 2.2597156371173814, "learning_rate": 1.2665617220876513e-06, "loss": 0.3331, "step": 11017 }, { "epoch": 1.6859984697781178, "grad_norm": 2.1700644455923297, "learning_rate": 1.2653548567603736e-06, "loss": 0.3578, "step": 11018 }, { "epoch": 1.686151491966335, "grad_norm": 2.0662294654838527, "learning_rate": 1.2641485278618237e-06, "loss": 0.2941, "step": 11019 }, { "epoch": 1.6863045141545525, "grad_norm": 2.129039860992622, "learning_rate": 1.2629427354660872e-06, "loss": 0.3242, "step": 11020 }, { "epoch": 1.6864575363427696, "grad_norm": 2.3578673412858, "learning_rate": 1.2617374796472181e-06, "loss": 0.3425, "step": 11021 }, { "epoch": 1.686610558530987, "grad_norm": 2.057803857866164, "learning_rate": 1.260532760479234e-06, "loss": 0.2395, "step": 11022 }, { "epoch": 1.6867635807192043, "grad_norm": 2.0059512760022593, "learning_rate": 1.259328578036122e-06, "loss": 0.3397, "step": 11023 }, { "epoch": 1.6869166029074216, "grad_norm": 2.049651286807147, "learning_rate": 1.2581249323918354e-06, "loss": 0.2789, "step": 11024 }, { "epoch": 1.687069625095639, "grad_norm": 2.499375308441647, "learning_rate": 1.2569218236202952e-06, "loss": 0.3722, "step": 11025 }, { "epoch": 1.687222647283856, "grad_norm": 1.960400132055227, "learning_rate": 1.2557192517953897e-06, "loss": 0.2968, "step": 11026 }, { "epoch": 1.6873756694720734, "grad_norm": 2.1366266710985413, "learning_rate": 1.2545172169909703e-06, "loss": 0.283, "step": 11027 }, { "epoch": 1.6875286916602907, "grad_norm": 2.039190519536008, "learning_rate": 1.2533157192808644e-06, "loss": 0.3212, "step": 11028 }, { "epoch": 1.687681713848508, "grad_norm": 2.0949669870871093, "learning_rate": 1.252114758738856e-06, "loss": 0.2679, "step": 11029 }, { "epoch": 1.6878347360367254, "grad_norm": 2.0416473705727487, "learning_rate": 1.250914335438701e-06, "loss": 0.2916, "step": 11030 }, { "epoch": 1.6879877582249425, "grad_norm": 2.09341563269475, "learning_rate": 1.2497144494541258e-06, "loss": 0.2946, "step": 11031 }, { "epoch": 1.68814078041316, "grad_norm": 2.270626399599111, "learning_rate": 1.248515100858817e-06, "loss": 0.331, "step": 11032 }, { "epoch": 1.6882938026013772, "grad_norm": 2.2390309838409883, "learning_rate": 1.247316289726429e-06, "loss": 0.3092, "step": 11033 }, { "epoch": 1.6884468247895945, "grad_norm": 2.1983290793456254, "learning_rate": 1.2461180161305919e-06, "loss": 0.3022, "step": 11034 }, { "epoch": 1.6885998469778118, "grad_norm": 2.0568056713653875, "learning_rate": 1.24492028014489e-06, "loss": 0.3016, "step": 11035 }, { "epoch": 1.688752869166029, "grad_norm": 2.3206498756421152, "learning_rate": 1.2437230818428846e-06, "loss": 0.3531, "step": 11036 }, { "epoch": 1.6889058913542465, "grad_norm": 2.0813455867828643, "learning_rate": 1.2425264212981024e-06, "loss": 0.2628, "step": 11037 }, { "epoch": 1.6890589135424636, "grad_norm": 2.0413458094502785, "learning_rate": 1.241330298584027e-06, "loss": 0.3088, "step": 11038 }, { "epoch": 1.689211935730681, "grad_norm": 2.1582679198860637, "learning_rate": 1.2401347137741248e-06, "loss": 0.3091, "step": 11039 }, { "epoch": 1.6893649579188983, "grad_norm": 2.069240644552984, "learning_rate": 1.2389396669418185e-06, "loss": 0.2866, "step": 11040 }, { "epoch": 1.6895179801071154, "grad_norm": 1.9584455619267858, "learning_rate": 1.2377451581605015e-06, "loss": 0.3061, "step": 11041 }, { "epoch": 1.689671002295333, "grad_norm": 2.3277588997885412, "learning_rate": 1.2365511875035319e-06, "loss": 0.3369, "step": 11042 }, { "epoch": 1.68982402448355, "grad_norm": 1.9800477950424002, "learning_rate": 1.2353577550442363e-06, "loss": 0.2405, "step": 11043 }, { "epoch": 1.6899770466717674, "grad_norm": 2.160902013523082, "learning_rate": 1.2341648608559088e-06, "loss": 0.2896, "step": 11044 }, { "epoch": 1.6901300688599847, "grad_norm": 2.1642624810194877, "learning_rate": 1.2329725050118091e-06, "loss": 0.2678, "step": 11045 }, { "epoch": 1.6902830910482018, "grad_norm": 2.209833345083377, "learning_rate": 1.2317806875851646e-06, "loss": 0.2937, "step": 11046 }, { "epoch": 1.6904361132364194, "grad_norm": 2.402841329362798, "learning_rate": 1.23058940864917e-06, "loss": 0.286, "step": 11047 }, { "epoch": 1.6905891354246365, "grad_norm": 2.522322338055443, "learning_rate": 1.2293986682769832e-06, "loss": 0.3434, "step": 11048 }, { "epoch": 1.6907421576128538, "grad_norm": 1.9879433395929236, "learning_rate": 1.2282084665417404e-06, "loss": 0.2557, "step": 11049 }, { "epoch": 1.6908951798010712, "grad_norm": 2.0561928258750366, "learning_rate": 1.2270188035165277e-06, "loss": 0.2496, "step": 11050 }, { "epoch": 1.6910482019892883, "grad_norm": 1.9106083750167469, "learning_rate": 1.2258296792744084e-06, "loss": 0.3045, "step": 11051 }, { "epoch": 1.6912012241775058, "grad_norm": 1.8611621144706574, "learning_rate": 1.224641093888418e-06, "loss": 0.2356, "step": 11052 }, { "epoch": 1.691354246365723, "grad_norm": 2.020755692551469, "learning_rate": 1.2234530474315453e-06, "loss": 0.2391, "step": 11053 }, { "epoch": 1.6915072685539403, "grad_norm": 2.3678631524009353, "learning_rate": 1.2222655399767524e-06, "loss": 0.2883, "step": 11054 }, { "epoch": 1.6916602907421576, "grad_norm": 2.246798245562036, "learning_rate": 1.2210785715969765e-06, "loss": 0.3168, "step": 11055 }, { "epoch": 1.691813312930375, "grad_norm": 2.1634726407960088, "learning_rate": 1.2198921423651034e-06, "loss": 0.2602, "step": 11056 }, { "epoch": 1.6919663351185923, "grad_norm": 1.8821433603848743, "learning_rate": 1.218706252354005e-06, "loss": 0.2641, "step": 11057 }, { "epoch": 1.6921193573068094, "grad_norm": 2.1581611539677863, "learning_rate": 1.2175209016365098e-06, "loss": 0.3223, "step": 11058 }, { "epoch": 1.6922723794950267, "grad_norm": 2.07129171472233, "learning_rate": 1.2163360902854094e-06, "loss": 0.3231, "step": 11059 }, { "epoch": 1.692425401683244, "grad_norm": 2.270422976688089, "learning_rate": 1.2151518183734735e-06, "loss": 0.311, "step": 11060 }, { "epoch": 1.6925784238714614, "grad_norm": 1.8203368474711896, "learning_rate": 1.2139680859734326e-06, "loss": 0.2481, "step": 11061 }, { "epoch": 1.6927314460596787, "grad_norm": 2.0614554546840553, "learning_rate": 1.2127848931579788e-06, "loss": 0.2811, "step": 11062 }, { "epoch": 1.6928844682478958, "grad_norm": 2.196661166866587, "learning_rate": 1.2116022399997828e-06, "loss": 0.3167, "step": 11063 }, { "epoch": 1.6930374904361134, "grad_norm": 2.077107983851203, "learning_rate": 1.2104201265714743e-06, "loss": 0.2713, "step": 11064 }, { "epoch": 1.6931905126243305, "grad_norm": 2.0755681276903424, "learning_rate": 1.2092385529456497e-06, "loss": 0.2502, "step": 11065 }, { "epoch": 1.6933435348125478, "grad_norm": 2.22983973981828, "learning_rate": 1.2080575191948763e-06, "loss": 0.2748, "step": 11066 }, { "epoch": 1.6934965570007652, "grad_norm": 2.2026304230846323, "learning_rate": 1.206877025391684e-06, "loss": 0.2916, "step": 11067 }, { "epoch": 1.6936495791889823, "grad_norm": 2.1680872094915915, "learning_rate": 1.2056970716085724e-06, "loss": 0.2673, "step": 11068 }, { "epoch": 1.6938026013771998, "grad_norm": 1.849240136022882, "learning_rate": 1.2045176579180074e-06, "loss": 0.2087, "step": 11069 }, { "epoch": 1.693955623565417, "grad_norm": 2.0659329721045308, "learning_rate": 1.2033387843924215e-06, "loss": 0.2818, "step": 11070 }, { "epoch": 1.6941086457536343, "grad_norm": 2.2135172712744176, "learning_rate": 1.202160451104213e-06, "loss": 0.3076, "step": 11071 }, { "epoch": 1.6942616679418516, "grad_norm": 2.20420719101936, "learning_rate": 1.2009826581257488e-06, "loss": 0.3086, "step": 11072 }, { "epoch": 1.6944146901300687, "grad_norm": 1.9144814530586822, "learning_rate": 1.1998054055293617e-06, "loss": 0.4203, "step": 11073 }, { "epoch": 1.6945677123182863, "grad_norm": 2.5558070625916343, "learning_rate": 1.1986286933873503e-06, "loss": 0.2861, "step": 11074 }, { "epoch": 1.6947207345065034, "grad_norm": 2.2374112894675595, "learning_rate": 1.1974525217719835e-06, "loss": 0.3367, "step": 11075 }, { "epoch": 1.6948737566947207, "grad_norm": 2.1946913555112637, "learning_rate": 1.1962768907554923e-06, "loss": 0.3348, "step": 11076 }, { "epoch": 1.695026778882938, "grad_norm": 2.151829815904901, "learning_rate": 1.1951018004100757e-06, "loss": 0.3246, "step": 11077 }, { "epoch": 1.6951798010711552, "grad_norm": 2.2967753420143513, "learning_rate": 1.1939272508079058e-06, "loss": 0.335, "step": 11078 }, { "epoch": 1.6953328232593727, "grad_norm": 2.2577136557529838, "learning_rate": 1.192753242021112e-06, "loss": 0.2895, "step": 11079 }, { "epoch": 1.6954858454475898, "grad_norm": 2.128267511682266, "learning_rate": 1.1915797741217928e-06, "loss": 0.257, "step": 11080 }, { "epoch": 1.6956388676358072, "grad_norm": 2.4002431250160567, "learning_rate": 1.190406847182023e-06, "loss": 0.3277, "step": 11081 }, { "epoch": 1.6957918898240245, "grad_norm": 2.2038719394242023, "learning_rate": 1.1892344612738305e-06, "loss": 0.263, "step": 11082 }, { "epoch": 1.6959449120122416, "grad_norm": 1.8881112103439488, "learning_rate": 1.1880626164692154e-06, "loss": 0.2243, "step": 11083 }, { "epoch": 1.6960979342004592, "grad_norm": 2.138352754268959, "learning_rate": 1.1868913128401504e-06, "loss": 0.2879, "step": 11084 }, { "epoch": 1.6962509563886763, "grad_norm": 1.828210483145325, "learning_rate": 1.1857205504585645e-06, "loss": 0.2238, "step": 11085 }, { "epoch": 1.6964039785768936, "grad_norm": 2.5405874955917516, "learning_rate": 1.1845503293963623e-06, "loss": 0.3505, "step": 11086 }, { "epoch": 1.696557000765111, "grad_norm": 2.111535006004857, "learning_rate": 1.183380649725413e-06, "loss": 0.2496, "step": 11087 }, { "epoch": 1.696710022953328, "grad_norm": 2.424057326610335, "learning_rate": 1.1822115115175448e-06, "loss": 0.3544, "step": 11088 }, { "epoch": 1.6968630451415456, "grad_norm": 2.4592854858100512, "learning_rate": 1.1810429148445645e-06, "loss": 0.2626, "step": 11089 }, { "epoch": 1.6970160673297627, "grad_norm": 2.042198436206543, "learning_rate": 1.1798748597782417e-06, "loss": 0.2835, "step": 11090 }, { "epoch": 1.69716908951798, "grad_norm": 2.275740572056835, "learning_rate": 1.1787073463903033e-06, "loss": 0.2623, "step": 11091 }, { "epoch": 1.6973221117061974, "grad_norm": 2.158557298967889, "learning_rate": 1.1775403747524582e-06, "loss": 0.2947, "step": 11092 }, { "epoch": 1.6974751338944147, "grad_norm": 1.9757088684466695, "learning_rate": 1.176373944936372e-06, "loss": 0.2579, "step": 11093 }, { "epoch": 1.697628156082632, "grad_norm": 1.7875604110832137, "learning_rate": 1.1752080570136814e-06, "loss": 0.2421, "step": 11094 }, { "epoch": 1.6977811782708492, "grad_norm": 2.2847682190407674, "learning_rate": 1.174042711055986e-06, "loss": 0.2785, "step": 11095 }, { "epoch": 1.6979342004590665, "grad_norm": 2.1586205655942345, "learning_rate": 1.172877907134855e-06, "loss": 0.2986, "step": 11096 }, { "epoch": 1.6980872226472838, "grad_norm": 2.5074890553104403, "learning_rate": 1.1717136453218236e-06, "loss": 0.3686, "step": 11097 }, { "epoch": 1.6982402448355012, "grad_norm": 2.01025453080292, "learning_rate": 1.1705499256883934e-06, "loss": 0.2587, "step": 11098 }, { "epoch": 1.6983932670237185, "grad_norm": 2.3308542569292303, "learning_rate": 1.1693867483060328e-06, "loss": 0.2932, "step": 11099 }, { "epoch": 1.6985462892119356, "grad_norm": 2.234928679181359, "learning_rate": 1.1682241132461791e-06, "loss": 0.2752, "step": 11100 }, { "epoch": 1.6986993114001532, "grad_norm": 2.4713138884866472, "learning_rate": 1.1670620205802319e-06, "loss": 0.3389, "step": 11101 }, { "epoch": 1.6988523335883703, "grad_norm": 2.471201205818322, "learning_rate": 1.1659004703795607e-06, "loss": 0.2308, "step": 11102 }, { "epoch": 1.6990053557765876, "grad_norm": 1.7215349224563574, "learning_rate": 1.1647394627155006e-06, "loss": 0.1932, "step": 11103 }, { "epoch": 1.699158377964805, "grad_norm": 2.0922403083721925, "learning_rate": 1.1635789976593536e-06, "loss": 0.2619, "step": 11104 }, { "epoch": 1.699311400153022, "grad_norm": 2.302314103427617, "learning_rate": 1.1624190752823895e-06, "loss": 0.3499, "step": 11105 }, { "epoch": 1.6994644223412396, "grad_norm": 1.869357598102231, "learning_rate": 1.1612596956558398e-06, "loss": 0.2962, "step": 11106 }, { "epoch": 1.6996174445294567, "grad_norm": 2.0397788159015935, "learning_rate": 1.1601008588509143e-06, "loss": 0.2743, "step": 11107 }, { "epoch": 1.699770466717674, "grad_norm": 2.0520554519115093, "learning_rate": 1.1589425649387743e-06, "loss": 0.2592, "step": 11108 }, { "epoch": 1.6999234889058914, "grad_norm": 1.7852914422848998, "learning_rate": 1.1577848139905568e-06, "loss": 0.2239, "step": 11109 }, { "epoch": 1.7000765110941085, "grad_norm": 2.014202409879043, "learning_rate": 1.1566276060773674e-06, "loss": 0.2447, "step": 11110 }, { "epoch": 1.700229533282326, "grad_norm": 1.989579755348068, "learning_rate": 1.1554709412702714e-06, "loss": 0.2221, "step": 11111 }, { "epoch": 1.7003825554705432, "grad_norm": 2.090257985515126, "learning_rate": 1.154314819640302e-06, "loss": 0.3446, "step": 11112 }, { "epoch": 1.7005355776587605, "grad_norm": 1.9125910003610571, "learning_rate": 1.153159241258469e-06, "loss": 0.2864, "step": 11113 }, { "epoch": 1.7006885998469778, "grad_norm": 2.1613839402350314, "learning_rate": 1.1520042061957315e-06, "loss": 0.2825, "step": 11114 }, { "epoch": 1.700841622035195, "grad_norm": 2.3295656766209616, "learning_rate": 1.1508497145230314e-06, "loss": 0.3438, "step": 11115 }, { "epoch": 1.7009946442234125, "grad_norm": 2.2090564568144075, "learning_rate": 1.1496957663112697e-06, "loss": 0.3128, "step": 11116 }, { "epoch": 1.7011476664116296, "grad_norm": 2.0492676006983297, "learning_rate": 1.1485423616313107e-06, "loss": 0.2841, "step": 11117 }, { "epoch": 1.701300688599847, "grad_norm": 2.1697518217573113, "learning_rate": 1.1473895005539937e-06, "loss": 0.2625, "step": 11118 }, { "epoch": 1.7014537107880643, "grad_norm": 1.9730217228325013, "learning_rate": 1.1462371831501207e-06, "loss": 0.2775, "step": 11119 }, { "epoch": 1.7016067329762814, "grad_norm": 2.088602724372186, "learning_rate": 1.1450854094904551e-06, "loss": 0.3239, "step": 11120 }, { "epoch": 1.701759755164499, "grad_norm": 2.3255899147858443, "learning_rate": 1.143934179645737e-06, "loss": 0.3086, "step": 11121 }, { "epoch": 1.701912777352716, "grad_norm": 1.9272329964493535, "learning_rate": 1.1427834936866655e-06, "loss": 0.2355, "step": 11122 }, { "epoch": 1.7020657995409334, "grad_norm": 1.9946843187147796, "learning_rate": 1.1416333516839107e-06, "loss": 0.2835, "step": 11123 }, { "epoch": 1.7022188217291507, "grad_norm": 2.173321803166349, "learning_rate": 1.1404837537081049e-06, "loss": 0.2785, "step": 11124 }, { "epoch": 1.702371843917368, "grad_norm": 2.0278180668942745, "learning_rate": 1.1393346998298505e-06, "loss": 0.266, "step": 11125 }, { "epoch": 1.7025248661055854, "grad_norm": 2.0094825725857173, "learning_rate": 1.1381861901197144e-06, "loss": 0.2798, "step": 11126 }, { "epoch": 1.7026778882938025, "grad_norm": 2.098979114428069, "learning_rate": 1.1370382246482337e-06, "loss": 0.2667, "step": 11127 }, { "epoch": 1.7028309104820198, "grad_norm": 2.0523444053923305, "learning_rate": 1.1358908034859062e-06, "loss": 0.3097, "step": 11128 }, { "epoch": 1.7029839326702372, "grad_norm": 2.1537059294575456, "learning_rate": 1.1347439267032023e-06, "loss": 0.2674, "step": 11129 }, { "epoch": 1.7031369548584545, "grad_norm": 2.1006487237785727, "learning_rate": 1.1335975943705523e-06, "loss": 0.2938, "step": 11130 }, { "epoch": 1.7032899770466718, "grad_norm": 2.3639342633413114, "learning_rate": 1.1324518065583633e-06, "loss": 0.2944, "step": 11131 }, { "epoch": 1.703442999234889, "grad_norm": 2.082932273742042, "learning_rate": 1.1313065633369968e-06, "loss": 0.2364, "step": 11132 }, { "epoch": 1.7035960214231065, "grad_norm": 2.188856557820004, "learning_rate": 1.1301618647767876e-06, "loss": 0.3076, "step": 11133 }, { "epoch": 1.7037490436113236, "grad_norm": 2.000951821270597, "learning_rate": 1.1290177109480406e-06, "loss": 0.2625, "step": 11134 }, { "epoch": 1.703902065799541, "grad_norm": 2.430335169813233, "learning_rate": 1.127874101921018e-06, "loss": 0.3042, "step": 11135 }, { "epoch": 1.7040550879877583, "grad_norm": 2.281204762892098, "learning_rate": 1.1267310377659524e-06, "loss": 0.3133, "step": 11136 }, { "epoch": 1.7042081101759754, "grad_norm": 1.9930439380863587, "learning_rate": 1.1255885185530502e-06, "loss": 0.2929, "step": 11137 }, { "epoch": 1.704361132364193, "grad_norm": 1.9680645229256954, "learning_rate": 1.12444654435247e-06, "loss": 0.2042, "step": 11138 }, { "epoch": 1.70451415455241, "grad_norm": 2.0093847715801285, "learning_rate": 1.123305115234351e-06, "loss": 0.349, "step": 11139 }, { "epoch": 1.7046671767406274, "grad_norm": 2.1094826918613556, "learning_rate": 1.122164231268793e-06, "loss": 0.2989, "step": 11140 }, { "epoch": 1.7048201989288447, "grad_norm": 2.169821861197907, "learning_rate": 1.1210238925258554e-06, "loss": 0.2632, "step": 11141 }, { "epoch": 1.7049732211170618, "grad_norm": 2.4069773754922466, "learning_rate": 1.1198840990755777e-06, "loss": 0.3368, "step": 11142 }, { "epoch": 1.7051262433052794, "grad_norm": 2.017085258934159, "learning_rate": 1.1187448509879562e-06, "loss": 0.2816, "step": 11143 }, { "epoch": 1.7052792654934965, "grad_norm": 1.9728566967575223, "learning_rate": 1.117606148332957e-06, "loss": 0.284, "step": 11144 }, { "epoch": 1.7054322876817138, "grad_norm": 1.9431671387155085, "learning_rate": 1.116467991180512e-06, "loss": 0.2646, "step": 11145 }, { "epoch": 1.7055853098699312, "grad_norm": 1.9854827370978707, "learning_rate": 1.115330379600521e-06, "loss": 0.2952, "step": 11146 }, { "epoch": 1.7057383320581483, "grad_norm": 2.280487685992356, "learning_rate": 1.114193313662848e-06, "loss": 0.2895, "step": 11147 }, { "epoch": 1.7058913542463658, "grad_norm": 2.287105129323108, "learning_rate": 1.113056793437326e-06, "loss": 0.2921, "step": 11148 }, { "epoch": 1.706044376434583, "grad_norm": 2.1288322442429384, "learning_rate": 1.1119208189937514e-06, "loss": 0.2851, "step": 11149 }, { "epoch": 1.7061973986228003, "grad_norm": 2.0158199871640665, "learning_rate": 1.1107853904018896e-06, "loss": 0.2171, "step": 11150 }, { "epoch": 1.7063504208110176, "grad_norm": 1.9626837031523319, "learning_rate": 1.1096505077314723e-06, "loss": 0.2358, "step": 11151 }, { "epoch": 1.7065034429992347, "grad_norm": 2.305121265245296, "learning_rate": 1.1085161710521962e-06, "loss": 0.2749, "step": 11152 }, { "epoch": 1.7066564651874523, "grad_norm": 2.038485482391815, "learning_rate": 1.1073823804337258e-06, "loss": 0.2631, "step": 11153 }, { "epoch": 1.7068094873756694, "grad_norm": 2.2882425227783862, "learning_rate": 1.1062491359456917e-06, "loss": 0.3128, "step": 11154 }, { "epoch": 1.7069625095638867, "grad_norm": 2.2012392539786134, "learning_rate": 1.1051164376576896e-06, "loss": 0.276, "step": 11155 }, { "epoch": 1.707115531752104, "grad_norm": 2.0394408445761663, "learning_rate": 1.1039842856392856e-06, "loss": 0.307, "step": 11156 }, { "epoch": 1.7072685539403214, "grad_norm": 2.128922991238398, "learning_rate": 1.102852679960007e-06, "loss": 0.2901, "step": 11157 }, { "epoch": 1.7074215761285387, "grad_norm": 1.8881752359487909, "learning_rate": 1.101721620689352e-06, "loss": 0.298, "step": 11158 }, { "epoch": 1.7075745983167558, "grad_norm": 2.0841273220620664, "learning_rate": 1.1005911078967802e-06, "loss": 0.2725, "step": 11159 }, { "epoch": 1.7077276205049732, "grad_norm": 1.994898698810137, "learning_rate": 1.0994611416517264e-06, "loss": 0.2369, "step": 11160 }, { "epoch": 1.7078806426931905, "grad_norm": 2.0006084196277003, "learning_rate": 1.0983317220235823e-06, "loss": 0.2582, "step": 11161 }, { "epoch": 1.7080336648814078, "grad_norm": 2.1415344517825736, "learning_rate": 1.0972028490817077e-06, "loss": 0.2784, "step": 11162 }, { "epoch": 1.7081866870696252, "grad_norm": 1.8469449110302756, "learning_rate": 1.0960745228954384e-06, "loss": 0.2703, "step": 11163 }, { "epoch": 1.7083397092578423, "grad_norm": 2.0745692309473167, "learning_rate": 1.0949467435340623e-06, "loss": 0.2781, "step": 11164 }, { "epoch": 1.7084927314460598, "grad_norm": 2.0757462371321473, "learning_rate": 1.0938195110668425e-06, "loss": 0.3172, "step": 11165 }, { "epoch": 1.708645753634277, "grad_norm": 1.9539949708197906, "learning_rate": 1.092692825563011e-06, "loss": 0.2808, "step": 11166 }, { "epoch": 1.7087987758224943, "grad_norm": 1.8315528166629997, "learning_rate": 1.091566687091755e-06, "loss": 0.2459, "step": 11167 }, { "epoch": 1.7089517980107116, "grad_norm": 1.7884907895676587, "learning_rate": 1.0904410957222411e-06, "loss": 0.234, "step": 11168 }, { "epoch": 1.7091048201989287, "grad_norm": 2.2466456776934662, "learning_rate": 1.0893160515235957e-06, "loss": 0.3361, "step": 11169 }, { "epoch": 1.7092578423871463, "grad_norm": 2.4207583570260023, "learning_rate": 1.0881915545649058e-06, "loss": 0.3154, "step": 11170 }, { "epoch": 1.7094108645753634, "grad_norm": 1.9753841454069814, "learning_rate": 1.0870676049152385e-06, "loss": 0.2679, "step": 11171 }, { "epoch": 1.7095638867635807, "grad_norm": 2.3190131346279514, "learning_rate": 1.0859442026436185e-06, "loss": 0.2965, "step": 11172 }, { "epoch": 1.709716908951798, "grad_norm": 2.160003228975004, "learning_rate": 1.0848213478190328e-06, "loss": 0.3465, "step": 11173 }, { "epoch": 1.7098699311400152, "grad_norm": 2.3473105298032264, "learning_rate": 1.0836990405104475e-06, "loss": 0.3369, "step": 11174 }, { "epoch": 1.7100229533282327, "grad_norm": 2.014596179323918, "learning_rate": 1.082577280786784e-06, "loss": 0.269, "step": 11175 }, { "epoch": 1.7101759755164498, "grad_norm": 2.057023392344061, "learning_rate": 1.081456068716935e-06, "loss": 0.3078, "step": 11176 }, { "epoch": 1.7103289977046672, "grad_norm": 2.444349548800692, "learning_rate": 1.0803354043697588e-06, "loss": 0.3089, "step": 11177 }, { "epoch": 1.7104820198928845, "grad_norm": 1.9832948768618603, "learning_rate": 1.0792152878140794e-06, "loss": 0.3157, "step": 11178 }, { "epoch": 1.7106350420811016, "grad_norm": 2.1437773295090126, "learning_rate": 1.078095719118687e-06, "loss": 0.3203, "step": 11179 }, { "epoch": 1.7107880642693192, "grad_norm": 1.9014695196455216, "learning_rate": 1.0769766983523388e-06, "loss": 0.2788, "step": 11180 }, { "epoch": 1.7109410864575363, "grad_norm": 1.8859102107863197, "learning_rate": 1.0758582255837591e-06, "loss": 0.2665, "step": 11181 }, { "epoch": 1.7110941086457536, "grad_norm": 2.0492514664079473, "learning_rate": 1.0747403008816382e-06, "loss": 0.2679, "step": 11182 }, { "epoch": 1.711247130833971, "grad_norm": 2.0658287803286197, "learning_rate": 1.07362292431463e-06, "loss": 0.2491, "step": 11183 }, { "epoch": 1.711400153022188, "grad_norm": 2.1805943892160475, "learning_rate": 1.0725060959513578e-06, "loss": 0.3342, "step": 11184 }, { "epoch": 1.7115531752104056, "grad_norm": 1.9814368426589373, "learning_rate": 1.0713898158604119e-06, "loss": 0.2748, "step": 11185 }, { "epoch": 1.7117061973986227, "grad_norm": 2.0479335820218343, "learning_rate": 1.0702740841103455e-06, "loss": 0.3067, "step": 11186 }, { "epoch": 1.71185921958684, "grad_norm": 2.2402698668164467, "learning_rate": 1.0691589007696811e-06, "loss": 0.2936, "step": 11187 }, { "epoch": 1.7120122417750574, "grad_norm": 2.175527755245013, "learning_rate": 1.0680442659069046e-06, "loss": 0.3096, "step": 11188 }, { "epoch": 1.7121652639632745, "grad_norm": 2.259874022178763, "learning_rate": 1.0669301795904762e-06, "loss": 0.3045, "step": 11189 }, { "epoch": 1.712318286151492, "grad_norm": 2.206801471311079, "learning_rate": 1.0658166418888094e-06, "loss": 0.2307, "step": 11190 }, { "epoch": 1.7124713083397092, "grad_norm": 2.0808032911944663, "learning_rate": 1.0647036528702915e-06, "loss": 0.3301, "step": 11191 }, { "epoch": 1.7126243305279265, "grad_norm": 2.009233915278547, "learning_rate": 1.06359121260328e-06, "loss": 0.2486, "step": 11192 }, { "epoch": 1.7127773527161438, "grad_norm": 1.9884306466496928, "learning_rate": 1.0624793211560913e-06, "loss": 0.236, "step": 11193 }, { "epoch": 1.7129303749043612, "grad_norm": 1.9343940879298103, "learning_rate": 1.0613679785970087e-06, "loss": 0.2245, "step": 11194 }, { "epoch": 1.7130833970925785, "grad_norm": 2.068900757215732, "learning_rate": 1.06025718499429e-06, "loss": 0.336, "step": 11195 }, { "epoch": 1.7132364192807956, "grad_norm": 2.107983069680888, "learning_rate": 1.059146940416147e-06, "loss": 0.2883, "step": 11196 }, { "epoch": 1.713389441469013, "grad_norm": 2.0972134931039657, "learning_rate": 1.0580372449307686e-06, "loss": 0.2593, "step": 11197 }, { "epoch": 1.7135424636572303, "grad_norm": 2.2987713281363344, "learning_rate": 1.0569280986063058e-06, "loss": 0.3548, "step": 11198 }, { "epoch": 1.7136954858454476, "grad_norm": 2.059416623438375, "learning_rate": 1.0558195015108708e-06, "loss": 0.2374, "step": 11199 }, { "epoch": 1.713848508033665, "grad_norm": 2.161279175677584, "learning_rate": 1.0547114537125514e-06, "loss": 0.2702, "step": 11200 }, { "epoch": 1.714001530221882, "grad_norm": 1.9833873999128637, "learning_rate": 1.0536039552793987e-06, "loss": 0.2869, "step": 11201 }, { "epoch": 1.7141545524100996, "grad_norm": 1.941243048171417, "learning_rate": 1.0524970062794203e-06, "loss": 0.248, "step": 11202 }, { "epoch": 1.7143075745983167, "grad_norm": 2.215030856608608, "learning_rate": 1.0513906067806067e-06, "loss": 0.2854, "step": 11203 }, { "epoch": 1.714460596786534, "grad_norm": 2.09891972731733, "learning_rate": 1.0502847568509023e-06, "loss": 0.2748, "step": 11204 }, { "epoch": 1.7146136189747514, "grad_norm": 1.9231481988683312, "learning_rate": 1.0491794565582225e-06, "loss": 0.227, "step": 11205 }, { "epoch": 1.7147666411629685, "grad_norm": 2.1008690413719227, "learning_rate": 1.0480747059704488e-06, "loss": 0.2827, "step": 11206 }, { "epoch": 1.714919663351186, "grad_norm": 1.9290761821050166, "learning_rate": 1.0469705051554269e-06, "loss": 0.3185, "step": 11207 }, { "epoch": 1.7150726855394032, "grad_norm": 2.343551545185132, "learning_rate": 1.0458668541809714e-06, "loss": 0.3501, "step": 11208 }, { "epoch": 1.7152257077276205, "grad_norm": 1.788433718216971, "learning_rate": 1.0447637531148592e-06, "loss": 0.2199, "step": 11209 }, { "epoch": 1.7153787299158378, "grad_norm": 2.0652724541004863, "learning_rate": 1.0436612020248404e-06, "loss": 0.2759, "step": 11210 }, { "epoch": 1.715531752104055, "grad_norm": 1.9281228885043027, "learning_rate": 1.0425592009786246e-06, "loss": 0.2673, "step": 11211 }, { "epoch": 1.7156847742922725, "grad_norm": 2.106942568657105, "learning_rate": 1.0414577500438873e-06, "loss": 0.2637, "step": 11212 }, { "epoch": 1.7158377964804896, "grad_norm": 2.0071831488097227, "learning_rate": 1.0403568492882786e-06, "loss": 0.2315, "step": 11213 }, { "epoch": 1.715990818668707, "grad_norm": 2.0902800708364024, "learning_rate": 1.039256498779405e-06, "loss": 0.2754, "step": 11214 }, { "epoch": 1.7161438408569243, "grad_norm": 1.998381374019673, "learning_rate": 1.0381566985848423e-06, "loss": 0.2503, "step": 11215 }, { "epoch": 1.7162968630451414, "grad_norm": 2.275807287322682, "learning_rate": 1.0370574487721396e-06, "loss": 0.2725, "step": 11216 }, { "epoch": 1.716449885233359, "grad_norm": 2.4486035736270746, "learning_rate": 1.0359587494087985e-06, "loss": 0.2977, "step": 11217 }, { "epoch": 1.716602907421576, "grad_norm": 1.9189903077463175, "learning_rate": 1.0348606005622997e-06, "loss": 0.2505, "step": 11218 }, { "epoch": 1.7167559296097934, "grad_norm": 2.1347410423324025, "learning_rate": 1.0337630023000856e-06, "loss": 0.287, "step": 11219 }, { "epoch": 1.7169089517980107, "grad_norm": 2.043348875111778, "learning_rate": 1.032665954689558e-06, "loss": 0.2275, "step": 11220 }, { "epoch": 1.7170619739862278, "grad_norm": 1.9169361145288593, "learning_rate": 1.031569457798095e-06, "loss": 0.2564, "step": 11221 }, { "epoch": 1.7172149961744454, "grad_norm": 2.4828468009673257, "learning_rate": 1.0304735116930397e-06, "loss": 0.3424, "step": 11222 }, { "epoch": 1.7173680183626625, "grad_norm": 1.8955725882532888, "learning_rate": 1.0293781164416905e-06, "loss": 0.3574, "step": 11223 }, { "epoch": 1.7175210405508798, "grad_norm": 2.096578082910678, "learning_rate": 1.0282832721113267e-06, "loss": 0.351, "step": 11224 }, { "epoch": 1.7176740627390972, "grad_norm": 1.9892607721302973, "learning_rate": 1.0271889787691846e-06, "loss": 0.2568, "step": 11225 }, { "epoch": 1.7178270849273145, "grad_norm": 2.2149399450508884, "learning_rate": 1.0260952364824694e-06, "loss": 0.2597, "step": 11226 }, { "epoch": 1.7179801071155318, "grad_norm": 2.198504328031844, "learning_rate": 1.0250020453183506e-06, "loss": 0.2871, "step": 11227 }, { "epoch": 1.718133129303749, "grad_norm": 1.8657046369191856, "learning_rate": 1.023909405343968e-06, "loss": 0.2746, "step": 11228 }, { "epoch": 1.7182861514919663, "grad_norm": 2.522595038063424, "learning_rate": 1.0228173166264233e-06, "loss": 0.2711, "step": 11229 }, { "epoch": 1.7184391736801836, "grad_norm": 2.0394131673625937, "learning_rate": 1.0217257792327862e-06, "loss": 0.3378, "step": 11230 }, { "epoch": 1.718592195868401, "grad_norm": 2.3529953949487568, "learning_rate": 1.020634793230092e-06, "loss": 0.3383, "step": 11231 }, { "epoch": 1.7187452180566183, "grad_norm": 2.491294117442281, "learning_rate": 1.0195443586853415e-06, "loss": 0.4019, "step": 11232 }, { "epoch": 1.7188982402448354, "grad_norm": 1.903192324592772, "learning_rate": 1.0184544756655047e-06, "loss": 0.2835, "step": 11233 }, { "epoch": 1.719051262433053, "grad_norm": 2.2662638217854467, "learning_rate": 1.0173651442375154e-06, "loss": 0.3071, "step": 11234 }, { "epoch": 1.71920428462127, "grad_norm": 2.1920337232874973, "learning_rate": 1.0162763644682715e-06, "loss": 0.3151, "step": 11235 }, { "epoch": 1.7193573068094874, "grad_norm": 1.8404140119518602, "learning_rate": 1.0151881364246407e-06, "loss": 0.2563, "step": 11236 }, { "epoch": 1.7195103289977047, "grad_norm": 2.371469181227642, "learning_rate": 1.0141004601734562e-06, "loss": 0.3661, "step": 11237 }, { "epoch": 1.7196633511859218, "grad_norm": 2.676750375304351, "learning_rate": 1.0130133357815142e-06, "loss": 0.3352, "step": 11238 }, { "epoch": 1.7198163733741394, "grad_norm": 2.2395476317147556, "learning_rate": 1.0119267633155816e-06, "loss": 0.316, "step": 11239 }, { "epoch": 1.7199693955623565, "grad_norm": 1.9873318256316577, "learning_rate": 1.0108407428423873e-06, "loss": 0.2805, "step": 11240 }, { "epoch": 1.7201224177505738, "grad_norm": 2.1685902722728874, "learning_rate": 1.0097552744286277e-06, "loss": 0.3208, "step": 11241 }, { "epoch": 1.7202754399387912, "grad_norm": 2.0648830031892103, "learning_rate": 1.0086703581409696e-06, "loss": 0.2799, "step": 11242 }, { "epoch": 1.7204284621270083, "grad_norm": 2.168635886474466, "learning_rate": 1.0075859940460374e-06, "loss": 0.2345, "step": 11243 }, { "epoch": 1.7205814843152258, "grad_norm": 2.061383471866174, "learning_rate": 1.006502182210426e-06, "loss": 0.3173, "step": 11244 }, { "epoch": 1.720734506503443, "grad_norm": 2.2431859409567343, "learning_rate": 1.0054189227007027e-06, "loss": 0.3387, "step": 11245 }, { "epoch": 1.7208875286916603, "grad_norm": 1.9175774670047985, "learning_rate": 1.0043362155833857e-06, "loss": 0.2928, "step": 11246 }, { "epoch": 1.7210405508798776, "grad_norm": 1.7032224043623452, "learning_rate": 1.0032540609249752e-06, "loss": 0.2237, "step": 11247 }, { "epoch": 1.7211935730680947, "grad_norm": 2.3240615291172344, "learning_rate": 1.0021724587919302e-06, "loss": 0.3035, "step": 11248 }, { "epoch": 1.7213465952563123, "grad_norm": 2.1714863936300706, "learning_rate": 1.0010914092506706e-06, "loss": 0.3228, "step": 11249 }, { "epoch": 1.7214996174445294, "grad_norm": 1.8727184181754257, "learning_rate": 1.0000109123675938e-06, "loss": 0.2461, "step": 11250 }, { "epoch": 1.7216526396327467, "grad_norm": 1.8880016801810797, "learning_rate": 9.989309682090564e-07, "loss": 0.2402, "step": 11251 }, { "epoch": 1.721805661820964, "grad_norm": 2.0474369307251807, "learning_rate": 9.978515768413766e-07, "loss": 0.2758, "step": 11252 }, { "epoch": 1.7219586840091812, "grad_norm": 2.183342201937387, "learning_rate": 9.967727383308501e-07, "loss": 0.3161, "step": 11253 }, { "epoch": 1.7221117061973987, "grad_norm": 2.1222715461235153, "learning_rate": 9.956944527437308e-07, "loss": 0.3003, "step": 11254 }, { "epoch": 1.7222647283856158, "grad_norm": 1.9626862552546822, "learning_rate": 9.946167201462399e-07, "loss": 0.2209, "step": 11255 }, { "epoch": 1.7224177505738332, "grad_norm": 2.2158250843339684, "learning_rate": 9.935395406045655e-07, "loss": 0.2603, "step": 11256 }, { "epoch": 1.7225707727620505, "grad_norm": 2.039833535788292, "learning_rate": 9.924629141848607e-07, "loss": 0.2453, "step": 11257 }, { "epoch": 1.7227237949502678, "grad_norm": 2.3722448003118073, "learning_rate": 9.913868409532467e-07, "loss": 0.3115, "step": 11258 }, { "epoch": 1.7228768171384852, "grad_norm": 2.0730045335293257, "learning_rate": 9.903113209758098e-07, "loss": 0.3535, "step": 11259 }, { "epoch": 1.7230298393267023, "grad_norm": 2.475071828831205, "learning_rate": 9.892363543185989e-07, "loss": 0.3188, "step": 11260 }, { "epoch": 1.7231828615149196, "grad_norm": 2.053885122377709, "learning_rate": 9.88161941047635e-07, "loss": 0.2961, "step": 11261 }, { "epoch": 1.723335883703137, "grad_norm": 1.9970096655581508, "learning_rate": 9.870880812289008e-07, "loss": 0.2553, "step": 11262 }, { "epoch": 1.7234889058913543, "grad_norm": 2.091151342045369, "learning_rate": 9.860147749283455e-07, "loss": 0.3145, "step": 11263 }, { "epoch": 1.7236419280795716, "grad_norm": 2.3526633638739427, "learning_rate": 9.849420222118866e-07, "loss": 0.3026, "step": 11264 }, { "epoch": 1.7237949502677887, "grad_norm": 2.2486718187261983, "learning_rate": 9.838698231454048e-07, "loss": 0.3352, "step": 11265 }, { "epoch": 1.7239479724560063, "grad_norm": 2.134344080040552, "learning_rate": 9.827981777947481e-07, "loss": 0.2745, "step": 11266 }, { "epoch": 1.7241009946442234, "grad_norm": 2.13120696519989, "learning_rate": 9.817270862257312e-07, "loss": 0.2227, "step": 11267 }, { "epoch": 1.7242540168324407, "grad_norm": 1.9544916604476297, "learning_rate": 9.80656548504133e-07, "loss": 0.2757, "step": 11268 }, { "epoch": 1.724407039020658, "grad_norm": 2.305241911383372, "learning_rate": 9.795865646957015e-07, "loss": 0.2935, "step": 11269 }, { "epoch": 1.7245600612088752, "grad_norm": 2.217856703258211, "learning_rate": 9.785171348661438e-07, "loss": 0.321, "step": 11270 }, { "epoch": 1.7247130833970927, "grad_norm": 2.2401253166837325, "learning_rate": 9.774482590811451e-07, "loss": 0.2609, "step": 11271 }, { "epoch": 1.7248661055853098, "grad_norm": 2.379006393238006, "learning_rate": 9.763799374063442e-07, "loss": 0.3083, "step": 11272 }, { "epoch": 1.7250191277735272, "grad_norm": 2.0967622566022364, "learning_rate": 9.753121699073487e-07, "loss": 0.2765, "step": 11273 }, { "epoch": 1.7251721499617445, "grad_norm": 2.2026134591533024, "learning_rate": 9.742449566497424e-07, "loss": 0.2821, "step": 11274 }, { "epoch": 1.7253251721499616, "grad_norm": 2.0261515121698928, "learning_rate": 9.7317829769906e-07, "loss": 0.3014, "step": 11275 }, { "epoch": 1.7254781943381792, "grad_norm": 2.019153648269614, "learning_rate": 9.7211219312081e-07, "loss": 0.2442, "step": 11276 }, { "epoch": 1.7256312165263963, "grad_norm": 2.430551797468562, "learning_rate": 9.71046642980471e-07, "loss": 0.3102, "step": 11277 }, { "epoch": 1.7257842387146136, "grad_norm": 1.950502258473303, "learning_rate": 9.699816473434753e-07, "loss": 0.3363, "step": 11278 }, { "epoch": 1.725937260902831, "grad_norm": 2.198363605772209, "learning_rate": 9.68917206275234e-07, "loss": 0.3117, "step": 11279 }, { "epoch": 1.726090283091048, "grad_norm": 1.9226727381010975, "learning_rate": 9.678533198411188e-07, "loss": 0.2341, "step": 11280 }, { "epoch": 1.7262433052792656, "grad_norm": 2.200988192635039, "learning_rate": 9.667899881064624e-07, "loss": 0.3561, "step": 11281 }, { "epoch": 1.7263963274674827, "grad_norm": 2.4511019558072196, "learning_rate": 9.657272111365712e-07, "loss": 0.3339, "step": 11282 }, { "epoch": 1.7265493496557, "grad_norm": 2.649575488293189, "learning_rate": 9.646649889967152e-07, "loss": 0.3311, "step": 11283 }, { "epoch": 1.7267023718439174, "grad_norm": 2.0152891336023866, "learning_rate": 9.63603321752129e-07, "loss": 0.2774, "step": 11284 }, { "epoch": 1.7268553940321345, "grad_norm": 2.0731551227423015, "learning_rate": 9.625422094680126e-07, "loss": 0.2537, "step": 11285 }, { "epoch": 1.727008416220352, "grad_norm": 1.947496310724743, "learning_rate": 9.614816522095339e-07, "loss": 0.2203, "step": 11286 }, { "epoch": 1.7271614384085692, "grad_norm": 2.3574204738706803, "learning_rate": 9.604216500418262e-07, "loss": 0.2976, "step": 11287 }, { "epoch": 1.7273144605967865, "grad_norm": 2.014474596823735, "learning_rate": 9.593622030299876e-07, "loss": 0.339, "step": 11288 }, { "epoch": 1.7274674827850038, "grad_norm": 1.8008681537701139, "learning_rate": 9.583033112390838e-07, "loss": 0.2211, "step": 11289 }, { "epoch": 1.7276205049732212, "grad_norm": 2.0716788361002685, "learning_rate": 9.57244974734145e-07, "loss": 0.2579, "step": 11290 }, { "epoch": 1.7277735271614385, "grad_norm": 2.2269332519553537, "learning_rate": 9.561871935801647e-07, "loss": 0.2713, "step": 11291 }, { "epoch": 1.7279265493496556, "grad_norm": 2.4042579479130257, "learning_rate": 9.551299678421133e-07, "loss": 0.3419, "step": 11292 }, { "epoch": 1.728079571537873, "grad_norm": 2.0798852529452336, "learning_rate": 9.540732975849122e-07, "loss": 0.2732, "step": 11293 }, { "epoch": 1.7282325937260903, "grad_norm": 2.345583325761405, "learning_rate": 9.530171828734558e-07, "loss": 0.2879, "step": 11294 }, { "epoch": 1.7283856159143076, "grad_norm": 2.1856914083684194, "learning_rate": 9.519616237726104e-07, "loss": 0.2799, "step": 11295 }, { "epoch": 1.728538638102525, "grad_norm": 1.9641914474915316, "learning_rate": 9.509066203471962e-07, "loss": 0.2643, "step": 11296 }, { "epoch": 1.728691660290742, "grad_norm": 2.2317078996984523, "learning_rate": 9.498521726620036e-07, "loss": 0.3013, "step": 11297 }, { "epoch": 1.7288446824789594, "grad_norm": 2.0330881449616705, "learning_rate": 9.487982807817975e-07, "loss": 0.2861, "step": 11298 }, { "epoch": 1.7289977046671767, "grad_norm": 2.0998242046821702, "learning_rate": 9.477449447712938e-07, "loss": 0.3496, "step": 11299 }, { "epoch": 1.729150726855394, "grad_norm": 2.57819425032008, "learning_rate": 9.466921646951888e-07, "loss": 0.3567, "step": 11300 }, { "epoch": 1.7293037490436114, "grad_norm": 2.2586641955898257, "learning_rate": 9.456399406181349e-07, "loss": 0.284, "step": 11301 }, { "epoch": 1.7294567712318285, "grad_norm": 2.4828821297651373, "learning_rate": 9.445882726047507e-07, "loss": 0.2696, "step": 11302 }, { "epoch": 1.729609793420046, "grad_norm": 1.850980352969131, "learning_rate": 9.435371607196264e-07, "loss": 0.2184, "step": 11303 }, { "epoch": 1.7297628156082632, "grad_norm": 2.557017556726781, "learning_rate": 9.424866050273163e-07, "loss": 0.2398, "step": 11304 }, { "epoch": 1.7299158377964805, "grad_norm": 1.9141775522394715, "learning_rate": 9.41436605592333e-07, "loss": 0.2731, "step": 11305 }, { "epoch": 1.7300688599846978, "grad_norm": 2.0152008196879665, "learning_rate": 9.403871624791672e-07, "loss": 0.2778, "step": 11306 }, { "epoch": 1.730221882172915, "grad_norm": 2.533871929801867, "learning_rate": 9.393382757522673e-07, "loss": 0.2577, "step": 11307 }, { "epoch": 1.7303749043611325, "grad_norm": 2.151577434545533, "learning_rate": 9.382899454760497e-07, "loss": 0.2912, "step": 11308 }, { "epoch": 1.7305279265493496, "grad_norm": 2.002420979004715, "learning_rate": 9.372421717148961e-07, "loss": 0.2804, "step": 11309 }, { "epoch": 1.730680948737567, "grad_norm": 1.9167255742661933, "learning_rate": 9.361949545331539e-07, "loss": 0.2903, "step": 11310 }, { "epoch": 1.7308339709257843, "grad_norm": 1.7526154281756776, "learning_rate": 9.351482939951373e-07, "loss": 0.2414, "step": 11311 }, { "epoch": 1.7309869931140014, "grad_norm": 2.298302566978914, "learning_rate": 9.341021901651259e-07, "loss": 0.2814, "step": 11312 }, { "epoch": 1.731140015302219, "grad_norm": 2.151736437727825, "learning_rate": 9.330566431073663e-07, "loss": 0.2432, "step": 11313 }, { "epoch": 1.731293037490436, "grad_norm": 2.0864141985693156, "learning_rate": 9.320116528860667e-07, "loss": 0.2428, "step": 11314 }, { "epoch": 1.7314460596786534, "grad_norm": 2.0619824325768095, "learning_rate": 9.309672195654063e-07, "loss": 0.2943, "step": 11315 }, { "epoch": 1.7315990818668707, "grad_norm": 2.142611756371609, "learning_rate": 9.299233432095267e-07, "loss": 0.2184, "step": 11316 }, { "epoch": 1.7317521040550878, "grad_norm": 2.1460629954558548, "learning_rate": 9.28880023882538e-07, "loss": 0.336, "step": 11317 }, { "epoch": 1.7319051262433054, "grad_norm": 2.203744055586528, "learning_rate": 9.27837261648512e-07, "loss": 0.2863, "step": 11318 }, { "epoch": 1.7320581484315225, "grad_norm": 2.2301200311317104, "learning_rate": 9.267950565714911e-07, "loss": 0.3067, "step": 11319 }, { "epoch": 1.7322111706197398, "grad_norm": 1.977711646870203, "learning_rate": 9.257534087154796e-07, "loss": 0.2776, "step": 11320 }, { "epoch": 1.7323641928079572, "grad_norm": 2.1372521289802386, "learning_rate": 9.247123181444517e-07, "loss": 0.2968, "step": 11321 }, { "epoch": 1.7325172149961743, "grad_norm": 2.118149417733798, "learning_rate": 9.23671784922342e-07, "loss": 0.2988, "step": 11322 }, { "epoch": 1.7326702371843918, "grad_norm": 2.1422567198478872, "learning_rate": 9.226318091130537e-07, "loss": 0.2463, "step": 11323 }, { "epoch": 1.732823259372609, "grad_norm": 2.0668194086851748, "learning_rate": 9.215923907804591e-07, "loss": 0.3208, "step": 11324 }, { "epoch": 1.7329762815608263, "grad_norm": 2.5807343354347423, "learning_rate": 9.205535299883894e-07, "loss": 0.2877, "step": 11325 }, { "epoch": 1.7331293037490436, "grad_norm": 2.09122486977293, "learning_rate": 9.195152268006457e-07, "loss": 0.2624, "step": 11326 }, { "epoch": 1.733282325937261, "grad_norm": 2.220035139943399, "learning_rate": 9.184774812809972e-07, "loss": 0.2811, "step": 11327 }, { "epoch": 1.7334353481254783, "grad_norm": 2.311077102527897, "learning_rate": 9.174402934931704e-07, "loss": 0.3409, "step": 11328 }, { "epoch": 1.7335883703136954, "grad_norm": 1.845809541515683, "learning_rate": 9.164036635008689e-07, "loss": 0.259, "step": 11329 }, { "epoch": 1.7337413925019127, "grad_norm": 1.9894773477892849, "learning_rate": 9.15367591367754e-07, "loss": 0.2562, "step": 11330 }, { "epoch": 1.73389441469013, "grad_norm": 2.343597507954915, "learning_rate": 9.143320771574526e-07, "loss": 0.2868, "step": 11331 }, { "epoch": 1.7340474368783474, "grad_norm": 2.3318031296705426, "learning_rate": 9.132971209335628e-07, "loss": 0.3158, "step": 11332 }, { "epoch": 1.7342004590665647, "grad_norm": 2.009158194023136, "learning_rate": 9.122627227596458e-07, "loss": 0.2978, "step": 11333 }, { "epoch": 1.7343534812547818, "grad_norm": 2.0812300521920086, "learning_rate": 9.112288826992233e-07, "loss": 0.3239, "step": 11334 }, { "epoch": 1.7345065034429994, "grad_norm": 1.9694566051586355, "learning_rate": 9.10195600815792e-07, "loss": 0.302, "step": 11335 }, { "epoch": 1.7346595256312165, "grad_norm": 2.1698146071646707, "learning_rate": 9.091628771728078e-07, "loss": 0.3485, "step": 11336 }, { "epoch": 1.7348125478194338, "grad_norm": 2.2078184817883404, "learning_rate": 9.081307118336957e-07, "loss": 0.2642, "step": 11337 }, { "epoch": 1.7349655700076512, "grad_norm": 2.0498493991285347, "learning_rate": 9.070991048618438e-07, "loss": 0.269, "step": 11338 }, { "epoch": 1.7351185921958683, "grad_norm": 1.9622688176545964, "learning_rate": 9.060680563206082e-07, "loss": 0.218, "step": 11339 }, { "epoch": 1.7352716143840858, "grad_norm": 2.0330487439105545, "learning_rate": 9.05037566273308e-07, "loss": 0.2984, "step": 11340 }, { "epoch": 1.735424636572303, "grad_norm": 2.174098256735964, "learning_rate": 9.040076347832305e-07, "loss": 0.2946, "step": 11341 }, { "epoch": 1.7355776587605203, "grad_norm": 1.9391762352970459, "learning_rate": 9.029782619136285e-07, "loss": 0.2331, "step": 11342 }, { "epoch": 1.7357306809487376, "grad_norm": 1.954971032676284, "learning_rate": 9.01949447727719e-07, "loss": 0.2687, "step": 11343 }, { "epoch": 1.7358837031369547, "grad_norm": 2.3946998205627383, "learning_rate": 9.009211922886862e-07, "loss": 0.3037, "step": 11344 }, { "epoch": 1.7360367253251723, "grad_norm": 2.4884493360178195, "learning_rate": 8.998934956596772e-07, "loss": 0.3187, "step": 11345 }, { "epoch": 1.7361897475133894, "grad_norm": 2.0318519644823403, "learning_rate": 8.988663579038104e-07, "loss": 0.2894, "step": 11346 }, { "epoch": 1.7363427697016067, "grad_norm": 2.032057463388814, "learning_rate": 8.978397790841631e-07, "loss": 0.2795, "step": 11347 }, { "epoch": 1.736495791889824, "grad_norm": 1.9829708365779646, "learning_rate": 8.968137592637838e-07, "loss": 0.2309, "step": 11348 }, { "epoch": 1.7366488140780412, "grad_norm": 1.9451190583188658, "learning_rate": 8.95788298505682e-07, "loss": 0.2661, "step": 11349 }, { "epoch": 1.7368018362662587, "grad_norm": 2.2877897156250455, "learning_rate": 8.947633968728376e-07, "loss": 0.2577, "step": 11350 }, { "epoch": 1.7369548584544758, "grad_norm": 2.28996390901099, "learning_rate": 8.937390544281932e-07, "loss": 0.2877, "step": 11351 }, { "epoch": 1.7371078806426932, "grad_norm": 2.1909353599002626, "learning_rate": 8.927152712346542e-07, "loss": 0.2953, "step": 11352 }, { "epoch": 1.7372609028309105, "grad_norm": 2.0225927534092505, "learning_rate": 8.916920473551039e-07, "loss": 0.2364, "step": 11353 }, { "epoch": 1.7374139250191276, "grad_norm": 2.195364625764178, "learning_rate": 8.906693828523738e-07, "loss": 0.2801, "step": 11354 }, { "epoch": 1.7375669472073452, "grad_norm": 2.198796276058231, "learning_rate": 8.896472777892706e-07, "loss": 0.2753, "step": 11355 }, { "epoch": 1.7377199693955623, "grad_norm": 1.9924419768513708, "learning_rate": 8.886257322285729e-07, "loss": 0.2271, "step": 11356 }, { "epoch": 1.7378729915837796, "grad_norm": 1.8575121798898055, "learning_rate": 8.876047462330084e-07, "loss": 0.2319, "step": 11357 }, { "epoch": 1.738026013771997, "grad_norm": 2.0271128466885022, "learning_rate": 8.865843198652858e-07, "loss": 0.279, "step": 11358 }, { "epoch": 1.7381790359602143, "grad_norm": 2.020993282119618, "learning_rate": 8.855644531880747e-07, "loss": 0.2546, "step": 11359 }, { "epoch": 1.7383320581484316, "grad_norm": 2.116368172177348, "learning_rate": 8.84545146264002e-07, "loss": 0.3017, "step": 11360 }, { "epoch": 1.7384850803366487, "grad_norm": 2.1967153978718184, "learning_rate": 8.835263991556742e-07, "loss": 0.2769, "step": 11361 }, { "epoch": 1.738638102524866, "grad_norm": 1.9077855748756016, "learning_rate": 8.825082119256557e-07, "loss": 0.2808, "step": 11362 }, { "epoch": 1.7387911247130834, "grad_norm": 1.8821456287630594, "learning_rate": 8.814905846364719e-07, "loss": 0.2419, "step": 11363 }, { "epoch": 1.7389441469013007, "grad_norm": 2.3135659597892784, "learning_rate": 8.804735173506251e-07, "loss": 0.3392, "step": 11364 }, { "epoch": 1.739097169089518, "grad_norm": 1.8049680049774777, "learning_rate": 8.794570101305744e-07, "loss": 0.2129, "step": 11365 }, { "epoch": 1.7392501912777352, "grad_norm": 2.355594127900009, "learning_rate": 8.784410630387497e-07, "loss": 0.2976, "step": 11366 }, { "epoch": 1.7394032134659527, "grad_norm": 1.8566497792068795, "learning_rate": 8.774256761375432e-07, "loss": 0.2239, "step": 11367 }, { "epoch": 1.7395562356541698, "grad_norm": 1.8551089045311278, "learning_rate": 8.764108494893131e-07, "loss": 0.222, "step": 11368 }, { "epoch": 1.7397092578423872, "grad_norm": 2.11851018960962, "learning_rate": 8.753965831563838e-07, "loss": 0.2562, "step": 11369 }, { "epoch": 1.7398622800306045, "grad_norm": 2.2494424386365672, "learning_rate": 8.743828772010465e-07, "loss": 0.3498, "step": 11370 }, { "epoch": 1.7400153022188216, "grad_norm": 2.0475301438582627, "learning_rate": 8.733697316855572e-07, "loss": 0.314, "step": 11371 }, { "epoch": 1.7401683244070392, "grad_norm": 2.346917348304334, "learning_rate": 8.723571466721348e-07, "loss": 0.2817, "step": 11372 }, { "epoch": 1.7403213465952563, "grad_norm": 1.9556503799353762, "learning_rate": 8.713451222229674e-07, "loss": 0.2514, "step": 11373 }, { "epoch": 1.7404743687834736, "grad_norm": 1.909780103481461, "learning_rate": 8.703336584002098e-07, "loss": 0.2623, "step": 11374 }, { "epoch": 1.740627390971691, "grad_norm": 2.2751671168907084, "learning_rate": 8.693227552659766e-07, "loss": 0.3077, "step": 11375 }, { "epoch": 1.740780413159908, "grad_norm": 2.295512749361854, "learning_rate": 8.683124128823506e-07, "loss": 0.3109, "step": 11376 }, { "epoch": 1.7409334353481256, "grad_norm": 2.070742958849601, "learning_rate": 8.673026313113853e-07, "loss": 0.2412, "step": 11377 }, { "epoch": 1.7410864575363427, "grad_norm": 2.107583228263569, "learning_rate": 8.662934106150922e-07, "loss": 0.2787, "step": 11378 }, { "epoch": 1.74123947972456, "grad_norm": 2.361860363040369, "learning_rate": 8.652847508554497e-07, "loss": 0.3423, "step": 11379 }, { "epoch": 1.7413925019127774, "grad_norm": 2.0767444703186912, "learning_rate": 8.642766520944091e-07, "loss": 0.3141, "step": 11380 }, { "epoch": 1.7415455241009945, "grad_norm": 2.336321547404426, "learning_rate": 8.632691143938754e-07, "loss": 0.3341, "step": 11381 }, { "epoch": 1.741698546289212, "grad_norm": 2.0940993397565943, "learning_rate": 8.622621378157292e-07, "loss": 0.2899, "step": 11382 }, { "epoch": 1.7418515684774292, "grad_norm": 2.3391721829229533, "learning_rate": 8.612557224218154e-07, "loss": 0.3682, "step": 11383 }, { "epoch": 1.7420045906656465, "grad_norm": 2.4228550269184, "learning_rate": 8.602498682739347e-07, "loss": 0.2575, "step": 11384 }, { "epoch": 1.7421576128538638, "grad_norm": 2.0087616832226405, "learning_rate": 8.592445754338652e-07, "loss": 0.2201, "step": 11385 }, { "epoch": 1.742310635042081, "grad_norm": 2.1070525512343026, "learning_rate": 8.582398439633466e-07, "loss": 0.2497, "step": 11386 }, { "epoch": 1.7424636572302985, "grad_norm": 2.08953895418418, "learning_rate": 8.572356739240817e-07, "loss": 0.2649, "step": 11387 }, { "epoch": 1.7426166794185156, "grad_norm": 2.144426001085784, "learning_rate": 8.562320653777401e-07, "loss": 0.2698, "step": 11388 }, { "epoch": 1.742769701606733, "grad_norm": 2.063685815560944, "learning_rate": 8.55229018385958e-07, "loss": 0.2662, "step": 11389 }, { "epoch": 1.7429227237949503, "grad_norm": 2.0046930646619363, "learning_rate": 8.542265330103372e-07, "loss": 0.217, "step": 11390 }, { "epoch": 1.7430757459831676, "grad_norm": 2.0568254195121796, "learning_rate": 8.53224609312443e-07, "loss": 0.2466, "step": 11391 }, { "epoch": 1.743228768171385, "grad_norm": 2.342779252411613, "learning_rate": 8.522232473538073e-07, "loss": 0.3322, "step": 11392 }, { "epoch": 1.743381790359602, "grad_norm": 2.245448570805698, "learning_rate": 8.512224471959296e-07, "loss": 0.2831, "step": 11393 }, { "epoch": 1.7435348125478194, "grad_norm": 2.3014573795661146, "learning_rate": 8.50222208900271e-07, "loss": 0.2902, "step": 11394 }, { "epoch": 1.7436878347360367, "grad_norm": 1.9968036086457233, "learning_rate": 8.492225325282599e-07, "loss": 0.2555, "step": 11395 }, { "epoch": 1.743840856924254, "grad_norm": 1.9143267854598263, "learning_rate": 8.482234181412918e-07, "loss": 0.2421, "step": 11396 }, { "epoch": 1.7439938791124714, "grad_norm": 1.7719945723268122, "learning_rate": 8.472248658007253e-07, "loss": 0.2534, "step": 11397 }, { "epoch": 1.7441469013006885, "grad_norm": 2.533973732151635, "learning_rate": 8.46226875567886e-07, "loss": 0.3271, "step": 11398 }, { "epoch": 1.744299923488906, "grad_norm": 2.2236967026045082, "learning_rate": 8.452294475040645e-07, "loss": 0.3054, "step": 11399 }, { "epoch": 1.7444529456771232, "grad_norm": 2.1544617140659237, "learning_rate": 8.442325816705154e-07, "loss": 0.2555, "step": 11400 }, { "epoch": 1.7446059678653405, "grad_norm": 2.3671783149900247, "learning_rate": 8.43236278128462e-07, "loss": 0.3112, "step": 11401 }, { "epoch": 1.7447589900535578, "grad_norm": 2.125329113033378, "learning_rate": 8.422405369390874e-07, "loss": 0.2517, "step": 11402 }, { "epoch": 1.744912012241775, "grad_norm": 2.374595888527881, "learning_rate": 8.412453581635505e-07, "loss": 0.3258, "step": 11403 }, { "epoch": 1.7450650344299925, "grad_norm": 2.1344730686834503, "learning_rate": 8.402507418629646e-07, "loss": 0.3029, "step": 11404 }, { "epoch": 1.7452180566182096, "grad_norm": 2.2387729700388648, "learning_rate": 8.39256688098411e-07, "loss": 0.2967, "step": 11405 }, { "epoch": 1.745371078806427, "grad_norm": 1.959205870616328, "learning_rate": 8.382631969309451e-07, "loss": 0.2543, "step": 11406 }, { "epoch": 1.7455241009946443, "grad_norm": 2.007488255339045, "learning_rate": 8.37270268421575e-07, "loss": 0.2798, "step": 11407 }, { "epoch": 1.7456771231828614, "grad_norm": 2.152705889958382, "learning_rate": 8.362779026312817e-07, "loss": 0.2928, "step": 11408 }, { "epoch": 1.745830145371079, "grad_norm": 2.005074106876521, "learning_rate": 8.352860996210133e-07, "loss": 0.2823, "step": 11409 }, { "epoch": 1.745983167559296, "grad_norm": 2.692710912856644, "learning_rate": 8.342948594516753e-07, "loss": 0.3304, "step": 11410 }, { "epoch": 1.7461361897475134, "grad_norm": 2.5533912783203205, "learning_rate": 8.333041821841481e-07, "loss": 0.3511, "step": 11411 }, { "epoch": 1.7462892119357307, "grad_norm": 2.031777309784537, "learning_rate": 8.32314067879274e-07, "loss": 0.2852, "step": 11412 }, { "epoch": 1.7464422341239478, "grad_norm": 1.735112109022947, "learning_rate": 8.313245165978522e-07, "loss": 0.2149, "step": 11413 }, { "epoch": 1.7465952563121654, "grad_norm": 1.7149314804299296, "learning_rate": 8.303355284006631e-07, "loss": 0.1944, "step": 11414 }, { "epoch": 1.7467482785003825, "grad_norm": 2.34769130403817, "learning_rate": 8.293471033484412e-07, "loss": 0.2967, "step": 11415 }, { "epoch": 1.7469013006885998, "grad_norm": 2.104585496737917, "learning_rate": 8.283592415018871e-07, "loss": 0.3018, "step": 11416 }, { "epoch": 1.7470543228768172, "grad_norm": 2.1709189972513117, "learning_rate": 8.273719429216732e-07, "loss": 0.3429, "step": 11417 }, { "epoch": 1.7472073450650343, "grad_norm": 2.033005114625582, "learning_rate": 8.26385207668432e-07, "loss": 0.2769, "step": 11418 }, { "epoch": 1.7473603672532518, "grad_norm": 2.200352392592713, "learning_rate": 8.25399035802763e-07, "loss": 0.3022, "step": 11419 }, { "epoch": 1.747513389441469, "grad_norm": 2.016402621506828, "learning_rate": 8.244134273852289e-07, "loss": 0.3039, "step": 11420 }, { "epoch": 1.7476664116296863, "grad_norm": 1.9838466411531992, "learning_rate": 8.234283824763623e-07, "loss": 0.2549, "step": 11421 }, { "epoch": 1.7478194338179036, "grad_norm": 1.9775094872488335, "learning_rate": 8.22443901136658e-07, "loss": 0.2676, "step": 11422 }, { "epoch": 1.7479724560061207, "grad_norm": 1.9390339024939802, "learning_rate": 8.214599834265757e-07, "loss": 0.2548, "step": 11423 }, { "epoch": 1.7481254781943383, "grad_norm": 2.2990351085347966, "learning_rate": 8.204766294065436e-07, "loss": 0.2939, "step": 11424 }, { "epoch": 1.7482785003825554, "grad_norm": 2.1371254621600824, "learning_rate": 8.194938391369511e-07, "loss": 0.2853, "step": 11425 }, { "epoch": 1.7484315225707727, "grad_norm": 1.8783787360270152, "learning_rate": 8.185116126781567e-07, "loss": 0.2473, "step": 11426 }, { "epoch": 1.74858454475899, "grad_norm": 1.946085021023166, "learning_rate": 8.175299500904821e-07, "loss": 0.2571, "step": 11427 }, { "epoch": 1.7487375669472074, "grad_norm": 2.2973323888353607, "learning_rate": 8.165488514342157e-07, "loss": 0.3026, "step": 11428 }, { "epoch": 1.7488905891354247, "grad_norm": 2.2079906556541835, "learning_rate": 8.155683167696105e-07, "loss": 0.3072, "step": 11429 }, { "epoch": 1.7490436113236418, "grad_norm": 2.1061825703626034, "learning_rate": 8.145883461568837e-07, "loss": 0.287, "step": 11430 }, { "epoch": 1.7491966335118592, "grad_norm": 2.4976507044518965, "learning_rate": 8.136089396562186e-07, "loss": 0.3586, "step": 11431 }, { "epoch": 1.7493496557000765, "grad_norm": 2.1563888177726134, "learning_rate": 8.126300973277701e-07, "loss": 0.2401, "step": 11432 }, { "epoch": 1.7495026778882938, "grad_norm": 2.4782391257527436, "learning_rate": 8.116518192316459e-07, "loss": 0.3337, "step": 11433 }, { "epoch": 1.7496557000765112, "grad_norm": 2.0657477292079895, "learning_rate": 8.106741054279277e-07, "loss": 0.2599, "step": 11434 }, { "epoch": 1.7498087222647283, "grad_norm": 2.4612207172641676, "learning_rate": 8.096969559766643e-07, "loss": 0.2549, "step": 11435 }, { "epoch": 1.7499617444529458, "grad_norm": 2.118689141461657, "learning_rate": 8.087203709378622e-07, "loss": 0.2663, "step": 11436 }, { "epoch": 1.750114766641163, "grad_norm": 2.19481740548431, "learning_rate": 8.077443503714965e-07, "loss": 0.3231, "step": 11437 }, { "epoch": 1.7502677888293803, "grad_norm": 2.345923801834366, "learning_rate": 8.06768894337514e-07, "loss": 0.3122, "step": 11438 }, { "epoch": 1.7504208110175976, "grad_norm": 2.1517185673359727, "learning_rate": 8.057940028958145e-07, "loss": 0.271, "step": 11439 }, { "epoch": 1.7505738332058147, "grad_norm": 2.047360134838788, "learning_rate": 8.048196761062743e-07, "loss": 0.2962, "step": 11440 }, { "epoch": 1.7507268553940323, "grad_norm": 2.166641555801906, "learning_rate": 8.038459140287325e-07, "loss": 0.3112, "step": 11441 }, { "epoch": 1.7508798775822494, "grad_norm": 2.050956415007141, "learning_rate": 8.028727167229844e-07, "loss": 0.2088, "step": 11442 }, { "epoch": 1.7510328997704667, "grad_norm": 1.7371176140968658, "learning_rate": 8.019000842488045e-07, "loss": 0.2038, "step": 11443 }, { "epoch": 1.751185921958684, "grad_norm": 2.948667707658099, "learning_rate": 8.00928016665925e-07, "loss": 0.2795, "step": 11444 }, { "epoch": 1.7513389441469012, "grad_norm": 1.9595754810553292, "learning_rate": 7.999565140340393e-07, "loss": 0.3042, "step": 11445 }, { "epoch": 1.7514919663351187, "grad_norm": 1.9510168138436947, "learning_rate": 7.989855764128163e-07, "loss": 0.2728, "step": 11446 }, { "epoch": 1.7516449885233358, "grad_norm": 2.25768939840928, "learning_rate": 7.980152038618838e-07, "loss": 0.2869, "step": 11447 }, { "epoch": 1.7517980107115532, "grad_norm": 2.322938772528777, "learning_rate": 7.970453964408365e-07, "loss": 0.3438, "step": 11448 }, { "epoch": 1.7519510328997705, "grad_norm": 2.1999953646735477, "learning_rate": 7.960761542092332e-07, "loss": 0.3005, "step": 11449 }, { "epoch": 1.7521040550879876, "grad_norm": 1.7289761044406273, "learning_rate": 7.951074772265987e-07, "loss": 0.222, "step": 11450 }, { "epoch": 1.7522570772762052, "grad_norm": 2.190680943604739, "learning_rate": 7.941393655524243e-07, "loss": 0.3138, "step": 11451 }, { "epoch": 1.7524100994644223, "grad_norm": 2.425449360415477, "learning_rate": 7.931718192461657e-07, "loss": 0.3134, "step": 11452 }, { "epoch": 1.7525631216526396, "grad_norm": 1.8979134354611327, "learning_rate": 7.92204838367242e-07, "loss": 0.3067, "step": 11453 }, { "epoch": 1.752716143840857, "grad_norm": 1.9714110258957196, "learning_rate": 7.912384229750402e-07, "loss": 0.2503, "step": 11454 }, { "epoch": 1.752869166029074, "grad_norm": 2.2808248096299493, "learning_rate": 7.902725731289107e-07, "loss": 0.2858, "step": 11455 }, { "epoch": 1.7530221882172916, "grad_norm": 1.7634975671903566, "learning_rate": 7.893072888881748e-07, "loss": 0.2124, "step": 11456 }, { "epoch": 1.7531752104055087, "grad_norm": 1.990459773666613, "learning_rate": 7.883425703121083e-07, "loss": 0.2959, "step": 11457 }, { "epoch": 1.753328232593726, "grad_norm": 1.952214898201334, "learning_rate": 7.873784174599586e-07, "loss": 0.2631, "step": 11458 }, { "epoch": 1.7534812547819434, "grad_norm": 2.8143096353414445, "learning_rate": 7.864148303909447e-07, "loss": 0.3382, "step": 11459 }, { "epoch": 1.7536342769701607, "grad_norm": 2.1359237551802837, "learning_rate": 7.85451809164236e-07, "loss": 0.233, "step": 11460 }, { "epoch": 1.753787299158378, "grad_norm": 2.358983205654095, "learning_rate": 7.844893538389808e-07, "loss": 0.2735, "step": 11461 }, { "epoch": 1.7539403213465952, "grad_norm": 2.186624182602075, "learning_rate": 7.835274644742874e-07, "loss": 0.2891, "step": 11462 }, { "epoch": 1.7540933435348125, "grad_norm": 2.1643668630024058, "learning_rate": 7.82566141129224e-07, "loss": 0.3178, "step": 11463 }, { "epoch": 1.7542463657230298, "grad_norm": 2.391814174037397, "learning_rate": 7.816053838628346e-07, "loss": 0.3334, "step": 11464 }, { "epoch": 1.7543993879112472, "grad_norm": 2.127914629053998, "learning_rate": 7.80645192734123e-07, "loss": 0.3049, "step": 11465 }, { "epoch": 1.7545524100994645, "grad_norm": 2.3015822780937505, "learning_rate": 7.796855678020531e-07, "loss": 0.3718, "step": 11466 }, { "epoch": 1.7547054322876816, "grad_norm": 2.1525976489722987, "learning_rate": 7.787265091255636e-07, "loss": 0.2538, "step": 11467 }, { "epoch": 1.7548584544758992, "grad_norm": 2.2044691852542035, "learning_rate": 7.777680167635537e-07, "loss": 0.2651, "step": 11468 }, { "epoch": 1.7550114766641163, "grad_norm": 2.046414617913969, "learning_rate": 7.768100907748866e-07, "loss": 0.2679, "step": 11469 }, { "epoch": 1.7551644988523336, "grad_norm": 2.2233483966772716, "learning_rate": 7.758527312183939e-07, "loss": 0.3119, "step": 11470 }, { "epoch": 1.755317521040551, "grad_norm": 2.0313291587701934, "learning_rate": 7.748959381528698e-07, "loss": 0.2891, "step": 11471 }, { "epoch": 1.755470543228768, "grad_norm": 2.393618769392913, "learning_rate": 7.73939711637075e-07, "loss": 0.3505, "step": 11472 }, { "epoch": 1.7556235654169856, "grad_norm": 2.128114574717289, "learning_rate": 7.729840517297349e-07, "loss": 0.3469, "step": 11473 }, { "epoch": 1.7557765876052027, "grad_norm": 2.2834270947072324, "learning_rate": 7.720289584895413e-07, "loss": 0.297, "step": 11474 }, { "epoch": 1.75592960979342, "grad_norm": 2.3363821368142124, "learning_rate": 7.710744319751484e-07, "loss": 0.3029, "step": 11475 }, { "epoch": 1.7560826319816374, "grad_norm": 2.12918511038581, "learning_rate": 7.701204722451783e-07, "loss": 0.2405, "step": 11476 }, { "epoch": 1.7562356541698545, "grad_norm": 2.187618497157304, "learning_rate": 7.691670793582185e-07, "loss": 0.2519, "step": 11477 }, { "epoch": 1.756388676358072, "grad_norm": 2.3802505932165023, "learning_rate": 7.682142533728187e-07, "loss": 0.3085, "step": 11478 }, { "epoch": 1.7565416985462892, "grad_norm": 2.090745528324452, "learning_rate": 7.672619943474968e-07, "loss": 0.2837, "step": 11479 }, { "epoch": 1.7566947207345065, "grad_norm": 2.149798905042403, "learning_rate": 7.663103023407337e-07, "loss": 0.269, "step": 11480 }, { "epoch": 1.7568477429227238, "grad_norm": 2.8280663649694042, "learning_rate": 7.653591774109781e-07, "loss": 0.3053, "step": 11481 }, { "epoch": 1.757000765110941, "grad_norm": 2.0802449500599085, "learning_rate": 7.644086196166423e-07, "loss": 0.3047, "step": 11482 }, { "epoch": 1.7571537872991585, "grad_norm": 1.946712250652231, "learning_rate": 7.634586290161017e-07, "loss": 0.2859, "step": 11483 }, { "epoch": 1.7573068094873756, "grad_norm": 2.1479427054523903, "learning_rate": 7.625092056676997e-07, "loss": 0.2553, "step": 11484 }, { "epoch": 1.757459831675593, "grad_norm": 2.0637537859831996, "learning_rate": 7.615603496297475e-07, "loss": 0.2663, "step": 11485 }, { "epoch": 1.7576128538638103, "grad_norm": 2.345797647798903, "learning_rate": 7.606120609605128e-07, "loss": 0.2572, "step": 11486 }, { "epoch": 1.7577658760520274, "grad_norm": 2.0232236730906106, "learning_rate": 7.596643397182357e-07, "loss": 0.2539, "step": 11487 }, { "epoch": 1.757918898240245, "grad_norm": 2.1486066998428193, "learning_rate": 7.587171859611219e-07, "loss": 0.333, "step": 11488 }, { "epoch": 1.758071920428462, "grad_norm": 1.887984788614984, "learning_rate": 7.577705997473372e-07, "loss": 0.2489, "step": 11489 }, { "epoch": 1.7582249426166794, "grad_norm": 1.8155595847795774, "learning_rate": 7.568245811350139e-07, "loss": 0.2399, "step": 11490 }, { "epoch": 1.7583779648048967, "grad_norm": 2.2819860715358384, "learning_rate": 7.558791301822555e-07, "loss": 0.3071, "step": 11491 }, { "epoch": 1.758530986993114, "grad_norm": 2.005584975186744, "learning_rate": 7.549342469471199e-07, "loss": 0.2336, "step": 11492 }, { "epoch": 1.7586840091813314, "grad_norm": 2.289801482583614, "learning_rate": 7.539899314876409e-07, "loss": 0.3048, "step": 11493 }, { "epoch": 1.7588370313695485, "grad_norm": 2.121216076992854, "learning_rate": 7.530461838618119e-07, "loss": 0.3218, "step": 11494 }, { "epoch": 1.7589900535577658, "grad_norm": 2.236459890017379, "learning_rate": 7.521030041275879e-07, "loss": 0.267, "step": 11495 }, { "epoch": 1.7591430757459832, "grad_norm": 2.2002981567336466, "learning_rate": 7.51160392342899e-07, "loss": 0.2827, "step": 11496 }, { "epoch": 1.7592960979342005, "grad_norm": 2.0246238778322327, "learning_rate": 7.502183485656311e-07, "loss": 0.2729, "step": 11497 }, { "epoch": 1.7594491201224178, "grad_norm": 2.2739189494236993, "learning_rate": 7.492768728536404e-07, "loss": 0.2925, "step": 11498 }, { "epoch": 1.759602142310635, "grad_norm": 1.9264721755974386, "learning_rate": 7.483359652647448e-07, "loss": 0.2605, "step": 11499 }, { "epoch": 1.7597551644988525, "grad_norm": 2.0289411572643523, "learning_rate": 7.473956258567316e-07, "loss": 0.2722, "step": 11500 }, { "epoch": 1.7599081866870696, "grad_norm": 1.6723850393620163, "learning_rate": 7.46455854687349e-07, "loss": 0.1741, "step": 11501 }, { "epoch": 1.760061208875287, "grad_norm": 2.3182455245711857, "learning_rate": 7.45516651814312e-07, "loss": 0.2993, "step": 11502 }, { "epoch": 1.7602142310635043, "grad_norm": 2.6195480262424002, "learning_rate": 7.44578017295301e-07, "loss": 0.3212, "step": 11503 }, { "epoch": 1.7603672532517214, "grad_norm": 1.762441987335934, "learning_rate": 7.436399511879622e-07, "loss": 0.2604, "step": 11504 }, { "epoch": 1.760520275439939, "grad_norm": 1.7135814424977367, "learning_rate": 7.42702453549905e-07, "loss": 0.2559, "step": 11505 }, { "epoch": 1.760673297628156, "grad_norm": 2.2057167673996303, "learning_rate": 7.417655244387045e-07, "loss": 0.2687, "step": 11506 }, { "epoch": 1.7608263198163734, "grad_norm": 2.0842795751715477, "learning_rate": 7.408291639119014e-07, "loss": 0.2647, "step": 11507 }, { "epoch": 1.7609793420045907, "grad_norm": 2.034933453675627, "learning_rate": 7.398933720270018e-07, "loss": 0.3, "step": 11508 }, { "epoch": 1.7611323641928078, "grad_norm": 2.130052559162876, "learning_rate": 7.389581488414754e-07, "loss": 0.3058, "step": 11509 }, { "epoch": 1.7612853863810254, "grad_norm": 1.902586949096245, "learning_rate": 7.380234944127595e-07, "loss": 0.2671, "step": 11510 }, { "epoch": 1.7614384085692425, "grad_norm": 2.0243430655818573, "learning_rate": 7.370894087982528e-07, "loss": 0.2402, "step": 11511 }, { "epoch": 1.7615914307574598, "grad_norm": 2.0713333991695277, "learning_rate": 7.361558920553225e-07, "loss": 0.2736, "step": 11512 }, { "epoch": 1.7617444529456772, "grad_norm": 2.6500941540661587, "learning_rate": 7.352229442412984e-07, "loss": 0.3045, "step": 11513 }, { "epoch": 1.7618974751338943, "grad_norm": 2.2426275693857325, "learning_rate": 7.342905654134802e-07, "loss": 0.2958, "step": 11514 }, { "epoch": 1.7620504973221118, "grad_norm": 2.187412096709696, "learning_rate": 7.333587556291233e-07, "loss": 0.2794, "step": 11515 }, { "epoch": 1.762203519510329, "grad_norm": 2.086117649436215, "learning_rate": 7.324275149454563e-07, "loss": 0.2924, "step": 11516 }, { "epoch": 1.7623565416985463, "grad_norm": 2.3882021880357027, "learning_rate": 7.314968434196734e-07, "loss": 0.272, "step": 11517 }, { "epoch": 1.7625095638867636, "grad_norm": 2.189237862691424, "learning_rate": 7.305667411089257e-07, "loss": 0.2529, "step": 11518 }, { "epoch": 1.7626625860749807, "grad_norm": 2.2909008823470076, "learning_rate": 7.296372080703351e-07, "loss": 0.3102, "step": 11519 }, { "epoch": 1.7628156082631983, "grad_norm": 2.479912838161086, "learning_rate": 7.287082443609938e-07, "loss": 0.3642, "step": 11520 }, { "epoch": 1.7629686304514154, "grad_norm": 2.396033551508515, "learning_rate": 7.27779850037944e-07, "loss": 0.3179, "step": 11521 }, { "epoch": 1.7631216526396327, "grad_norm": 2.0599105222120686, "learning_rate": 7.268520251582101e-07, "loss": 0.2572, "step": 11522 }, { "epoch": 1.76327467482785, "grad_norm": 2.1985313014313226, "learning_rate": 7.259247697787697e-07, "loss": 0.2397, "step": 11523 }, { "epoch": 1.7634276970160672, "grad_norm": 2.5257026577043353, "learning_rate": 7.249980839565673e-07, "loss": 0.4124, "step": 11524 }, { "epoch": 1.7635807192042847, "grad_norm": 2.1187741614819218, "learning_rate": 7.240719677485186e-07, "loss": 0.2284, "step": 11525 }, { "epoch": 1.7637337413925018, "grad_norm": 2.27650166500535, "learning_rate": 7.23146421211498e-07, "loss": 0.3091, "step": 11526 }, { "epoch": 1.7638867635807192, "grad_norm": 2.1431620058394687, "learning_rate": 7.222214444023468e-07, "loss": 0.2679, "step": 11527 }, { "epoch": 1.7640397857689365, "grad_norm": 2.2747276067202393, "learning_rate": 7.212970373778705e-07, "loss": 0.2761, "step": 11528 }, { "epoch": 1.7641928079571538, "grad_norm": 2.3816843053242156, "learning_rate": 7.203732001948427e-07, "loss": 0.3181, "step": 11529 }, { "epoch": 1.7643458301453712, "grad_norm": 1.9811359822858945, "learning_rate": 7.194499329099991e-07, "loss": 0.241, "step": 11530 }, { "epoch": 1.7644988523335883, "grad_norm": 2.076332988962881, "learning_rate": 7.185272355800399e-07, "loss": 0.318, "step": 11531 }, { "epoch": 1.7646518745218056, "grad_norm": 2.5309491908046593, "learning_rate": 7.176051082616331e-07, "loss": 0.3363, "step": 11532 }, { "epoch": 1.764804896710023, "grad_norm": 1.9335674180540168, "learning_rate": 7.16683551011409e-07, "loss": 0.2686, "step": 11533 }, { "epoch": 1.7649579188982403, "grad_norm": 1.986235927705645, "learning_rate": 7.157625638859634e-07, "loss": 0.2505, "step": 11534 }, { "epoch": 1.7651109410864576, "grad_norm": 2.013218268869103, "learning_rate": 7.14842146941862e-07, "loss": 0.2755, "step": 11535 }, { "epoch": 1.7652639632746747, "grad_norm": 2.5751645961695533, "learning_rate": 7.139223002356266e-07, "loss": 0.2704, "step": 11536 }, { "epoch": 1.7654169854628923, "grad_norm": 1.9783689343552875, "learning_rate": 7.130030238237484e-07, "loss": 0.2792, "step": 11537 }, { "epoch": 1.7655700076511094, "grad_norm": 1.9929281594053494, "learning_rate": 7.120843177626879e-07, "loss": 0.2913, "step": 11538 }, { "epoch": 1.7657230298393267, "grad_norm": 2.3890388963972895, "learning_rate": 7.111661821088633e-07, "loss": 0.3186, "step": 11539 }, { "epoch": 1.765876052027544, "grad_norm": 2.3135962005718786, "learning_rate": 7.102486169186595e-07, "loss": 0.2763, "step": 11540 }, { "epoch": 1.7660290742157612, "grad_norm": 1.9568397396801063, "learning_rate": 7.093316222484337e-07, "loss": 0.2673, "step": 11541 }, { "epoch": 1.7661820964039787, "grad_norm": 1.940537721405934, "learning_rate": 7.08415198154494e-07, "loss": 0.2478, "step": 11542 }, { "epoch": 1.7663351185921958, "grad_norm": 2.2076985256142763, "learning_rate": 7.074993446931288e-07, "loss": 0.277, "step": 11543 }, { "epoch": 1.7664881407804132, "grad_norm": 1.9708964282837547, "learning_rate": 7.065840619205821e-07, "loss": 0.2539, "step": 11544 }, { "epoch": 1.7666411629686305, "grad_norm": 2.2308721958293627, "learning_rate": 7.056693498930611e-07, "loss": 0.3503, "step": 11545 }, { "epoch": 1.7667941851568476, "grad_norm": 2.3264206303422523, "learning_rate": 7.047552086667464e-07, "loss": 0.3175, "step": 11546 }, { "epoch": 1.7669472073450652, "grad_norm": 2.2786931204458947, "learning_rate": 7.038416382977797e-07, "loss": 0.2961, "step": 11547 }, { "epoch": 1.7671002295332823, "grad_norm": 2.401697495954377, "learning_rate": 7.029286388422608e-07, "loss": 0.3503, "step": 11548 }, { "epoch": 1.7672532517214996, "grad_norm": 2.2087065408054634, "learning_rate": 7.020162103562667e-07, "loss": 0.2838, "step": 11549 }, { "epoch": 1.767406273909717, "grad_norm": 2.0496835744113904, "learning_rate": 7.011043528958306e-07, "loss": 0.2834, "step": 11550 }, { "epoch": 1.767559296097934, "grad_norm": 2.0960520988601146, "learning_rate": 7.001930665169543e-07, "loss": 0.2617, "step": 11551 }, { "epoch": 1.7677123182861516, "grad_norm": 2.1397333109537247, "learning_rate": 6.992823512756019e-07, "loss": 0.3063, "step": 11552 }, { "epoch": 1.7678653404743687, "grad_norm": 2.28631592306058, "learning_rate": 6.983722072277055e-07, "loss": 0.2877, "step": 11553 }, { "epoch": 1.768018362662586, "grad_norm": 1.7910000877177028, "learning_rate": 6.974626344291602e-07, "loss": 0.2054, "step": 11554 }, { "epoch": 1.7681713848508034, "grad_norm": 1.8451767717460463, "learning_rate": 6.965536329358258e-07, "loss": 0.2168, "step": 11555 }, { "epoch": 1.7683244070390205, "grad_norm": 2.277969473191107, "learning_rate": 6.956452028035287e-07, "loss": 0.2725, "step": 11556 }, { "epoch": 1.768477429227238, "grad_norm": 1.8798605342727712, "learning_rate": 6.947373440880578e-07, "loss": 0.2392, "step": 11557 }, { "epoch": 1.7686304514154552, "grad_norm": 2.018158526897199, "learning_rate": 6.938300568451695e-07, "loss": 0.2837, "step": 11558 }, { "epoch": 1.7687834736036725, "grad_norm": 2.082724016431595, "learning_rate": 6.929233411305836e-07, "loss": 0.297, "step": 11559 }, { "epoch": 1.7689364957918898, "grad_norm": 2.4526921631636722, "learning_rate": 6.920171969999845e-07, "loss": 0.2738, "step": 11560 }, { "epoch": 1.7690895179801072, "grad_norm": 2.137846305201505, "learning_rate": 6.911116245090222e-07, "loss": 0.2852, "step": 11561 }, { "epoch": 1.7692425401683245, "grad_norm": 2.1361546200502093, "learning_rate": 6.90206623713312e-07, "loss": 0.3244, "step": 11562 }, { "epoch": 1.7693955623565416, "grad_norm": 2.2487698262458062, "learning_rate": 6.893021946684309e-07, "loss": 0.3367, "step": 11563 }, { "epoch": 1.769548584544759, "grad_norm": 2.0760080354126487, "learning_rate": 6.883983374299296e-07, "loss": 0.3091, "step": 11564 }, { "epoch": 1.7697016067329763, "grad_norm": 2.10544391417939, "learning_rate": 6.874950520533108e-07, "loss": 0.3197, "step": 11565 }, { "epoch": 1.7698546289211936, "grad_norm": 2.161015077218226, "learning_rate": 6.86592338594051e-07, "loss": 0.2401, "step": 11566 }, { "epoch": 1.770007651109411, "grad_norm": 2.291425465652151, "learning_rate": 6.856901971075936e-07, "loss": 0.2838, "step": 11567 }, { "epoch": 1.770160673297628, "grad_norm": 1.9249319567789467, "learning_rate": 6.847886276493365e-07, "loss": 0.2095, "step": 11568 }, { "epoch": 1.7703136954858456, "grad_norm": 2.20987764398357, "learning_rate": 6.8388763027465e-07, "loss": 0.2599, "step": 11569 }, { "epoch": 1.7704667176740627, "grad_norm": 2.3077010605825703, "learning_rate": 6.82987205038872e-07, "loss": 0.3059, "step": 11570 }, { "epoch": 1.77061973986228, "grad_norm": 2.2411642688887174, "learning_rate": 6.820873519972949e-07, "loss": 0.2815, "step": 11571 }, { "epoch": 1.7707727620504974, "grad_norm": 2.2461254208083763, "learning_rate": 6.811880712051866e-07, "loss": 0.3286, "step": 11572 }, { "epoch": 1.7709257842387145, "grad_norm": 2.4101762861822853, "learning_rate": 6.802893627177753e-07, "loss": 0.2896, "step": 11573 }, { "epoch": 1.771078806426932, "grad_norm": 2.2324724632302635, "learning_rate": 6.793912265902514e-07, "loss": 0.2563, "step": 11574 }, { "epoch": 1.7712318286151492, "grad_norm": 2.409887429701208, "learning_rate": 6.78493662877775e-07, "loss": 0.337, "step": 11575 }, { "epoch": 1.7713848508033665, "grad_norm": 2.211937176981524, "learning_rate": 6.77596671635471e-07, "loss": 0.2453, "step": 11576 }, { "epoch": 1.7715378729915838, "grad_norm": 1.940927053286327, "learning_rate": 6.767002529184219e-07, "loss": 0.2219, "step": 11577 }, { "epoch": 1.771690895179801, "grad_norm": 2.122117699775585, "learning_rate": 6.758044067816849e-07, "loss": 0.2919, "step": 11578 }, { "epoch": 1.7718439173680185, "grad_norm": 2.014861880625214, "learning_rate": 6.749091332802748e-07, "loss": 0.2203, "step": 11579 }, { "epoch": 1.7719969395562356, "grad_norm": 2.134298312181937, "learning_rate": 6.740144324691755e-07, "loss": 0.2862, "step": 11580 }, { "epoch": 1.772149961744453, "grad_norm": 2.1255099152844896, "learning_rate": 6.73120304403333e-07, "loss": 0.2669, "step": 11581 }, { "epoch": 1.7723029839326703, "grad_norm": 2.523914791255019, "learning_rate": 6.722267491376599e-07, "loss": 0.3238, "step": 11582 }, { "epoch": 1.7724560061208874, "grad_norm": 1.9316026331139717, "learning_rate": 6.713337667270325e-07, "loss": 0.2682, "step": 11583 }, { "epoch": 1.772609028309105, "grad_norm": 2.3472903220055366, "learning_rate": 6.704413572262924e-07, "loss": 0.316, "step": 11584 }, { "epoch": 1.772762050497322, "grad_norm": 2.054524648699194, "learning_rate": 6.695495206902458e-07, "loss": 0.2599, "step": 11585 }, { "epoch": 1.7729150726855394, "grad_norm": 1.8121343266405059, "learning_rate": 6.686582571736643e-07, "loss": 0.2625, "step": 11586 }, { "epoch": 1.7730680948737567, "grad_norm": 2.1207606831716674, "learning_rate": 6.677675667312844e-07, "loss": 0.2734, "step": 11587 }, { "epoch": 1.7732211170619738, "grad_norm": 2.4304987605550656, "learning_rate": 6.668774494178054e-07, "loss": 0.3168, "step": 11588 }, { "epoch": 1.7733741392501914, "grad_norm": 1.9828230674015517, "learning_rate": 6.659879052878925e-07, "loss": 0.2348, "step": 11589 }, { "epoch": 1.7735271614384085, "grad_norm": 2.460569333288936, "learning_rate": 6.650989343961788e-07, "loss": 0.3821, "step": 11590 }, { "epoch": 1.7736801836266258, "grad_norm": 2.016231564778463, "learning_rate": 6.642105367972573e-07, "loss": 0.2461, "step": 11591 }, { "epoch": 1.7738332058148432, "grad_norm": 1.9688510542138224, "learning_rate": 6.633227125456887e-07, "loss": 0.242, "step": 11592 }, { "epoch": 1.7739862280030605, "grad_norm": 1.953541653477738, "learning_rate": 6.624354616959971e-07, "loss": 0.2461, "step": 11593 }, { "epoch": 1.7741392501912778, "grad_norm": 2.1083010117470486, "learning_rate": 6.615487843026735e-07, "loss": 0.2774, "step": 11594 }, { "epoch": 1.774292272379495, "grad_norm": 2.1541068089107656, "learning_rate": 6.606626804201688e-07, "loss": 0.2902, "step": 11595 }, { "epoch": 1.7744452945677123, "grad_norm": 2.285536978638378, "learning_rate": 6.597771501029082e-07, "loss": 0.2622, "step": 11596 }, { "epoch": 1.7745983167559296, "grad_norm": 2.319317293092176, "learning_rate": 6.588921934052705e-07, "loss": 0.2898, "step": 11597 }, { "epoch": 1.774751338944147, "grad_norm": 1.993917651888502, "learning_rate": 6.580078103816034e-07, "loss": 0.2264, "step": 11598 }, { "epoch": 1.7749043611323643, "grad_norm": 1.9890587475176882, "learning_rate": 6.571240010862267e-07, "loss": 0.2461, "step": 11599 }, { "epoch": 1.7750573833205814, "grad_norm": 1.7738468768909665, "learning_rate": 6.562407655734116e-07, "loss": 0.2365, "step": 11600 }, { "epoch": 1.775210405508799, "grad_norm": 2.229496564560051, "learning_rate": 6.553581038974055e-07, "loss": 0.3035, "step": 11601 }, { "epoch": 1.775363427697016, "grad_norm": 2.0336619601974375, "learning_rate": 6.544760161124153e-07, "loss": 0.2633, "step": 11602 }, { "epoch": 1.7755164498852334, "grad_norm": 2.002039939027311, "learning_rate": 6.535945022726109e-07, "loss": 0.2538, "step": 11603 }, { "epoch": 1.7756694720734507, "grad_norm": 2.0847731213098033, "learning_rate": 6.527135624321324e-07, "loss": 0.3249, "step": 11604 }, { "epoch": 1.7758224942616678, "grad_norm": 2.2660603591564517, "learning_rate": 6.51833196645083e-07, "loss": 0.3123, "step": 11605 }, { "epoch": 1.7759755164498854, "grad_norm": 2.432517369709772, "learning_rate": 6.509534049655231e-07, "loss": 0.2786, "step": 11606 }, { "epoch": 1.7761285386381025, "grad_norm": 1.8314023909408828, "learning_rate": 6.500741874474903e-07, "loss": 0.2399, "step": 11607 }, { "epoch": 1.7762815608263198, "grad_norm": 2.32761232065426, "learning_rate": 6.491955441449793e-07, "loss": 0.3315, "step": 11608 }, { "epoch": 1.7764345830145372, "grad_norm": 2.27982039867596, "learning_rate": 6.483174751119503e-07, "loss": 0.289, "step": 11609 }, { "epoch": 1.7765876052027543, "grad_norm": 2.112027368923221, "learning_rate": 6.47439980402329e-07, "loss": 0.2515, "step": 11610 }, { "epoch": 1.7767406273909718, "grad_norm": 2.2189829473835383, "learning_rate": 6.465630600700057e-07, "loss": 0.2844, "step": 11611 }, { "epoch": 1.776893649579189, "grad_norm": 1.9160559036153286, "learning_rate": 6.456867141688361e-07, "loss": 0.2474, "step": 11612 }, { "epoch": 1.7770466717674063, "grad_norm": 1.8834851653508435, "learning_rate": 6.448109427526394e-07, "loss": 0.3078, "step": 11613 }, { "epoch": 1.7771996939556236, "grad_norm": 2.0496891897542695, "learning_rate": 6.439357458752005e-07, "loss": 0.2768, "step": 11614 }, { "epoch": 1.7773527161438407, "grad_norm": 2.071621909374972, "learning_rate": 6.430611235902684e-07, "loss": 0.2088, "step": 11615 }, { "epoch": 1.7775057383320583, "grad_norm": 2.405026711662064, "learning_rate": 6.421870759515558e-07, "loss": 0.3089, "step": 11616 }, { "epoch": 1.7776587605202754, "grad_norm": 2.082176155954287, "learning_rate": 6.413136030127465e-07, "loss": 0.2935, "step": 11617 }, { "epoch": 1.7778117827084927, "grad_norm": 2.0675961828515295, "learning_rate": 6.404407048274775e-07, "loss": 0.3031, "step": 11618 }, { "epoch": 1.77796480489671, "grad_norm": 2.359503068353368, "learning_rate": 6.395683814493592e-07, "loss": 0.3404, "step": 11619 }, { "epoch": 1.7781178270849272, "grad_norm": 2.4968081203038293, "learning_rate": 6.386966329319666e-07, "loss": 0.3414, "step": 11620 }, { "epoch": 1.7782708492731447, "grad_norm": 2.034343682425657, "learning_rate": 6.378254593288347e-07, "loss": 0.2631, "step": 11621 }, { "epoch": 1.7784238714613618, "grad_norm": 2.01888433987026, "learning_rate": 6.36954860693465e-07, "loss": 0.2474, "step": 11622 }, { "epoch": 1.7785768936495792, "grad_norm": 2.239054272129354, "learning_rate": 6.360848370793293e-07, "loss": 0.3453, "step": 11623 }, { "epoch": 1.7787299158377965, "grad_norm": 1.8388079956214614, "learning_rate": 6.352153885398516e-07, "loss": 0.2245, "step": 11624 }, { "epoch": 1.7788829380260138, "grad_norm": 2.3830486932199335, "learning_rate": 6.343465151284334e-07, "loss": 0.3301, "step": 11625 }, { "epoch": 1.7790359602142312, "grad_norm": 2.0508156507592705, "learning_rate": 6.334782168984365e-07, "loss": 0.2308, "step": 11626 }, { "epoch": 1.7791889824024483, "grad_norm": 2.086349526363687, "learning_rate": 6.326104939031818e-07, "loss": 0.2734, "step": 11627 }, { "epoch": 1.7793420045906656, "grad_norm": 2.1454319811649882, "learning_rate": 6.317433461959622e-07, "loss": 0.2935, "step": 11628 }, { "epoch": 1.779495026778883, "grad_norm": 2.3671829733376453, "learning_rate": 6.308767738300358e-07, "loss": 0.3281, "step": 11629 }, { "epoch": 1.7796480489671003, "grad_norm": 2.192383371880891, "learning_rate": 6.30010776858615e-07, "loss": 0.3156, "step": 11630 }, { "epoch": 1.7798010711553176, "grad_norm": 2.131210440220201, "learning_rate": 6.291453553348892e-07, "loss": 0.3044, "step": 11631 }, { "epoch": 1.7799540933435347, "grad_norm": 2.5597526386839844, "learning_rate": 6.282805093120059e-07, "loss": 0.3019, "step": 11632 }, { "epoch": 1.780107115531752, "grad_norm": 2.3159736097903587, "learning_rate": 6.274162388430794e-07, "loss": 0.2879, "step": 11633 }, { "epoch": 1.7802601377199694, "grad_norm": 2.4289419941363324, "learning_rate": 6.265525439811881e-07, "loss": 0.354, "step": 11634 }, { "epoch": 1.7804131599081867, "grad_norm": 2.023080467977148, "learning_rate": 6.256894247793732e-07, "loss": 0.2547, "step": 11635 }, { "epoch": 1.780566182096404, "grad_norm": 1.9144497659810062, "learning_rate": 6.248268812906433e-07, "loss": 0.2787, "step": 11636 }, { "epoch": 1.7807192042846212, "grad_norm": 2.104341239947922, "learning_rate": 6.239649135679704e-07, "loss": 0.2601, "step": 11637 }, { "epoch": 1.7808722264728387, "grad_norm": 1.8563273314684772, "learning_rate": 6.231035216642922e-07, "loss": 0.2237, "step": 11638 }, { "epoch": 1.7810252486610558, "grad_norm": 1.9084349351630756, "learning_rate": 6.222427056325087e-07, "loss": 0.2434, "step": 11639 }, { "epoch": 1.7811782708492732, "grad_norm": 2.20964827735286, "learning_rate": 6.213824655254874e-07, "loss": 0.3002, "step": 11640 }, { "epoch": 1.7813312930374905, "grad_norm": 2.0264820560965933, "learning_rate": 6.205228013960574e-07, "loss": 0.2534, "step": 11641 }, { "epoch": 1.7814843152257076, "grad_norm": 2.2198244041607924, "learning_rate": 6.196637132970151e-07, "loss": 0.2982, "step": 11642 }, { "epoch": 1.7816373374139252, "grad_norm": 2.051614021142709, "learning_rate": 6.188052012811207e-07, "loss": 0.2739, "step": 11643 }, { "epoch": 1.7817903596021423, "grad_norm": 2.148961658639848, "learning_rate": 6.179472654010976e-07, "loss": 0.2943, "step": 11644 }, { "epoch": 1.7819433817903596, "grad_norm": 1.96673620662787, "learning_rate": 6.170899057096347e-07, "loss": 0.3192, "step": 11645 }, { "epoch": 1.782096403978577, "grad_norm": 2.0614212600984705, "learning_rate": 6.162331222593898e-07, "loss": 0.335, "step": 11646 }, { "epoch": 1.782249426166794, "grad_norm": 2.0945513989942457, "learning_rate": 6.153769151029765e-07, "loss": 0.2588, "step": 11647 }, { "epoch": 1.7824024483550116, "grad_norm": 2.131680603392567, "learning_rate": 6.145212842929793e-07, "loss": 0.3115, "step": 11648 }, { "epoch": 1.7825554705432287, "grad_norm": 2.169429836682061, "learning_rate": 6.136662298819484e-07, "loss": 0.3033, "step": 11649 }, { "epoch": 1.782708492731446, "grad_norm": 1.8813273070863772, "learning_rate": 6.128117519223919e-07, "loss": 0.2579, "step": 11650 }, { "epoch": 1.7828615149196634, "grad_norm": 1.9967126429259028, "learning_rate": 6.119578504667878e-07, "loss": 0.2296, "step": 11651 }, { "epoch": 1.7830145371078805, "grad_norm": 2.325664541784967, "learning_rate": 6.111045255675808e-07, "loss": 0.262, "step": 11652 }, { "epoch": 1.783167559296098, "grad_norm": 2.1955597124128086, "learning_rate": 6.102517772771721e-07, "loss": 0.3015, "step": 11653 }, { "epoch": 1.7833205814843152, "grad_norm": 2.2385583608451967, "learning_rate": 6.093996056479356e-07, "loss": 0.3167, "step": 11654 }, { "epoch": 1.7834736036725325, "grad_norm": 2.0869614373628185, "learning_rate": 6.085480107322072e-07, "loss": 0.3075, "step": 11655 }, { "epoch": 1.7836266258607498, "grad_norm": 1.8924749927757958, "learning_rate": 6.076969925822829e-07, "loss": 0.2345, "step": 11656 }, { "epoch": 1.783779648048967, "grad_norm": 2.347694523952781, "learning_rate": 6.068465512504296e-07, "loss": 0.3419, "step": 11657 }, { "epoch": 1.7839326702371845, "grad_norm": 2.1575066247384034, "learning_rate": 6.059966867888778e-07, "loss": 0.2692, "step": 11658 }, { "epoch": 1.7840856924254016, "grad_norm": 2.4130816506654935, "learning_rate": 6.051473992498158e-07, "loss": 0.3215, "step": 11659 }, { "epoch": 1.784238714613619, "grad_norm": 1.8151835358364201, "learning_rate": 6.042986886854063e-07, "loss": 0.252, "step": 11660 }, { "epoch": 1.7843917368018363, "grad_norm": 1.9735591729644815, "learning_rate": 6.03450555147771e-07, "loss": 0.2327, "step": 11661 }, { "epoch": 1.7845447589900536, "grad_norm": 2.292921280603086, "learning_rate": 6.026029986889959e-07, "loss": 0.3124, "step": 11662 }, { "epoch": 1.784697781178271, "grad_norm": 2.143060264993622, "learning_rate": 6.017560193611338e-07, "loss": 0.2716, "step": 11663 }, { "epoch": 1.784850803366488, "grad_norm": 2.1064055632602052, "learning_rate": 6.00909617216201e-07, "loss": 0.324, "step": 11664 }, { "epoch": 1.7850038255547054, "grad_norm": 2.491721456754776, "learning_rate": 6.000637923061769e-07, "loss": 0.2778, "step": 11665 }, { "epoch": 1.7851568477429227, "grad_norm": 1.9397667295022485, "learning_rate": 5.99218544683009e-07, "loss": 0.2748, "step": 11666 }, { "epoch": 1.78530986993114, "grad_norm": 2.0315429084869323, "learning_rate": 5.983738743986068e-07, "loss": 0.2424, "step": 11667 }, { "epoch": 1.7854628921193574, "grad_norm": 2.3518604760030946, "learning_rate": 5.975297815048442e-07, "loss": 0.3663, "step": 11668 }, { "epoch": 1.7856159143075745, "grad_norm": 2.260235840291961, "learning_rate": 5.9668626605356e-07, "loss": 0.3246, "step": 11669 }, { "epoch": 1.785768936495792, "grad_norm": 2.2893769844413283, "learning_rate": 5.958433280965581e-07, "loss": 0.2896, "step": 11670 }, { "epoch": 1.7859219586840092, "grad_norm": 2.2230273719541893, "learning_rate": 5.950009676856061e-07, "loss": 0.36, "step": 11671 }, { "epoch": 1.7860749808722265, "grad_norm": 2.3200719885128858, "learning_rate": 5.941591848724381e-07, "loss": 0.3401, "step": 11672 }, { "epoch": 1.7862280030604438, "grad_norm": 2.1974597281976593, "learning_rate": 5.933179797087507e-07, "loss": 0.3361, "step": 11673 }, { "epoch": 1.786381025248661, "grad_norm": 2.5283285291331223, "learning_rate": 5.924773522462024e-07, "loss": 0.2941, "step": 11674 }, { "epoch": 1.7865340474368785, "grad_norm": 2.112766463182009, "learning_rate": 5.916373025364264e-07, "loss": 0.2504, "step": 11675 }, { "epoch": 1.7866870696250956, "grad_norm": 2.1968271904468812, "learning_rate": 5.907978306310081e-07, "loss": 0.311, "step": 11676 }, { "epoch": 1.786840091813313, "grad_norm": 2.21326844605421, "learning_rate": 5.899589365815018e-07, "loss": 0.346, "step": 11677 }, { "epoch": 1.7869931140015303, "grad_norm": 2.249810646236595, "learning_rate": 5.891206204394329e-07, "loss": 0.2816, "step": 11678 }, { "epoch": 1.7871461361897474, "grad_norm": 1.8485888703819569, "learning_rate": 5.882828822562814e-07, "loss": 0.2307, "step": 11679 }, { "epoch": 1.787299158377965, "grad_norm": 2.0267399285945404, "learning_rate": 5.87445722083494e-07, "loss": 0.2479, "step": 11680 }, { "epoch": 1.787452180566182, "grad_norm": 2.0911961020778653, "learning_rate": 5.866091399724916e-07, "loss": 0.3178, "step": 11681 }, { "epoch": 1.7876052027543994, "grad_norm": 1.9366386472362302, "learning_rate": 5.857731359746433e-07, "loss": 0.2728, "step": 11682 }, { "epoch": 1.7877582249426167, "grad_norm": 2.0375420240316817, "learning_rate": 5.849377101412967e-07, "loss": 0.3075, "step": 11683 }, { "epoch": 1.7879112471308338, "grad_norm": 1.7988385396357176, "learning_rate": 5.841028625237589e-07, "loss": 0.2719, "step": 11684 }, { "epoch": 1.7880642693190514, "grad_norm": 2.5279457807415464, "learning_rate": 5.832685931732963e-07, "loss": 0.3454, "step": 11685 }, { "epoch": 1.7882172915072685, "grad_norm": 2.313295462058856, "learning_rate": 5.824349021411502e-07, "loss": 0.256, "step": 11686 }, { "epoch": 1.7883703136954858, "grad_norm": 2.1886780113954254, "learning_rate": 5.816017894785197e-07, "loss": 0.2694, "step": 11687 }, { "epoch": 1.7885233358837032, "grad_norm": 1.8681046262205732, "learning_rate": 5.807692552365652e-07, "loss": 0.2614, "step": 11688 }, { "epoch": 1.7886763580719203, "grad_norm": 2.0027057223970317, "learning_rate": 5.799372994664199e-07, "loss": 0.259, "step": 11689 }, { "epoch": 1.7888293802601378, "grad_norm": 2.337302769224805, "learning_rate": 5.791059222191764e-07, "loss": 0.2759, "step": 11690 }, { "epoch": 1.788982402448355, "grad_norm": 2.121766483543593, "learning_rate": 5.782751235458939e-07, "loss": 0.323, "step": 11691 }, { "epoch": 1.7891354246365723, "grad_norm": 1.9704435983259665, "learning_rate": 5.774449034975938e-07, "loss": 0.3234, "step": 11692 }, { "epoch": 1.7892884468247896, "grad_norm": 2.0983690476734305, "learning_rate": 5.766152621252629e-07, "loss": 0.2725, "step": 11693 }, { "epoch": 1.789441469013007, "grad_norm": 2.1429735780924744, "learning_rate": 5.757861994798542e-07, "loss": 0.2911, "step": 11694 }, { "epoch": 1.7895944912012243, "grad_norm": 1.9899945550379567, "learning_rate": 5.749577156122821e-07, "loss": 0.2584, "step": 11695 }, { "epoch": 1.7897475133894414, "grad_norm": 2.1339230085642242, "learning_rate": 5.741298105734272e-07, "loss": 0.2873, "step": 11696 }, { "epoch": 1.7899005355776587, "grad_norm": 1.9769567723769148, "learning_rate": 5.733024844141355e-07, "loss": 0.2111, "step": 11697 }, { "epoch": 1.790053557765876, "grad_norm": 1.8199371530775728, "learning_rate": 5.724757371852141e-07, "loss": 0.2833, "step": 11698 }, { "epoch": 1.7902065799540934, "grad_norm": 2.204375861271401, "learning_rate": 5.716495689374413e-07, "loss": 0.3179, "step": 11699 }, { "epoch": 1.7903596021423107, "grad_norm": 2.392939646494985, "learning_rate": 5.708239797215509e-07, "loss": 0.2879, "step": 11700 }, { "epoch": 1.7905126243305278, "grad_norm": 2.262298036101752, "learning_rate": 5.699989695882458e-07, "loss": 0.3352, "step": 11701 }, { "epoch": 1.7906656465187454, "grad_norm": 1.9027329378061686, "learning_rate": 5.691745385881964e-07, "loss": 0.1847, "step": 11702 }, { "epoch": 1.7908186687069625, "grad_norm": 2.150646438874415, "learning_rate": 5.683506867720301e-07, "loss": 0.2933, "step": 11703 }, { "epoch": 1.7909716908951798, "grad_norm": 2.2417842603845526, "learning_rate": 5.675274141903464e-07, "loss": 0.2996, "step": 11704 }, { "epoch": 1.7911247130833972, "grad_norm": 2.346125193672816, "learning_rate": 5.66704720893706e-07, "loss": 0.2515, "step": 11705 }, { "epoch": 1.7912777352716143, "grad_norm": 1.9921160410033858, "learning_rate": 5.658826069326284e-07, "loss": 0.2964, "step": 11706 }, { "epoch": 1.7914307574598318, "grad_norm": 2.1445987832520053, "learning_rate": 5.650610723576078e-07, "loss": 0.2592, "step": 11707 }, { "epoch": 1.791583779648049, "grad_norm": 2.0694519109933727, "learning_rate": 5.642401172190981e-07, "loss": 0.2388, "step": 11708 }, { "epoch": 1.7917368018362663, "grad_norm": 2.238611258670583, "learning_rate": 5.634197415675124e-07, "loss": 0.2617, "step": 11709 }, { "epoch": 1.7918898240244836, "grad_norm": 2.151535537763062, "learning_rate": 5.625999454532383e-07, "loss": 0.2704, "step": 11710 }, { "epoch": 1.7920428462127007, "grad_norm": 2.059430799019778, "learning_rate": 5.617807289266208e-07, "loss": 0.3045, "step": 11711 }, { "epoch": 1.7921958684009183, "grad_norm": 2.119929153330778, "learning_rate": 5.609620920379721e-07, "loss": 0.3171, "step": 11712 }, { "epoch": 1.7923488905891354, "grad_norm": 1.9604155333595272, "learning_rate": 5.601440348375653e-07, "loss": 0.2426, "step": 11713 }, { "epoch": 1.7925019127773527, "grad_norm": 1.9606199783978195, "learning_rate": 5.593265573756434e-07, "loss": 0.2463, "step": 11714 }, { "epoch": 1.79265493496557, "grad_norm": 2.0879423430699737, "learning_rate": 5.585096597024098e-07, "loss": 0.2984, "step": 11715 }, { "epoch": 1.7928079571537872, "grad_norm": 2.1097963203212102, "learning_rate": 5.576933418680331e-07, "loss": 0.3156, "step": 11716 }, { "epoch": 1.7929609793420047, "grad_norm": 1.9424617217380995, "learning_rate": 5.568776039226465e-07, "loss": 0.2702, "step": 11717 }, { "epoch": 1.7931140015302218, "grad_norm": 2.0457163778091196, "learning_rate": 5.560624459163488e-07, "loss": 0.2134, "step": 11718 }, { "epoch": 1.7932670237184392, "grad_norm": 2.2486400242745916, "learning_rate": 5.552478678992001e-07, "loss": 0.244, "step": 11719 }, { "epoch": 1.7934200459066565, "grad_norm": 2.1376451976669166, "learning_rate": 5.544338699212281e-07, "loss": 0.216, "step": 11720 }, { "epoch": 1.7935730680948736, "grad_norm": 2.388209980608498, "learning_rate": 5.536204520324239e-07, "loss": 0.2641, "step": 11721 }, { "epoch": 1.7937260902830912, "grad_norm": 2.1460112607255053, "learning_rate": 5.528076142827422e-07, "loss": 0.2468, "step": 11722 }, { "epoch": 1.7938791124713083, "grad_norm": 2.0739753388599462, "learning_rate": 5.519953567221025e-07, "loss": 0.2704, "step": 11723 }, { "epoch": 1.7940321346595256, "grad_norm": 2.0422144203755295, "learning_rate": 5.511836794003889e-07, "loss": 0.2772, "step": 11724 }, { "epoch": 1.794185156847743, "grad_norm": 2.0214617735698037, "learning_rate": 5.503725823674488e-07, "loss": 0.2763, "step": 11725 }, { "epoch": 1.7943381790359603, "grad_norm": 1.7177450706562944, "learning_rate": 5.49562065673096e-07, "loss": 0.2216, "step": 11726 }, { "epoch": 1.7944912012241776, "grad_norm": 2.0555841425573567, "learning_rate": 5.48752129367105e-07, "loss": 0.2227, "step": 11727 }, { "epoch": 1.7946442234123947, "grad_norm": 2.080887556455304, "learning_rate": 5.479427734992226e-07, "loss": 0.2297, "step": 11728 }, { "epoch": 1.794797245600612, "grad_norm": 2.1233295078008037, "learning_rate": 5.471339981191503e-07, "loss": 0.3034, "step": 11729 }, { "epoch": 1.7949502677888294, "grad_norm": 2.3167717650903046, "learning_rate": 5.463258032765573e-07, "loss": 0.2657, "step": 11730 }, { "epoch": 1.7951032899770467, "grad_norm": 2.319827835372855, "learning_rate": 5.455181890210814e-07, "loss": 0.2852, "step": 11731 }, { "epoch": 1.795256312165264, "grad_norm": 1.9427112608034016, "learning_rate": 5.447111554023199e-07, "loss": 0.2446, "step": 11732 }, { "epoch": 1.7954093343534812, "grad_norm": 2.0329376925996603, "learning_rate": 5.43904702469833e-07, "loss": 0.2396, "step": 11733 }, { "epoch": 1.7955623565416987, "grad_norm": 1.7308500793905726, "learning_rate": 5.430988302731544e-07, "loss": 0.2522, "step": 11734 }, { "epoch": 1.7957153787299158, "grad_norm": 2.1219125389826554, "learning_rate": 5.422935388617689e-07, "loss": 0.2545, "step": 11735 }, { "epoch": 1.7958684009181332, "grad_norm": 1.972590600455119, "learning_rate": 5.414888282851372e-07, "loss": 0.2451, "step": 11736 }, { "epoch": 1.7960214231063505, "grad_norm": 1.9834371644107958, "learning_rate": 5.406846985926805e-07, "loss": 0.2387, "step": 11737 }, { "epoch": 1.7961744452945676, "grad_norm": 1.9935226037345846, "learning_rate": 5.398811498337786e-07, "loss": 0.2365, "step": 11738 }, { "epoch": 1.7963274674827852, "grad_norm": 3.541082073396699, "learning_rate": 5.39078182057785e-07, "loss": 0.3011, "step": 11739 }, { "epoch": 1.7964804896710023, "grad_norm": 2.2449387681269695, "learning_rate": 5.382757953140105e-07, "loss": 0.2998, "step": 11740 }, { "epoch": 1.7966335118592196, "grad_norm": 2.373753123628044, "learning_rate": 5.374739896517345e-07, "loss": 0.3223, "step": 11741 }, { "epoch": 1.796786534047437, "grad_norm": 2.3114047848746972, "learning_rate": 5.366727651201986e-07, "loss": 0.2996, "step": 11742 }, { "epoch": 1.796939556235654, "grad_norm": 2.0196626112232257, "learning_rate": 5.358721217686091e-07, "loss": 0.2923, "step": 11743 }, { "epoch": 1.7970925784238716, "grad_norm": 2.035219870357041, "learning_rate": 5.350720596461367e-07, "loss": 0.2943, "step": 11744 }, { "epoch": 1.7972456006120887, "grad_norm": 2.2123252681687724, "learning_rate": 5.342725788019154e-07, "loss": 0.2645, "step": 11745 }, { "epoch": 1.797398622800306, "grad_norm": 2.1569997981072664, "learning_rate": 5.334736792850448e-07, "loss": 0.2261, "step": 11746 }, { "epoch": 1.7975516449885234, "grad_norm": 2.420526338846463, "learning_rate": 5.326753611445901e-07, "loss": 0.2685, "step": 11747 }, { "epoch": 1.7977046671767405, "grad_norm": 2.333183084239707, "learning_rate": 5.318776244295776e-07, "loss": 0.2612, "step": 11748 }, { "epoch": 1.797857689364958, "grad_norm": 2.0257483589647833, "learning_rate": 5.310804691889993e-07, "loss": 0.2629, "step": 11749 }, { "epoch": 1.7980107115531752, "grad_norm": 2.0788523474030485, "learning_rate": 5.302838954718115e-07, "loss": 0.3047, "step": 11750 }, { "epoch": 1.7981637337413925, "grad_norm": 2.1004294540442534, "learning_rate": 5.294879033269362e-07, "loss": 0.2874, "step": 11751 }, { "epoch": 1.7983167559296098, "grad_norm": 2.1730252469221836, "learning_rate": 5.286924928032566e-07, "loss": 0.2628, "step": 11752 }, { "epoch": 1.798469778117827, "grad_norm": 2.1484332701793125, "learning_rate": 5.278976639496236e-07, "loss": 0.2705, "step": 11753 }, { "epoch": 1.7986228003060445, "grad_norm": 2.0795707539418697, "learning_rate": 5.271034168148504e-07, "loss": 0.2895, "step": 11754 }, { "epoch": 1.7987758224942616, "grad_norm": 2.0650753087535003, "learning_rate": 5.263097514477145e-07, "loss": 0.2599, "step": 11755 }, { "epoch": 1.798928844682479, "grad_norm": 2.2929545293487643, "learning_rate": 5.25516667896957e-07, "loss": 0.3368, "step": 11756 }, { "epoch": 1.7990818668706963, "grad_norm": 2.171294433545801, "learning_rate": 5.247241662112878e-07, "loss": 0.3341, "step": 11757 }, { "epoch": 1.7992348890589134, "grad_norm": 2.510967028431956, "learning_rate": 5.239322464393726e-07, "loss": 0.3286, "step": 11758 }, { "epoch": 1.799387911247131, "grad_norm": 2.5373624082576667, "learning_rate": 5.231409086298489e-07, "loss": 0.3282, "step": 11759 }, { "epoch": 1.799540933435348, "grad_norm": 2.3404830830993664, "learning_rate": 5.223501528313179e-07, "loss": 0.2916, "step": 11760 }, { "epoch": 1.7996939556235654, "grad_norm": 2.4516704197186145, "learning_rate": 5.215599790923387e-07, "loss": 0.3127, "step": 11761 }, { "epoch": 1.7998469778117827, "grad_norm": 1.8951882844356727, "learning_rate": 5.2077038746144e-07, "loss": 0.2737, "step": 11762 }, { "epoch": 1.8, "grad_norm": 2.1140050714971848, "learning_rate": 5.199813779871187e-07, "loss": 0.3123, "step": 11763 }, { "epoch": 1.8001530221882174, "grad_norm": 2.3629890157765683, "learning_rate": 5.191929507178228e-07, "loss": 0.2923, "step": 11764 }, { "epoch": 1.8003060443764345, "grad_norm": 2.2207657842982704, "learning_rate": 5.18405105701979e-07, "loss": 0.2737, "step": 11765 }, { "epoch": 1.8004590665646518, "grad_norm": 2.2328795744392687, "learning_rate": 5.176178429879708e-07, "loss": 0.2769, "step": 11766 }, { "epoch": 1.8006120887528692, "grad_norm": 2.2494304651674804, "learning_rate": 5.16831162624144e-07, "loss": 0.305, "step": 11767 }, { "epoch": 1.8007651109410865, "grad_norm": 2.097178530222374, "learning_rate": 5.160450646588144e-07, "loss": 0.2687, "step": 11768 }, { "epoch": 1.8009181331293038, "grad_norm": 1.949079996975221, "learning_rate": 5.152595491402612e-07, "loss": 0.2349, "step": 11769 }, { "epoch": 1.801071155317521, "grad_norm": 1.9194856467463366, "learning_rate": 5.144746161167202e-07, "loss": 0.2499, "step": 11770 }, { "epoch": 1.8012241775057385, "grad_norm": 1.9094906109398266, "learning_rate": 5.136902656364018e-07, "loss": 0.263, "step": 11771 }, { "epoch": 1.8013771996939556, "grad_norm": 2.0543659422279172, "learning_rate": 5.129064977474752e-07, "loss": 0.2677, "step": 11772 }, { "epoch": 1.801530221882173, "grad_norm": 2.037500846294362, "learning_rate": 5.121233124980751e-07, "loss": 0.277, "step": 11773 }, { "epoch": 1.8016832440703903, "grad_norm": 2.5064908670823125, "learning_rate": 5.113407099362977e-07, "loss": 0.3037, "step": 11774 }, { "epoch": 1.8018362662586074, "grad_norm": 2.3767799879674065, "learning_rate": 5.105586901102078e-07, "loss": 0.3141, "step": 11775 }, { "epoch": 1.801989288446825, "grad_norm": 2.1263927705629877, "learning_rate": 5.097772530678325e-07, "loss": 0.2535, "step": 11776 }, { "epoch": 1.802142310635042, "grad_norm": 2.101381107662464, "learning_rate": 5.089963988571589e-07, "loss": 0.2888, "step": 11777 }, { "epoch": 1.8022953328232594, "grad_norm": 2.1648239103094844, "learning_rate": 5.082161275261499e-07, "loss": 0.2802, "step": 11778 }, { "epoch": 1.8024483550114767, "grad_norm": 2.4059903176752617, "learning_rate": 5.074364391227182e-07, "loss": 0.2784, "step": 11779 }, { "epoch": 1.8026013771996938, "grad_norm": 2.0832771851068754, "learning_rate": 5.066573336947489e-07, "loss": 0.3067, "step": 11780 }, { "epoch": 1.8027543993879114, "grad_norm": 1.8546774228621359, "learning_rate": 5.058788112900925e-07, "loss": 0.2336, "step": 11781 }, { "epoch": 1.8029074215761285, "grad_norm": 1.9287318570324383, "learning_rate": 5.051008719565597e-07, "loss": 0.2113, "step": 11782 }, { "epoch": 1.8030604437643458, "grad_norm": 2.329499263317703, "learning_rate": 5.043235157419246e-07, "loss": 0.2933, "step": 11783 }, { "epoch": 1.8032134659525632, "grad_norm": 2.0964211634689702, "learning_rate": 5.035467426939322e-07, "loss": 0.3175, "step": 11784 }, { "epoch": 1.8033664881407803, "grad_norm": 1.8844374172641056, "learning_rate": 5.027705528602822e-07, "loss": 0.2303, "step": 11785 }, { "epoch": 1.8035195103289978, "grad_norm": 1.9323244223377913, "learning_rate": 5.019949462886476e-07, "loss": 0.2753, "step": 11786 }, { "epoch": 1.803672532517215, "grad_norm": 2.3979087439781517, "learning_rate": 5.012199230266612e-07, "loss": 0.2833, "step": 11787 }, { "epoch": 1.8038255547054323, "grad_norm": 2.066289773156986, "learning_rate": 5.004454831219152e-07, "loss": 0.2935, "step": 11788 }, { "epoch": 1.8039785768936496, "grad_norm": 2.037769789992835, "learning_rate": 4.99671626621977e-07, "loss": 0.2397, "step": 11789 }, { "epoch": 1.8041315990818667, "grad_norm": 2.0152504446404653, "learning_rate": 4.988983535743697e-07, "loss": 0.2613, "step": 11790 }, { "epoch": 1.8042846212700843, "grad_norm": 1.7574012569673563, "learning_rate": 4.981256640265808e-07, "loss": 0.2328, "step": 11791 }, { "epoch": 1.8044376434583014, "grad_norm": 2.189575784945479, "learning_rate": 4.97353558026068e-07, "loss": 0.3055, "step": 11792 }, { "epoch": 1.8045906656465187, "grad_norm": 2.131528056103428, "learning_rate": 4.965820356202478e-07, "loss": 0.2894, "step": 11793 }, { "epoch": 1.804743687834736, "grad_norm": 2.1817272506538496, "learning_rate": 4.958110968565022e-07, "loss": 0.3182, "step": 11794 }, { "epoch": 1.8048967100229534, "grad_norm": 2.547644929513796, "learning_rate": 4.950407417821779e-07, "loss": 0.2626, "step": 11795 }, { "epoch": 1.8050497322111707, "grad_norm": 2.012545629577886, "learning_rate": 4.942709704445858e-07, "loss": 0.298, "step": 11796 }, { "epoch": 1.8052027543993878, "grad_norm": 2.108012508898975, "learning_rate": 4.935017828909994e-07, "loss": 0.2551, "step": 11797 }, { "epoch": 1.8053557765876052, "grad_norm": 2.2952470925887303, "learning_rate": 4.927331791686585e-07, "loss": 0.26, "step": 11798 }, { "epoch": 1.8055087987758225, "grad_norm": 2.075299206713513, "learning_rate": 4.919651593247654e-07, "loss": 0.2831, "step": 11799 }, { "epoch": 1.8056618209640398, "grad_norm": 2.3021425499492483, "learning_rate": 4.911977234064868e-07, "loss": 0.2906, "step": 11800 }, { "epoch": 1.8058148431522572, "grad_norm": 1.9590477977426395, "learning_rate": 4.904308714609562e-07, "loss": 0.2897, "step": 11801 }, { "epoch": 1.8059678653404743, "grad_norm": 1.9965251043147465, "learning_rate": 4.896646035352659e-07, "loss": 0.3132, "step": 11802 }, { "epoch": 1.8061208875286918, "grad_norm": 2.373562135530326, "learning_rate": 4.888989196764782e-07, "loss": 0.3023, "step": 11803 }, { "epoch": 1.806273909716909, "grad_norm": 2.1986562880820486, "learning_rate": 4.881338199316144e-07, "loss": 0.2941, "step": 11804 }, { "epoch": 1.8064269319051263, "grad_norm": 2.040180272914321, "learning_rate": 4.873693043476646e-07, "loss": 0.2436, "step": 11805 }, { "epoch": 1.8065799540933436, "grad_norm": 1.918033241126225, "learning_rate": 4.86605372971577e-07, "loss": 0.2649, "step": 11806 }, { "epoch": 1.8067329762815607, "grad_norm": 2.5039834522606426, "learning_rate": 4.858420258502727e-07, "loss": 0.459, "step": 11807 }, { "epoch": 1.8068859984697783, "grad_norm": 2.1500788017787653, "learning_rate": 4.850792630306289e-07, "loss": 0.2669, "step": 11808 }, { "epoch": 1.8070390206579954, "grad_norm": 1.7573674117294718, "learning_rate": 4.84317084559488e-07, "loss": 0.2311, "step": 11809 }, { "epoch": 1.8071920428462127, "grad_norm": 2.116328054003031, "learning_rate": 4.835554904836626e-07, "loss": 0.2959, "step": 11810 }, { "epoch": 1.80734506503443, "grad_norm": 1.9941371350603454, "learning_rate": 4.82794480849923e-07, "loss": 0.2193, "step": 11811 }, { "epoch": 1.8074980872226472, "grad_norm": 2.4572585198173367, "learning_rate": 4.82034055705004e-07, "loss": 0.3226, "step": 11812 }, { "epoch": 1.8076511094108647, "grad_norm": 2.18333804741628, "learning_rate": 4.812742150956107e-07, "loss": 0.2796, "step": 11813 }, { "epoch": 1.8078041315990818, "grad_norm": 2.015761932746089, "learning_rate": 4.805149590684022e-07, "loss": 0.2538, "step": 11814 }, { "epoch": 1.8079571537872992, "grad_norm": 2.2635890211246497, "learning_rate": 4.797562876700124e-07, "loss": 0.2918, "step": 11815 }, { "epoch": 1.8081101759755165, "grad_norm": 2.0019627008557994, "learning_rate": 4.78998200947034e-07, "loss": 0.2271, "step": 11816 }, { "epoch": 1.8082631981637336, "grad_norm": 1.9257373438476535, "learning_rate": 4.782406989460197e-07, "loss": 0.2475, "step": 11817 }, { "epoch": 1.8084162203519512, "grad_norm": 2.1176835046895177, "learning_rate": 4.774837817134937e-07, "loss": 0.2816, "step": 11818 }, { "epoch": 1.8085692425401683, "grad_norm": 2.15777176903622, "learning_rate": 4.7672744929594396e-07, "loss": 0.3033, "step": 11819 }, { "epoch": 1.8087222647283856, "grad_norm": 1.9143295838287768, "learning_rate": 4.759717017398124e-07, "loss": 0.2498, "step": 11820 }, { "epoch": 1.808875286916603, "grad_norm": 2.0434931578114623, "learning_rate": 4.752165390915198e-07, "loss": 0.2489, "step": 11821 }, { "epoch": 1.80902830910482, "grad_norm": 1.6784767679091706, "learning_rate": 4.744619613974399e-07, "loss": 0.2, "step": 11822 }, { "epoch": 1.8091813312930376, "grad_norm": 2.313772731023759, "learning_rate": 4.737079687039148e-07, "loss": 0.3154, "step": 11823 }, { "epoch": 1.8093343534812547, "grad_norm": 2.30936650960722, "learning_rate": 4.7295456105725057e-07, "loss": 0.2594, "step": 11824 }, { "epoch": 1.809487375669472, "grad_norm": 2.0716315153696208, "learning_rate": 4.7220173850371695e-07, "loss": 0.2652, "step": 11825 }, { "epoch": 1.8096403978576894, "grad_norm": 1.9868270558385652, "learning_rate": 4.71449501089547e-07, "loss": 0.2478, "step": 11826 }, { "epoch": 1.8097934200459067, "grad_norm": 2.018269885599993, "learning_rate": 4.706978488609393e-07, "loss": 0.2329, "step": 11827 }, { "epoch": 1.809946442234124, "grad_norm": 1.9684741598276265, "learning_rate": 4.6994678186405685e-07, "loss": 0.2501, "step": 11828 }, { "epoch": 1.8100994644223412, "grad_norm": 1.933993539998423, "learning_rate": 4.691963001450228e-07, "loss": 0.2868, "step": 11829 }, { "epoch": 1.8102524866105585, "grad_norm": 2.3696993136051736, "learning_rate": 4.684464037499292e-07, "loss": 0.3, "step": 11830 }, { "epoch": 1.8104055087987758, "grad_norm": 2.2717448328519603, "learning_rate": 4.676970927248292e-07, "loss": 0.2647, "step": 11831 }, { "epoch": 1.8105585309869932, "grad_norm": 2.005296412078862, "learning_rate": 4.6694836711574153e-07, "loss": 0.2521, "step": 11832 }, { "epoch": 1.8107115531752105, "grad_norm": 1.752049954837269, "learning_rate": 4.662002269686483e-07, "loss": 0.2542, "step": 11833 }, { "epoch": 1.8108645753634276, "grad_norm": 2.015368287875028, "learning_rate": 4.654526723294961e-07, "loss": 0.2845, "step": 11834 }, { "epoch": 1.8110175975516452, "grad_norm": 2.137987633418499, "learning_rate": 4.6470570324419374e-07, "loss": 0.3099, "step": 11835 }, { "epoch": 1.8111706197398623, "grad_norm": 2.2337190632103794, "learning_rate": 4.6395931975861673e-07, "loss": 0.268, "step": 11836 }, { "epoch": 1.8113236419280796, "grad_norm": 2.0287393644206366, "learning_rate": 4.6321352191860293e-07, "loss": 0.2506, "step": 11837 }, { "epoch": 1.811476664116297, "grad_norm": 2.0219857424282845, "learning_rate": 4.6246830976995336e-07, "loss": 0.4011, "step": 11838 }, { "epoch": 1.811629686304514, "grad_norm": 2.4153059420724565, "learning_rate": 4.617236833584393e-07, "loss": 0.2586, "step": 11839 }, { "epoch": 1.8117827084927316, "grad_norm": 2.1473162421467507, "learning_rate": 4.6097964272978634e-07, "loss": 0.2317, "step": 11840 }, { "epoch": 1.8119357306809487, "grad_norm": 2.129796298413445, "learning_rate": 4.6023618792968794e-07, "loss": 0.2965, "step": 11841 }, { "epoch": 1.812088752869166, "grad_norm": 2.186068024749278, "learning_rate": 4.5949331900380865e-07, "loss": 0.2816, "step": 11842 }, { "epoch": 1.8122417750573834, "grad_norm": 2.0460758575279896, "learning_rate": 4.5875103599776426e-07, "loss": 0.2681, "step": 11843 }, { "epoch": 1.8123947972456005, "grad_norm": 2.3159308326608423, "learning_rate": 4.5800933895714606e-07, "loss": 0.2898, "step": 11844 }, { "epoch": 1.812547819433818, "grad_norm": 2.3681240793421754, "learning_rate": 4.5726822792750425e-07, "loss": 0.2783, "step": 11845 }, { "epoch": 1.8127008416220352, "grad_norm": 2.2807501390604608, "learning_rate": 4.565277029543491e-07, "loss": 0.2452, "step": 11846 }, { "epoch": 1.8128538638102525, "grad_norm": 2.0219388437967574, "learning_rate": 4.5578776408316315e-07, "loss": 0.233, "step": 11847 }, { "epoch": 1.8130068859984698, "grad_norm": 1.944753820724033, "learning_rate": 4.5504841135938893e-07, "loss": 0.2404, "step": 11848 }, { "epoch": 1.813159908186687, "grad_norm": 1.7793328730279039, "learning_rate": 4.543096448284301e-07, "loss": 0.2104, "step": 11849 }, { "epoch": 1.8133129303749045, "grad_norm": 2.0493902412848954, "learning_rate": 4.5357146453565926e-07, "loss": 0.2623, "step": 11850 }, { "epoch": 1.8134659525631216, "grad_norm": 1.8090947119690544, "learning_rate": 4.5283387052641125e-07, "loss": 0.1942, "step": 11851 }, { "epoch": 1.813618974751339, "grad_norm": 2.0275436226010153, "learning_rate": 4.520968628459832e-07, "loss": 0.2955, "step": 11852 }, { "epoch": 1.8137719969395563, "grad_norm": 2.4728132882804537, "learning_rate": 4.5136044153963887e-07, "loss": 0.3628, "step": 11853 }, { "epoch": 1.8139250191277734, "grad_norm": 2.2678757394918745, "learning_rate": 4.5062460665260434e-07, "loss": 0.302, "step": 11854 }, { "epoch": 1.814078041315991, "grad_norm": 1.9115212445776606, "learning_rate": 4.498893582300701e-07, "loss": 0.24, "step": 11855 }, { "epoch": 1.814231063504208, "grad_norm": 1.9723840834983504, "learning_rate": 4.491546963171911e-07, "loss": 0.2545, "step": 11856 }, { "epoch": 1.8143840856924254, "grad_norm": 1.9544300904005711, "learning_rate": 4.4842062095908467e-07, "loss": 0.2047, "step": 11857 }, { "epoch": 1.8145371078806427, "grad_norm": 2.0727551080590167, "learning_rate": 4.476871322008336e-07, "loss": 0.2165, "step": 11858 }, { "epoch": 1.8146901300688598, "grad_norm": 2.13104978350104, "learning_rate": 4.4695423008748406e-07, "loss": 0.3208, "step": 11859 }, { "epoch": 1.8148431522570774, "grad_norm": 2.242696903489749, "learning_rate": 4.4622191466404894e-07, "loss": 0.3112, "step": 11860 }, { "epoch": 1.8149961744452945, "grad_norm": 2.2981606041884333, "learning_rate": 4.454901859755001e-07, "loss": 0.3035, "step": 11861 }, { "epoch": 1.8151491966335118, "grad_norm": 2.1319246519937507, "learning_rate": 4.4475904406677483e-07, "loss": 0.2548, "step": 11862 }, { "epoch": 1.8153022188217292, "grad_norm": 2.152578096052732, "learning_rate": 4.440284889827795e-07, "loss": 0.2502, "step": 11863 }, { "epoch": 1.8154552410099465, "grad_norm": 2.2940645041026757, "learning_rate": 4.4329852076837597e-07, "loss": 0.3296, "step": 11864 }, { "epoch": 1.8156082631981638, "grad_norm": 2.199915066734886, "learning_rate": 4.4256913946839506e-07, "loss": 0.3083, "step": 11865 }, { "epoch": 1.815761285386381, "grad_norm": 2.1312699700865285, "learning_rate": 4.418403451276354e-07, "loss": 0.343, "step": 11866 }, { "epoch": 1.8159143075745983, "grad_norm": 2.1295718756882573, "learning_rate": 4.411121377908489e-07, "loss": 0.2527, "step": 11867 }, { "epoch": 1.8160673297628156, "grad_norm": 2.0468010619606978, "learning_rate": 4.4038451750276213e-07, "loss": 0.2323, "step": 11868 }, { "epoch": 1.816220351951033, "grad_norm": 2.037380646764395, "learning_rate": 4.396574843080603e-07, "loss": 0.3027, "step": 11869 }, { "epoch": 1.8163733741392503, "grad_norm": 2.4229862444006147, "learning_rate": 4.3893103825139115e-07, "loss": 0.3327, "step": 11870 }, { "epoch": 1.8165263963274674, "grad_norm": 2.2136068015881913, "learning_rate": 4.3820517937737005e-07, "loss": 0.3462, "step": 11871 }, { "epoch": 1.816679418515685, "grad_norm": 2.2981031674405994, "learning_rate": 4.3747990773057693e-07, "loss": 0.3054, "step": 11872 }, { "epoch": 1.816832440703902, "grad_norm": 2.050569164217128, "learning_rate": 4.367552233555494e-07, "loss": 0.2547, "step": 11873 }, { "epoch": 1.8169854628921194, "grad_norm": 2.1447671798370855, "learning_rate": 4.3603112629679534e-07, "loss": 0.2512, "step": 11874 }, { "epoch": 1.8171384850803367, "grad_norm": 2.229682016061022, "learning_rate": 4.353076165987846e-07, "loss": 0.2623, "step": 11875 }, { "epoch": 1.8172915072685538, "grad_norm": 2.3664239681453663, "learning_rate": 4.3458469430595063e-07, "loss": 0.2964, "step": 11876 }, { "epoch": 1.8174445294567714, "grad_norm": 2.0532090128786655, "learning_rate": 4.338623594626912e-07, "loss": 0.3125, "step": 11877 }, { "epoch": 1.8175975516449885, "grad_norm": 1.9329806232041002, "learning_rate": 4.3314061211336633e-07, "loss": 0.2434, "step": 11878 }, { "epoch": 1.8177505738332058, "grad_norm": 2.076618181262312, "learning_rate": 4.3241945230230286e-07, "loss": 0.2888, "step": 11879 }, { "epoch": 1.8179035960214232, "grad_norm": 2.740737723939962, "learning_rate": 4.316988800737887e-07, "loss": 0.3424, "step": 11880 }, { "epoch": 1.8180566182096403, "grad_norm": 2.0235334902556255, "learning_rate": 4.309788954720784e-07, "loss": 0.2841, "step": 11881 }, { "epoch": 1.8182096403978578, "grad_norm": 1.8685472019677516, "learning_rate": 4.302594985413877e-07, "loss": 0.2695, "step": 11882 }, { "epoch": 1.818362662586075, "grad_norm": 2.088752310739199, "learning_rate": 4.295406893258991e-07, "loss": 0.2887, "step": 11883 }, { "epoch": 1.8185156847742923, "grad_norm": 2.1044895242632147, "learning_rate": 4.2882246786975614e-07, "loss": 0.285, "step": 11884 }, { "epoch": 1.8186687069625096, "grad_norm": 1.986025076615623, "learning_rate": 4.2810483421706796e-07, "loss": 0.2584, "step": 11885 }, { "epoch": 1.8188217291507267, "grad_norm": 2.493855607868984, "learning_rate": 4.273877884119071e-07, "loss": 0.3174, "step": 11886 }, { "epoch": 1.8189747513389443, "grad_norm": 2.3424713449133083, "learning_rate": 4.2667133049831166e-07, "loss": 0.2856, "step": 11887 }, { "epoch": 1.8191277735271614, "grad_norm": 2.159188268341222, "learning_rate": 4.259554605202787e-07, "loss": 0.295, "step": 11888 }, { "epoch": 1.8192807957153787, "grad_norm": 1.9226041677235508, "learning_rate": 4.2524017852177746e-07, "loss": 0.2502, "step": 11889 }, { "epoch": 1.819433817903596, "grad_norm": 2.0557886809068306, "learning_rate": 4.245254845467317e-07, "loss": 0.2254, "step": 11890 }, { "epoch": 1.8195868400918132, "grad_norm": 2.2139733495822735, "learning_rate": 4.238113786390352e-07, "loss": 0.3166, "step": 11891 }, { "epoch": 1.8197398622800307, "grad_norm": 1.8869752641971287, "learning_rate": 4.230978608425462e-07, "loss": 0.2612, "step": 11892 }, { "epoch": 1.8198928844682478, "grad_norm": 2.361534050142558, "learning_rate": 4.223849312010808e-07, "loss": 0.2905, "step": 11893 }, { "epoch": 1.8200459066564652, "grad_norm": 2.473559879765051, "learning_rate": 4.2167258975842394e-07, "loss": 0.2726, "step": 11894 }, { "epoch": 1.8201989288446825, "grad_norm": 1.8092190590853954, "learning_rate": 4.209608365583262e-07, "loss": 0.2333, "step": 11895 }, { "epoch": 1.8203519510328998, "grad_norm": 1.9546857994825566, "learning_rate": 4.202496716444948e-07, "loss": 0.221, "step": 11896 }, { "epoch": 1.8205049732211172, "grad_norm": 2.0141397192525066, "learning_rate": 4.195390950606082e-07, "loss": 0.2491, "step": 11897 }, { "epoch": 1.8206579954093343, "grad_norm": 2.1127973177494597, "learning_rate": 4.1882910685030587e-07, "loss": 0.2451, "step": 11898 }, { "epoch": 1.8208110175975516, "grad_norm": 1.9673277852549478, "learning_rate": 4.181197070571874e-07, "loss": 0.2684, "step": 11899 }, { "epoch": 1.820964039785769, "grad_norm": 2.358968570632875, "learning_rate": 4.174108957248246e-07, "loss": 0.2537, "step": 11900 }, { "epoch": 1.8211170619739863, "grad_norm": 2.1309862940664765, "learning_rate": 4.16702672896746e-07, "loss": 0.3338, "step": 11901 }, { "epoch": 1.8212700841622036, "grad_norm": 2.170521550489901, "learning_rate": 4.1599503861644355e-07, "loss": 0.2589, "step": 11902 }, { "epoch": 1.8214231063504207, "grad_norm": 1.9231878873342532, "learning_rate": 4.152879929273812e-07, "loss": 0.1625, "step": 11903 }, { "epoch": 1.8215761285386383, "grad_norm": 2.136045983086775, "learning_rate": 4.145815358729777e-07, "loss": 0.2598, "step": 11904 }, { "epoch": 1.8217291507268554, "grad_norm": 2.0576412417232857, "learning_rate": 4.1387566749662045e-07, "loss": 0.2408, "step": 11905 }, { "epoch": 1.8218821729150727, "grad_norm": 1.9763125602432094, "learning_rate": 4.131703878416604e-07, "loss": 0.2477, "step": 11906 }, { "epoch": 1.82203519510329, "grad_norm": 2.3306832460828155, "learning_rate": 4.124656969514107e-07, "loss": 0.2894, "step": 11907 }, { "epoch": 1.8221882172915072, "grad_norm": 2.175829010880975, "learning_rate": 4.117615948691489e-07, "loss": 0.2145, "step": 11908 }, { "epoch": 1.8223412394797247, "grad_norm": 1.8907633407981779, "learning_rate": 4.11058081638116e-07, "loss": 0.2225, "step": 11909 }, { "epoch": 1.8224942616679418, "grad_norm": 2.2137627125533275, "learning_rate": 4.103551573015196e-07, "loss": 0.2826, "step": 11910 }, { "epoch": 1.8226472838561592, "grad_norm": 2.6138023580709238, "learning_rate": 4.096528219025275e-07, "loss": 0.3311, "step": 11911 }, { "epoch": 1.8228003060443765, "grad_norm": 2.483862774064997, "learning_rate": 4.0895107548427293e-07, "loss": 0.3195, "step": 11912 }, { "epoch": 1.8229533282325936, "grad_norm": 2.487626032754856, "learning_rate": 4.082499180898536e-07, "loss": 0.2799, "step": 11913 }, { "epoch": 1.8231063504208112, "grad_norm": 2.1376354412056844, "learning_rate": 4.0754934976233065e-07, "loss": 0.2945, "step": 11914 }, { "epoch": 1.8232593726090283, "grad_norm": 1.574613938962143, "learning_rate": 4.068493705447274e-07, "loss": 0.1875, "step": 11915 }, { "epoch": 1.8234123947972456, "grad_norm": 2.1105301064666726, "learning_rate": 4.0614998048003284e-07, "loss": 0.2669, "step": 11916 }, { "epoch": 1.823565416985463, "grad_norm": 2.268105261960538, "learning_rate": 4.054511796111982e-07, "loss": 0.2442, "step": 11917 }, { "epoch": 1.82371843917368, "grad_norm": 2.281379829918933, "learning_rate": 4.047529679811424e-07, "loss": 0.2729, "step": 11918 }, { "epoch": 1.8238714613618976, "grad_norm": 1.9162797641433544, "learning_rate": 4.040553456327434e-07, "loss": 0.2425, "step": 11919 }, { "epoch": 1.8240244835501147, "grad_norm": 2.123847206220364, "learning_rate": 4.0335831260884363e-07, "loss": 0.256, "step": 11920 }, { "epoch": 1.824177505738332, "grad_norm": 1.9678421561811497, "learning_rate": 4.026618689522543e-07, "loss": 0.255, "step": 11921 }, { "epoch": 1.8243305279265494, "grad_norm": 2.1499690708656307, "learning_rate": 4.019660147057436e-07, "loss": 0.2687, "step": 11922 }, { "epoch": 1.8244835501147665, "grad_norm": 1.839216684375747, "learning_rate": 4.012707499120472e-07, "loss": 0.2095, "step": 11923 }, { "epoch": 1.824636572302984, "grad_norm": 1.8671693115789987, "learning_rate": 4.005760746138654e-07, "loss": 0.2566, "step": 11924 }, { "epoch": 1.8247895944912012, "grad_norm": 2.007621689391616, "learning_rate": 3.998819888538585e-07, "loss": 0.2492, "step": 11925 }, { "epoch": 1.8249426166794185, "grad_norm": 2.33315783039703, "learning_rate": 3.9918849267465587e-07, "loss": 0.2988, "step": 11926 }, { "epoch": 1.8250956388676358, "grad_norm": 2.2086776410871973, "learning_rate": 3.984955861188478e-07, "loss": 0.2698, "step": 11927 }, { "epoch": 1.8252486610558531, "grad_norm": 2.0325254374514365, "learning_rate": 3.978032692289846e-07, "loss": 0.2833, "step": 11928 }, { "epoch": 1.8254016832440705, "grad_norm": 1.9777584449620282, "learning_rate": 3.9711154204758686e-07, "loss": 0.2598, "step": 11929 }, { "epoch": 1.8255547054322876, "grad_norm": 2.0637922928930887, "learning_rate": 3.964204046171383e-07, "loss": 0.2631, "step": 11930 }, { "epoch": 1.825707727620505, "grad_norm": 2.3317485527455073, "learning_rate": 3.957298569800794e-07, "loss": 0.2423, "step": 11931 }, { "epoch": 1.8258607498087223, "grad_norm": 2.252331720437728, "learning_rate": 3.950398991788229e-07, "loss": 0.2706, "step": 11932 }, { "epoch": 1.8260137719969396, "grad_norm": 2.163044989421106, "learning_rate": 3.9435053125574164e-07, "loss": 0.2988, "step": 11933 }, { "epoch": 1.826166794185157, "grad_norm": 2.129347941639684, "learning_rate": 3.936617532531717e-07, "loss": 0.297, "step": 11934 }, { "epoch": 1.826319816373374, "grad_norm": 2.0345026352262123, "learning_rate": 3.929735652134137e-07, "loss": 0.2762, "step": 11935 }, { "epoch": 1.8264728385615916, "grad_norm": 2.0560842105913837, "learning_rate": 3.922859671787316e-07, "loss": 0.3026, "step": 11936 }, { "epoch": 1.8266258607498087, "grad_norm": 1.9701130038274002, "learning_rate": 3.915989591913538e-07, "loss": 0.2759, "step": 11937 }, { "epoch": 1.826778882938026, "grad_norm": 1.8164126230552349, "learning_rate": 3.9091254129347225e-07, "loss": 0.2115, "step": 11938 }, { "epoch": 1.8269319051262434, "grad_norm": 2.095226323648143, "learning_rate": 3.902267135272431e-07, "loss": 0.2918, "step": 11939 }, { "epoch": 1.8270849273144605, "grad_norm": 1.8632956629908928, "learning_rate": 3.8954147593478486e-07, "loss": 0.2635, "step": 11940 }, { "epoch": 1.827237949502678, "grad_norm": 2.125554599686935, "learning_rate": 3.888568285581795e-07, "loss": 0.2541, "step": 11941 }, { "epoch": 1.8273909716908951, "grad_norm": 1.8686548914535643, "learning_rate": 3.8817277143947786e-07, "loss": 0.2687, "step": 11942 }, { "epoch": 1.8275439938791125, "grad_norm": 2.2724859585571004, "learning_rate": 3.874893046206862e-07, "loss": 0.2784, "step": 11943 }, { "epoch": 1.8276970160673298, "grad_norm": 1.9277596501627852, "learning_rate": 3.8680642814378e-07, "loss": 0.2747, "step": 11944 }, { "epoch": 1.827850038255547, "grad_norm": 1.8385682044969542, "learning_rate": 3.861241420507e-07, "loss": 0.2125, "step": 11945 }, { "epoch": 1.8280030604437645, "grad_norm": 2.273734952165846, "learning_rate": 3.8544244638334617e-07, "loss": 0.2406, "step": 11946 }, { "epoch": 1.8281560826319816, "grad_norm": 1.6741109276874935, "learning_rate": 3.8476134118358153e-07, "loss": 0.1967, "step": 11947 }, { "epoch": 1.828309104820199, "grad_norm": 2.0544841920395363, "learning_rate": 3.8408082649324165e-07, "loss": 0.2309, "step": 11948 }, { "epoch": 1.8284621270084163, "grad_norm": 2.120247464040846, "learning_rate": 3.8340090235411186e-07, "loss": 0.2576, "step": 11949 }, { "epoch": 1.8286151491966334, "grad_norm": 2.19249778357952, "learning_rate": 3.827215688079555e-07, "loss": 0.2595, "step": 11950 }, { "epoch": 1.828768171384851, "grad_norm": 2.0334839445427293, "learning_rate": 3.8204282589649144e-07, "loss": 0.3497, "step": 11951 }, { "epoch": 1.828921193573068, "grad_norm": 2.414610411001209, "learning_rate": 3.8136467366140073e-07, "loss": 0.3199, "step": 11952 }, { "epoch": 1.8290742157612854, "grad_norm": 2.293890111032897, "learning_rate": 3.8068711214433453e-07, "loss": 0.3205, "step": 11953 }, { "epoch": 1.8292272379495027, "grad_norm": 2.141584926134872, "learning_rate": 3.8001014138690396e-07, "loss": 0.2612, "step": 11954 }, { "epoch": 1.8293802601377198, "grad_norm": 1.9667031981212337, "learning_rate": 3.793337614306847e-07, "loss": 0.3036, "step": 11955 }, { "epoch": 1.8295332823259374, "grad_norm": 1.8036232157888064, "learning_rate": 3.7865797231721456e-07, "loss": 0.2635, "step": 11956 }, { "epoch": 1.8296863045141545, "grad_norm": 2.0793528801360943, "learning_rate": 3.779827740879982e-07, "loss": 0.2694, "step": 11957 }, { "epoch": 1.8298393267023718, "grad_norm": 2.3880010286275724, "learning_rate": 3.773081667845002e-07, "loss": 0.26, "step": 11958 }, { "epoch": 1.8299923488905891, "grad_norm": 1.922707375441064, "learning_rate": 3.76634150448153e-07, "loss": 0.2438, "step": 11959 }, { "epoch": 1.8301453710788065, "grad_norm": 1.9779623539916547, "learning_rate": 3.75960725120349e-07, "loss": 0.2355, "step": 11960 }, { "epoch": 1.8302983932670238, "grad_norm": 2.116293706736384, "learning_rate": 3.752878908424462e-07, "loss": 0.2952, "step": 11961 }, { "epoch": 1.830451415455241, "grad_norm": 2.0257737983103787, "learning_rate": 3.7461564765576607e-07, "loss": 0.2281, "step": 11962 }, { "epoch": 1.8306044376434583, "grad_norm": 1.8736255320167776, "learning_rate": 3.7394399560159336e-07, "loss": 0.2413, "step": 11963 }, { "epoch": 1.8307574598316756, "grad_norm": 2.3071687308411466, "learning_rate": 3.732729347211772e-07, "loss": 0.2546, "step": 11964 }, { "epoch": 1.830910482019893, "grad_norm": 1.873675215731886, "learning_rate": 3.726024650557303e-07, "loss": 0.2861, "step": 11965 }, { "epoch": 1.8310635042081103, "grad_norm": 2.7959098918112555, "learning_rate": 3.7193258664642964e-07, "loss": 0.276, "step": 11966 }, { "epoch": 1.8312165263963274, "grad_norm": 2.3147884301052275, "learning_rate": 3.712632995344123e-07, "loss": 0.2683, "step": 11967 }, { "epoch": 1.8313695485845447, "grad_norm": 2.1285332905357786, "learning_rate": 3.7059460376078547e-07, "loss": 0.2911, "step": 11968 }, { "epoch": 1.831522570772762, "grad_norm": 1.9785370093599328, "learning_rate": 3.6992649936661294e-07, "loss": 0.2751, "step": 11969 }, { "epoch": 1.8316755929609794, "grad_norm": 1.9944491857282887, "learning_rate": 3.6925898639292634e-07, "loss": 0.2614, "step": 11970 }, { "epoch": 1.8318286151491967, "grad_norm": 2.1796578318532, "learning_rate": 3.6859206488072396e-07, "loss": 0.3357, "step": 11971 }, { "epoch": 1.8319816373374138, "grad_norm": 2.266227460375382, "learning_rate": 3.6792573487095975e-07, "loss": 0.2783, "step": 11972 }, { "epoch": 1.8321346595256314, "grad_norm": 2.113008303916984, "learning_rate": 3.6725999640455533e-07, "loss": 0.3254, "step": 11973 }, { "epoch": 1.8322876817138485, "grad_norm": 1.8368245644367691, "learning_rate": 3.6659484952240033e-07, "loss": 0.2533, "step": 11974 }, { "epoch": 1.8324407039020658, "grad_norm": 1.9882890052049291, "learning_rate": 3.6593029426534086e-07, "loss": 0.2611, "step": 11975 }, { "epoch": 1.8325937260902831, "grad_norm": 1.8442393407352757, "learning_rate": 3.652663306741899e-07, "loss": 0.2888, "step": 11976 }, { "epoch": 1.8327467482785003, "grad_norm": 2.227356989444283, "learning_rate": 3.6460295878972704e-07, "loss": 0.3018, "step": 11977 }, { "epoch": 1.8328997704667178, "grad_norm": 2.6299960205316566, "learning_rate": 3.639401786526875e-07, "loss": 0.2764, "step": 11978 }, { "epoch": 1.833052792654935, "grad_norm": 2.3886068477380373, "learning_rate": 3.6327799030377976e-07, "loss": 0.282, "step": 11979 }, { "epoch": 1.8332058148431523, "grad_norm": 2.3571824622687125, "learning_rate": 3.6261639378367133e-07, "loss": 0.2807, "step": 11980 }, { "epoch": 1.8333588370313696, "grad_norm": 2.2983280032511173, "learning_rate": 3.6195538913298856e-07, "loss": 0.2866, "step": 11981 }, { "epoch": 1.8335118592195867, "grad_norm": 2.280510188770605, "learning_rate": 3.6129497639233123e-07, "loss": 0.2887, "step": 11982 }, { "epoch": 1.8336648814078043, "grad_norm": 2.2769950664737117, "learning_rate": 3.606351556022558e-07, "loss": 0.3011, "step": 11983 }, { "epoch": 1.8338179035960214, "grad_norm": 2.183026594822403, "learning_rate": 3.599759268032854e-07, "loss": 0.2637, "step": 11984 }, { "epoch": 1.8339709257842387, "grad_norm": 2.033810868967627, "learning_rate": 3.5931729003590544e-07, "loss": 0.2677, "step": 11985 }, { "epoch": 1.834123947972456, "grad_norm": 2.0557534905143453, "learning_rate": 3.586592453405646e-07, "loss": 0.2727, "step": 11986 }, { "epoch": 1.8342769701606731, "grad_norm": 2.005984349800885, "learning_rate": 3.580017927576773e-07, "loss": 0.3202, "step": 11987 }, { "epoch": 1.8344299923488907, "grad_norm": 2.070018685158, "learning_rate": 3.57344932327619e-07, "loss": 0.2542, "step": 11988 }, { "epoch": 1.8345830145371078, "grad_norm": 2.3376594691999077, "learning_rate": 3.5668866409073075e-07, "loss": 0.2996, "step": 11989 }, { "epoch": 1.8347360367253251, "grad_norm": 2.441617675543389, "learning_rate": 3.56032988087317e-07, "loss": 0.3233, "step": 11990 }, { "epoch": 1.8348890589135425, "grad_norm": 2.4082276906172493, "learning_rate": 3.5537790435764443e-07, "loss": 0.3318, "step": 11991 }, { "epoch": 1.8350420811017596, "grad_norm": 2.585091922064814, "learning_rate": 3.5472341294194413e-07, "loss": 0.2845, "step": 11992 }, { "epoch": 1.8351951032899771, "grad_norm": 2.2536038309694173, "learning_rate": 3.540695138804129e-07, "loss": 0.303, "step": 11993 }, { "epoch": 1.8353481254781943, "grad_norm": 2.255993235887367, "learning_rate": 3.5341620721320746e-07, "loss": 0.2994, "step": 11994 }, { "epoch": 1.8355011476664116, "grad_norm": 1.9197326776399797, "learning_rate": 3.527634929804502e-07, "loss": 0.2682, "step": 11995 }, { "epoch": 1.835654169854629, "grad_norm": 2.0008368864449113, "learning_rate": 3.521113712222268e-07, "loss": 0.2804, "step": 11996 }, { "epoch": 1.8358071920428463, "grad_norm": 1.7878197925898167, "learning_rate": 3.514598419785875e-07, "loss": 0.2248, "step": 11997 }, { "epoch": 1.8359602142310636, "grad_norm": 2.0309007271436763, "learning_rate": 3.508089052895436e-07, "loss": 0.2969, "step": 11998 }, { "epoch": 1.8361132364192807, "grad_norm": 1.9657444590479858, "learning_rate": 3.50158561195072e-07, "loss": 0.2682, "step": 11999 }, { "epoch": 1.836266258607498, "grad_norm": 2.395003585976222, "learning_rate": 3.4950880973511626e-07, "loss": 0.3577, "step": 12000 }, { "epoch": 1.8364192807957154, "grad_norm": 2.4444206059912723, "learning_rate": 3.488596509495756e-07, "loss": 0.5389, "step": 12001 }, { "epoch": 1.8365723029839327, "grad_norm": 2.045241268445169, "learning_rate": 3.4821108487831936e-07, "loss": 0.2474, "step": 12002 }, { "epoch": 1.83672532517215, "grad_norm": 2.0052203274872338, "learning_rate": 3.4756311156117995e-07, "loss": 0.2466, "step": 12003 }, { "epoch": 1.8368783473603671, "grad_norm": 2.0755297021165817, "learning_rate": 3.4691573103794894e-07, "loss": 0.2683, "step": 12004 }, { "epoch": 1.8370313695485847, "grad_norm": 2.2371449837594946, "learning_rate": 3.4626894334838457e-07, "loss": 0.3022, "step": 12005 }, { "epoch": 1.8371843917368018, "grad_norm": 2.4836021577730865, "learning_rate": 3.456227485322128e-07, "loss": 0.3733, "step": 12006 }, { "epoch": 1.8373374139250191, "grad_norm": 2.1804822352614504, "learning_rate": 3.4497714662911406e-07, "loss": 0.2799, "step": 12007 }, { "epoch": 1.8374904361132365, "grad_norm": 2.1739341705368256, "learning_rate": 3.443321376787401e-07, "loss": 0.2481, "step": 12008 }, { "epoch": 1.8376434583014536, "grad_norm": 1.9035207841071022, "learning_rate": 3.436877217207046e-07, "loss": 0.2046, "step": 12009 }, { "epoch": 1.8377964804896711, "grad_norm": 2.5314565993420812, "learning_rate": 3.4304389879457835e-07, "loss": 0.2207, "step": 12010 }, { "epoch": 1.8379495026778883, "grad_norm": 2.4787411745654127, "learning_rate": 3.424006689399073e-07, "loss": 0.3614, "step": 12011 }, { "epoch": 1.8381025248661056, "grad_norm": 1.8064961409465559, "learning_rate": 3.4175803219619217e-07, "loss": 0.24, "step": 12012 }, { "epoch": 1.838255547054323, "grad_norm": 2.2357013035541717, "learning_rate": 3.4111598860289696e-07, "loss": 0.2798, "step": 12013 }, { "epoch": 1.83840856924254, "grad_norm": 2.1526836237095313, "learning_rate": 3.4047453819945564e-07, "loss": 0.2627, "step": 12014 }, { "epoch": 1.8385615914307576, "grad_norm": 2.2397589758269048, "learning_rate": 3.398336810252623e-07, "loss": 0.3025, "step": 12015 }, { "epoch": 1.8387146136189747, "grad_norm": 2.284066487090659, "learning_rate": 3.39193417119672e-07, "loss": 0.3526, "step": 12016 }, { "epoch": 1.838867635807192, "grad_norm": 2.1539273397928618, "learning_rate": 3.385537465220079e-07, "loss": 0.3264, "step": 12017 }, { "epoch": 1.8390206579954094, "grad_norm": 2.0446058854531945, "learning_rate": 3.3791466927155403e-07, "loss": 0.2517, "step": 12018 }, { "epoch": 1.8391736801836265, "grad_norm": 1.8921978306564606, "learning_rate": 3.37276185407559e-07, "loss": 0.2678, "step": 12019 }, { "epoch": 1.839326702371844, "grad_norm": 1.9626988958729294, "learning_rate": 3.366382949692326e-07, "loss": 0.2971, "step": 12020 }, { "epoch": 1.8394797245600611, "grad_norm": 2.0565364790390217, "learning_rate": 3.360009979957546e-07, "loss": 0.2869, "step": 12021 }, { "epoch": 1.8396327467482785, "grad_norm": 2.1341795864028397, "learning_rate": 3.353642945262592e-07, "loss": 0.2688, "step": 12022 }, { "epoch": 1.8397857689364958, "grad_norm": 2.4168517975060135, "learning_rate": 3.347281845998496e-07, "loss": 0.3241, "step": 12023 }, { "epoch": 1.839938791124713, "grad_norm": 2.3182262108339344, "learning_rate": 3.340926682555956e-07, "loss": 0.2966, "step": 12024 }, { "epoch": 1.8400918133129305, "grad_norm": 2.116133961225955, "learning_rate": 3.3345774553252273e-07, "loss": 0.3218, "step": 12025 }, { "epoch": 1.8402448355011476, "grad_norm": 2.1324931497746125, "learning_rate": 3.328234164696242e-07, "loss": 0.3341, "step": 12026 }, { "epoch": 1.840397857689365, "grad_norm": 1.9944972529438794, "learning_rate": 3.3218968110586114e-07, "loss": 0.2519, "step": 12027 }, { "epoch": 1.8405508798775823, "grad_norm": 1.9909357917518529, "learning_rate": 3.3155653948014674e-07, "loss": 0.2383, "step": 12028 }, { "epoch": 1.8407039020657996, "grad_norm": 2.515119551859431, "learning_rate": 3.3092399163137113e-07, "loss": 0.3151, "step": 12029 }, { "epoch": 1.840856924254017, "grad_norm": 2.3275887638623285, "learning_rate": 3.302920375983787e-07, "loss": 0.2848, "step": 12030 }, { "epoch": 1.841009946442234, "grad_norm": 1.9495815422530196, "learning_rate": 3.2966067741997844e-07, "loss": 0.2069, "step": 12031 }, { "epoch": 1.8411629686304514, "grad_norm": 1.9592391943120646, "learning_rate": 3.290299111349471e-07, "loss": 0.2031, "step": 12032 }, { "epoch": 1.8413159908186687, "grad_norm": 2.4296933720566023, "learning_rate": 3.283997387820248e-07, "loss": 0.3, "step": 12033 }, { "epoch": 1.841469013006886, "grad_norm": 2.056443705826707, "learning_rate": 3.2777016039990615e-07, "loss": 0.2354, "step": 12034 }, { "epoch": 1.8416220351951034, "grad_norm": 2.4922123003136005, "learning_rate": 3.2714117602726137e-07, "loss": 0.2721, "step": 12035 }, { "epoch": 1.8417750573833205, "grad_norm": 2.2539525516934087, "learning_rate": 3.265127857027184e-07, "loss": 0.2819, "step": 12036 }, { "epoch": 1.841928079571538, "grad_norm": 2.3067629986694316, "learning_rate": 3.2588498946486634e-07, "loss": 0.2877, "step": 12037 }, { "epoch": 1.8420811017597551, "grad_norm": 2.1901125523396123, "learning_rate": 3.2525778735226444e-07, "loss": 0.2798, "step": 12038 }, { "epoch": 1.8422341239479725, "grad_norm": 2.3560154126153017, "learning_rate": 3.2463117940342846e-07, "loss": 0.3309, "step": 12039 }, { "epoch": 1.8423871461361898, "grad_norm": 2.0695678919381315, "learning_rate": 3.2400516565684217e-07, "loss": 0.2819, "step": 12040 }, { "epoch": 1.842540168324407, "grad_norm": 2.0646589283114603, "learning_rate": 3.2337974615095134e-07, "loss": 0.247, "step": 12041 }, { "epoch": 1.8426931905126245, "grad_norm": 2.1398428401944214, "learning_rate": 3.2275492092416536e-07, "loss": 0.2873, "step": 12042 }, { "epoch": 1.8428462127008416, "grad_norm": 2.289253406530182, "learning_rate": 3.2213069001485797e-07, "loss": 0.3365, "step": 12043 }, { "epoch": 1.842999234889059, "grad_norm": 1.9642071977337394, "learning_rate": 3.21507053461364e-07, "loss": 0.2847, "step": 12044 }, { "epoch": 1.8431522570772763, "grad_norm": 2.3167926844831674, "learning_rate": 3.208840113019851e-07, "loss": 0.2929, "step": 12045 }, { "epoch": 1.8433052792654934, "grad_norm": 2.1338331581061585, "learning_rate": 3.2026156357498284e-07, "loss": 0.2996, "step": 12046 }, { "epoch": 1.843458301453711, "grad_norm": 2.1716582602915993, "learning_rate": 3.1963971031858664e-07, "loss": 0.2735, "step": 12047 }, { "epoch": 1.843611323641928, "grad_norm": 2.07267232853433, "learning_rate": 3.1901845157098486e-07, "loss": 0.2846, "step": 12048 }, { "epoch": 1.8437643458301454, "grad_norm": 2.0413863077886303, "learning_rate": 3.1839778737033256e-07, "loss": 0.2788, "step": 12049 }, { "epoch": 1.8439173680183627, "grad_norm": 2.132499243615788, "learning_rate": 3.1777771775474697e-07, "loss": 0.2613, "step": 12050 }, { "epoch": 1.8440703902065798, "grad_norm": 2.012435426584495, "learning_rate": 3.171582427623077e-07, "loss": 0.2077, "step": 12051 }, { "epoch": 1.8442234123947974, "grad_norm": 2.3338924545977537, "learning_rate": 3.1653936243105974e-07, "loss": 0.2969, "step": 12052 }, { "epoch": 1.8443764345830145, "grad_norm": 2.439473361933731, "learning_rate": 3.1592107679901393e-07, "loss": 0.3477, "step": 12053 }, { "epoch": 1.8445294567712318, "grad_norm": 1.9503200571517338, "learning_rate": 3.1530338590413877e-07, "loss": 0.247, "step": 12054 }, { "epoch": 1.8446824789594491, "grad_norm": 1.9160994751283733, "learning_rate": 3.1468628978436723e-07, "loss": 0.2587, "step": 12055 }, { "epoch": 1.8448355011476663, "grad_norm": 2.2006778123960355, "learning_rate": 3.1406978847760226e-07, "loss": 0.2572, "step": 12056 }, { "epoch": 1.8449885233358838, "grad_norm": 2.298106954995458, "learning_rate": 3.1345388202170034e-07, "loss": 0.339, "step": 12057 }, { "epoch": 1.845141545524101, "grad_norm": 2.2870522685072068, "learning_rate": 3.128385704544912e-07, "loss": 0.2774, "step": 12058 }, { "epoch": 1.8452945677123183, "grad_norm": 2.0815855675565844, "learning_rate": 3.122238538137634e-07, "loss": 0.2914, "step": 12059 }, { "epoch": 1.8454475899005356, "grad_norm": 2.0949484272761567, "learning_rate": 3.1160973213726466e-07, "loss": 0.2838, "step": 12060 }, { "epoch": 1.845600612088753, "grad_norm": 2.1367734350362744, "learning_rate": 3.109962054627147e-07, "loss": 0.2684, "step": 12061 }, { "epoch": 1.8457536342769703, "grad_norm": 1.9203254480433196, "learning_rate": 3.103832738277923e-07, "loss": 0.2774, "step": 12062 }, { "epoch": 1.8459066564651874, "grad_norm": 2.2174394172339484, "learning_rate": 3.097709372701374e-07, "loss": 0.3001, "step": 12063 }, { "epoch": 1.8460596786534047, "grad_norm": 2.07364457958336, "learning_rate": 3.0915919582735763e-07, "loss": 0.2439, "step": 12064 }, { "epoch": 1.846212700841622, "grad_norm": 2.4434845521963062, "learning_rate": 3.085480495370241e-07, "loss": 0.2569, "step": 12065 }, { "epoch": 1.8463657230298394, "grad_norm": 2.155863352925435, "learning_rate": 3.079374984366668e-07, "loss": 0.3176, "step": 12066 }, { "epoch": 1.8465187452180567, "grad_norm": 1.9860984079286215, "learning_rate": 3.073275425637834e-07, "loss": 0.2281, "step": 12067 }, { "epoch": 1.8466717674062738, "grad_norm": 2.4004615498420803, "learning_rate": 3.06718181955834e-07, "loss": 0.2703, "step": 12068 }, { "epoch": 1.8468247895944914, "grad_norm": 2.2840856637864255, "learning_rate": 3.0610941665024097e-07, "loss": 0.272, "step": 12069 }, { "epoch": 1.8469778117827085, "grad_norm": 2.089748940200058, "learning_rate": 3.0550124668439097e-07, "loss": 0.2438, "step": 12070 }, { "epoch": 1.8471308339709258, "grad_norm": 2.219418314806367, "learning_rate": 3.0489367209563527e-07, "loss": 0.2898, "step": 12071 }, { "epoch": 1.8472838561591431, "grad_norm": 2.060183624027669, "learning_rate": 3.042866929212862e-07, "loss": 0.2405, "step": 12072 }, { "epoch": 1.8474368783473603, "grad_norm": 2.010860439464339, "learning_rate": 3.036803091986218e-07, "loss": 0.2258, "step": 12073 }, { "epoch": 1.8475899005355778, "grad_norm": 1.9942119712715967, "learning_rate": 3.030745209648811e-07, "loss": 0.3044, "step": 12074 }, { "epoch": 1.847742922723795, "grad_norm": 2.232052662763698, "learning_rate": 3.024693282572688e-07, "loss": 0.285, "step": 12075 }, { "epoch": 1.8478959449120123, "grad_norm": 2.0825006725065585, "learning_rate": 3.01864731112953e-07, "loss": 0.2628, "step": 12076 }, { "epoch": 1.8480489671002296, "grad_norm": 1.9978220340953112, "learning_rate": 3.012607295690617e-07, "loss": 0.1955, "step": 12077 }, { "epoch": 1.8482019892884467, "grad_norm": 2.0714850256582724, "learning_rate": 3.0065732366269197e-07, "loss": 0.2598, "step": 12078 }, { "epoch": 1.8483550114766643, "grad_norm": 1.9595160794383788, "learning_rate": 3.0005451343089964e-07, "loss": 0.213, "step": 12079 }, { "epoch": 1.8485080336648814, "grad_norm": 2.179653007063922, "learning_rate": 2.9945229891070624e-07, "loss": 0.2505, "step": 12080 }, { "epoch": 1.8486610558530987, "grad_norm": 2.0967176533438967, "learning_rate": 2.9885068013909444e-07, "loss": 0.2826, "step": 12081 }, { "epoch": 1.848814078041316, "grad_norm": 1.8612577442029008, "learning_rate": 2.982496571530158e-07, "loss": 0.2543, "step": 12082 }, { "epoch": 1.8489671002295331, "grad_norm": 2.3138064490032746, "learning_rate": 2.976492299893774e-07, "loss": 0.2842, "step": 12083 }, { "epoch": 1.8491201224177507, "grad_norm": 2.426532374935647, "learning_rate": 2.9704939868505533e-07, "loss": 0.3227, "step": 12084 }, { "epoch": 1.8492731446059678, "grad_norm": 1.9737808547398248, "learning_rate": 2.9645016327689014e-07, "loss": 0.3011, "step": 12085 }, { "epoch": 1.8494261667941851, "grad_norm": 1.9982381311792963, "learning_rate": 2.9585152380167906e-07, "loss": 0.2686, "step": 12086 }, { "epoch": 1.8495791889824025, "grad_norm": 1.985073678732461, "learning_rate": 2.952534802961882e-07, "loss": 0.2569, "step": 12087 }, { "epoch": 1.8497322111706196, "grad_norm": 1.9793562344285673, "learning_rate": 2.946560327971493e-07, "loss": 0.2316, "step": 12088 }, { "epoch": 1.8498852333588371, "grad_norm": 2.424299187829567, "learning_rate": 2.9405918134124747e-07, "loss": 0.2653, "step": 12089 }, { "epoch": 1.8500382555470543, "grad_norm": 2.119611836762099, "learning_rate": 2.9346292596514227e-07, "loss": 0.2722, "step": 12090 }, { "epoch": 1.8501912777352716, "grad_norm": 2.386035985833903, "learning_rate": 2.928672667054533e-07, "loss": 0.3155, "step": 12091 }, { "epoch": 1.850344299923489, "grad_norm": 1.9398662606503192, "learning_rate": 2.9227220359875685e-07, "loss": 0.279, "step": 12092 }, { "epoch": 1.850497322111706, "grad_norm": 2.569255627334306, "learning_rate": 2.916777366816026e-07, "loss": 0.2903, "step": 12093 }, { "epoch": 1.8506503442999236, "grad_norm": 2.268604135723616, "learning_rate": 2.9108386599049685e-07, "loss": 0.2867, "step": 12094 }, { "epoch": 1.8508033664881407, "grad_norm": 2.3828483785992374, "learning_rate": 2.9049059156191385e-07, "loss": 0.3234, "step": 12095 }, { "epoch": 1.850956388676358, "grad_norm": 1.988877704957895, "learning_rate": 2.8989791343228657e-07, "loss": 0.2306, "step": 12096 }, { "epoch": 1.8511094108645754, "grad_norm": 2.2265877027141383, "learning_rate": 2.8930583163801487e-07, "loss": 0.2704, "step": 12097 }, { "epoch": 1.8512624330527927, "grad_norm": 2.0913282906056505, "learning_rate": 2.887143462154596e-07, "loss": 0.271, "step": 12098 }, { "epoch": 1.85141545524101, "grad_norm": 2.172619994500396, "learning_rate": 2.8812345720094836e-07, "loss": 0.3294, "step": 12099 }, { "epoch": 1.8515684774292271, "grad_norm": 2.136696908463675, "learning_rate": 2.875331646307677e-07, "loss": 0.2473, "step": 12100 }, { "epoch": 1.8517214996174445, "grad_norm": 2.074684483541853, "learning_rate": 2.869434685411709e-07, "loss": 0.2527, "step": 12101 }, { "epoch": 1.8518745218056618, "grad_norm": 2.133340003665217, "learning_rate": 2.863543689683734e-07, "loss": 0.2935, "step": 12102 }, { "epoch": 1.8520275439938791, "grad_norm": 1.951720876586633, "learning_rate": 2.8576586594855517e-07, "loss": 0.2645, "step": 12103 }, { "epoch": 1.8521805661820965, "grad_norm": 2.147938107651279, "learning_rate": 2.851779595178561e-07, "loss": 0.2582, "step": 12104 }, { "epoch": 1.8523335883703136, "grad_norm": 2.226493399132335, "learning_rate": 2.8459064971238293e-07, "loss": 0.2827, "step": 12105 }, { "epoch": 1.8524866105585311, "grad_norm": 2.2685272163662655, "learning_rate": 2.840039365682068e-07, "loss": 0.3398, "step": 12106 }, { "epoch": 1.8526396327467483, "grad_norm": 2.313428395457457, "learning_rate": 2.8341782012135665e-07, "loss": 0.2826, "step": 12107 }, { "epoch": 1.8527926549349656, "grad_norm": 2.3340885234134943, "learning_rate": 2.828323004078293e-07, "loss": 0.2838, "step": 12108 }, { "epoch": 1.852945677123183, "grad_norm": 1.784707937660604, "learning_rate": 2.822473774635859e-07, "loss": 0.2615, "step": 12109 }, { "epoch": 1.8530986993114, "grad_norm": 1.9472104360411793, "learning_rate": 2.8166305132454443e-07, "loss": 0.2362, "step": 12110 }, { "epoch": 1.8532517214996176, "grad_norm": 1.9800530360384525, "learning_rate": 2.8107932202659504e-07, "loss": 0.298, "step": 12111 }, { "epoch": 1.8534047436878347, "grad_norm": 2.2978407370753695, "learning_rate": 2.804961896055858e-07, "loss": 0.325, "step": 12112 }, { "epoch": 1.853557765876052, "grad_norm": 2.141324858232483, "learning_rate": 2.7991365409732683e-07, "loss": 0.2936, "step": 12113 }, { "epoch": 1.8537107880642694, "grad_norm": 2.481993377344709, "learning_rate": 2.7933171553759633e-07, "loss": 0.3219, "step": 12114 }, { "epoch": 1.8538638102524865, "grad_norm": 1.9074014250506492, "learning_rate": 2.7875037396213335e-07, "loss": 0.2674, "step": 12115 }, { "epoch": 1.854016832440704, "grad_norm": 2.168713968336492, "learning_rate": 2.7816962940663827e-07, "loss": 0.2711, "step": 12116 }, { "epoch": 1.8541698546289211, "grad_norm": 2.495180968740349, "learning_rate": 2.7758948190677927e-07, "loss": 0.3058, "step": 12117 }, { "epoch": 1.8543228768171385, "grad_norm": 2.281648475117201, "learning_rate": 2.7700993149818336e-07, "loss": 0.2417, "step": 12118 }, { "epoch": 1.8544758990053558, "grad_norm": 2.151961070143828, "learning_rate": 2.764309782164454e-07, "loss": 0.2624, "step": 12119 }, { "epoch": 1.854628921193573, "grad_norm": 1.9512571981380695, "learning_rate": 2.7585262209711916e-07, "loss": 0.2198, "step": 12120 }, { "epoch": 1.8547819433817905, "grad_norm": 2.072196470323757, "learning_rate": 2.7527486317572515e-07, "loss": 0.281, "step": 12121 }, { "epoch": 1.8549349655700076, "grad_norm": 2.157528977725661, "learning_rate": 2.746977014877439e-07, "loss": 0.3241, "step": 12122 }, { "epoch": 1.855087987758225, "grad_norm": 2.1224216420455, "learning_rate": 2.7412113706862366e-07, "loss": 0.2503, "step": 12123 }, { "epoch": 1.8552410099464423, "grad_norm": 1.9186758271968076, "learning_rate": 2.7354516995377165e-07, "loss": 0.228, "step": 12124 }, { "epoch": 1.8553940321346594, "grad_norm": 2.2800359932130942, "learning_rate": 2.729698001785608e-07, "loss": 0.2943, "step": 12125 }, { "epoch": 1.855547054322877, "grad_norm": 2.138629020001359, "learning_rate": 2.723950277783272e-07, "loss": 0.2682, "step": 12126 }, { "epoch": 1.855700076511094, "grad_norm": 2.059609498153823, "learning_rate": 2.718208527883692e-07, "loss": 0.2549, "step": 12127 }, { "epoch": 1.8558530986993114, "grad_norm": 2.0336964526176806, "learning_rate": 2.712472752439499e-07, "loss": 0.2152, "step": 12128 }, { "epoch": 1.8560061208875287, "grad_norm": 2.575196197121381, "learning_rate": 2.706742951802943e-07, "loss": 0.2578, "step": 12129 }, { "epoch": 1.856159143075746, "grad_norm": 2.0950964294914676, "learning_rate": 2.701019126325921e-07, "loss": 0.2927, "step": 12130 }, { "epoch": 1.8563121652639634, "grad_norm": 1.9792281674828598, "learning_rate": 2.6953012763599293e-07, "loss": 0.2385, "step": 12131 }, { "epoch": 1.8564651874521805, "grad_norm": 2.45643230103461, "learning_rate": 2.689589402256165e-07, "loss": 0.3375, "step": 12132 }, { "epoch": 1.8566182096403978, "grad_norm": 2.1003291220472655, "learning_rate": 2.683883504365392e-07, "loss": 0.2646, "step": 12133 }, { "epoch": 1.8567712318286151, "grad_norm": 2.4439436285141083, "learning_rate": 2.67818358303803e-07, "loss": 0.3014, "step": 12134 }, { "epoch": 1.8569242540168325, "grad_norm": 2.0784054999384582, "learning_rate": 2.672489638624154e-07, "loss": 0.2691, "step": 12135 }, { "epoch": 1.8570772762050498, "grad_norm": 2.3457672844778803, "learning_rate": 2.666801671473429e-07, "loss": 0.2712, "step": 12136 }, { "epoch": 1.857230298393267, "grad_norm": 2.2535684260502205, "learning_rate": 2.6611196819351647e-07, "loss": 0.2782, "step": 12137 }, { "epoch": 1.8573833205814845, "grad_norm": 2.4045532560845486, "learning_rate": 2.6554436703583595e-07, "loss": 0.2972, "step": 12138 }, { "epoch": 1.8575363427697016, "grad_norm": 2.6298059779139744, "learning_rate": 2.6497736370915573e-07, "loss": 0.2809, "step": 12139 }, { "epoch": 1.857689364957919, "grad_norm": 2.332091100831672, "learning_rate": 2.64410958248299e-07, "loss": 0.2753, "step": 12140 }, { "epoch": 1.8578423871461363, "grad_norm": 1.7920366458419108, "learning_rate": 2.638451506880524e-07, "loss": 0.2298, "step": 12141 }, { "epoch": 1.8579954093343534, "grad_norm": 1.9857680513176472, "learning_rate": 2.6327994106316146e-07, "loss": 0.2223, "step": 12142 }, { "epoch": 1.858148431522571, "grad_norm": 2.1848014681855763, "learning_rate": 2.627153294083418e-07, "loss": 0.2969, "step": 12143 }, { "epoch": 1.858301453710788, "grad_norm": 1.9778201877077595, "learning_rate": 2.621513157582656e-07, "loss": 0.2965, "step": 12144 }, { "epoch": 1.8584544758990054, "grad_norm": 2.229623695748292, "learning_rate": 2.615879001475707e-07, "loss": 0.2964, "step": 12145 }, { "epoch": 1.8586074980872227, "grad_norm": 2.183425999238914, "learning_rate": 2.6102508261085957e-07, "loss": 0.3343, "step": 12146 }, { "epoch": 1.8587605202754398, "grad_norm": 2.206930477766042, "learning_rate": 2.6046286318269775e-07, "loss": 0.2877, "step": 12147 }, { "epoch": 1.8589135424636574, "grad_norm": 3.0938588466275148, "learning_rate": 2.5990124189761325e-07, "loss": 0.3017, "step": 12148 }, { "epoch": 1.8590665646518745, "grad_norm": 2.059332216618004, "learning_rate": 2.593402187900973e-07, "loss": 0.2688, "step": 12149 }, { "epoch": 1.8592195868400918, "grad_norm": 2.1161232443504594, "learning_rate": 2.587797938946035e-07, "loss": 0.3114, "step": 12150 }, { "epoch": 1.8593726090283091, "grad_norm": 2.0546549960609592, "learning_rate": 2.5821996724554985e-07, "loss": 0.2921, "step": 12151 }, { "epoch": 1.8595256312165263, "grad_norm": 2.1037161550199492, "learning_rate": 2.576607388773189e-07, "loss": 0.2607, "step": 12152 }, { "epoch": 1.8596786534047438, "grad_norm": 2.0870667813048684, "learning_rate": 2.5710210882425424e-07, "loss": 0.2594, "step": 12153 }, { "epoch": 1.859831675592961, "grad_norm": 2.2012957148422396, "learning_rate": 2.5654407712066287e-07, "loss": 0.2806, "step": 12154 }, { "epoch": 1.8599846977811783, "grad_norm": 2.1918892229646465, "learning_rate": 2.559866438008174e-07, "loss": 0.3092, "step": 12155 }, { "epoch": 1.8601377199693956, "grad_norm": 2.2243548563257796, "learning_rate": 2.554298088989504e-07, "loss": 0.3184, "step": 12156 }, { "epoch": 1.8602907421576127, "grad_norm": 2.3448776203557977, "learning_rate": 2.54873572449259e-07, "loss": 0.3585, "step": 12157 }, { "epoch": 1.8604437643458303, "grad_norm": 2.063362643976679, "learning_rate": 2.543179344859059e-07, "loss": 0.2759, "step": 12158 }, { "epoch": 1.8605967865340474, "grad_norm": 2.336058681945544, "learning_rate": 2.5376289504301375e-07, "loss": 0.3135, "step": 12159 }, { "epoch": 1.8607498087222647, "grad_norm": 2.312197431308048, "learning_rate": 2.5320845415466756e-07, "loss": 0.2921, "step": 12160 }, { "epoch": 1.860902830910482, "grad_norm": 2.0693817907130034, "learning_rate": 2.5265461185492224e-07, "loss": 0.2645, "step": 12161 }, { "epoch": 1.8610558530986994, "grad_norm": 2.0482766219818296, "learning_rate": 2.5210136817778843e-07, "loss": 0.2503, "step": 12162 }, { "epoch": 1.8612088752869167, "grad_norm": 2.391935771524626, "learning_rate": 2.5154872315724333e-07, "loss": 0.3501, "step": 12163 }, { "epoch": 1.8613618974751338, "grad_norm": 2.236266464810737, "learning_rate": 2.509966768272276e-07, "loss": 0.2932, "step": 12164 }, { "epoch": 1.8615149196633511, "grad_norm": 2.310119778373762, "learning_rate": 2.5044522922164414e-07, "loss": 0.3187, "step": 12165 }, { "epoch": 1.8616679418515685, "grad_norm": 2.126907185113331, "learning_rate": 2.498943803743592e-07, "loss": 0.2129, "step": 12166 }, { "epoch": 1.8618209640397858, "grad_norm": 2.048088304206397, "learning_rate": 2.4934413031920456e-07, "loss": 0.2373, "step": 12167 }, { "epoch": 1.8619739862280031, "grad_norm": 1.9929259125015373, "learning_rate": 2.487944790899688e-07, "loss": 0.2448, "step": 12168 }, { "epoch": 1.8621270084162203, "grad_norm": 1.934843136968899, "learning_rate": 2.4824542672041263e-07, "loss": 0.25, "step": 12169 }, { "epoch": 1.8622800306044378, "grad_norm": 1.8995690929106952, "learning_rate": 2.4769697324425577e-07, "loss": 0.2292, "step": 12170 }, { "epoch": 1.862433052792655, "grad_norm": 2.1530208013880423, "learning_rate": 2.471491186951758e-07, "loss": 0.2673, "step": 12171 }, { "epoch": 1.8625860749808723, "grad_norm": 1.8712526951486876, "learning_rate": 2.4660186310682343e-07, "loss": 0.2554, "step": 12172 }, { "epoch": 1.8627390971690896, "grad_norm": 2.2666491985656885, "learning_rate": 2.4605520651280634e-07, "loss": 0.314, "step": 12173 }, { "epoch": 1.8628921193573067, "grad_norm": 1.7644271265516238, "learning_rate": 2.4550914894669544e-07, "loss": 0.2522, "step": 12174 }, { "epoch": 1.8630451415455243, "grad_norm": 2.1397863046954284, "learning_rate": 2.4496369044202826e-07, "loss": 0.3065, "step": 12175 }, { "epoch": 1.8631981637337414, "grad_norm": 2.2356898789015585, "learning_rate": 2.4441883103230256e-07, "loss": 0.2685, "step": 12176 }, { "epoch": 1.8633511859219587, "grad_norm": 2.227907810018491, "learning_rate": 2.438745707509815e-07, "loss": 0.3018, "step": 12177 }, { "epoch": 1.863504208110176, "grad_norm": 2.307269133388442, "learning_rate": 2.4333090963148843e-07, "loss": 0.3108, "step": 12178 }, { "epoch": 1.8636572302983931, "grad_norm": 2.2926041689812564, "learning_rate": 2.427878477072121e-07, "loss": 0.2948, "step": 12179 }, { "epoch": 1.8638102524866107, "grad_norm": 2.2200065269776497, "learning_rate": 2.4224538501150475e-07, "loss": 0.2883, "step": 12180 }, { "epoch": 1.8639632746748278, "grad_norm": 2.0355521766689733, "learning_rate": 2.417035215776808e-07, "loss": 0.2665, "step": 12181 }, { "epoch": 1.8641162968630451, "grad_norm": 2.0094946627575867, "learning_rate": 2.4116225743901935e-07, "loss": 0.2702, "step": 12182 }, { "epoch": 1.8642693190512625, "grad_norm": 2.172054629406562, "learning_rate": 2.4062159262875917e-07, "loss": 0.3103, "step": 12183 }, { "epoch": 1.8644223412394796, "grad_norm": 2.2125272166267274, "learning_rate": 2.4008152718010493e-07, "loss": 0.2704, "step": 12184 }, { "epoch": 1.8645753634276971, "grad_norm": 1.9717482284027534, "learning_rate": 2.395420611262278e-07, "loss": 0.2178, "step": 12185 }, { "epoch": 1.8647283856159143, "grad_norm": 2.256922690173856, "learning_rate": 2.3900319450025464e-07, "loss": 0.2931, "step": 12186 }, { "epoch": 1.8648814078041316, "grad_norm": 2.739839348840143, "learning_rate": 2.3846492733527905e-07, "loss": 0.2836, "step": 12187 }, { "epoch": 1.865034429992349, "grad_norm": 1.700510993453522, "learning_rate": 2.3792725966436224e-07, "loss": 0.2278, "step": 12188 }, { "epoch": 1.865187452180566, "grad_norm": 1.9545979397503868, "learning_rate": 2.3739019152052013e-07, "loss": 0.2552, "step": 12189 }, { "epoch": 1.8653404743687836, "grad_norm": 2.195267570630192, "learning_rate": 2.3685372293673737e-07, "loss": 0.2948, "step": 12190 }, { "epoch": 1.8654934965570007, "grad_norm": 1.8382213012672446, "learning_rate": 2.3631785394596317e-07, "loss": 0.2203, "step": 12191 }, { "epoch": 1.865646518745218, "grad_norm": 1.8451009051414737, "learning_rate": 2.3578258458110347e-07, "loss": 0.2316, "step": 12192 }, { "epoch": 1.8657995409334354, "grad_norm": 2.3210095540011624, "learning_rate": 2.3524791487503418e-07, "loss": 0.2854, "step": 12193 }, { "epoch": 1.8659525631216525, "grad_norm": 2.146736780352666, "learning_rate": 2.3471384486059123e-07, "loss": 0.2932, "step": 12194 }, { "epoch": 1.86610558530987, "grad_norm": 1.9464980080620138, "learning_rate": 2.3418037457057063e-07, "loss": 0.278, "step": 12195 }, { "epoch": 1.8662586074980871, "grad_norm": 2.066963030143947, "learning_rate": 2.336475040377384e-07, "loss": 0.2585, "step": 12196 }, { "epoch": 1.8664116296863045, "grad_norm": 2.311400102076601, "learning_rate": 2.3311523329481943e-07, "loss": 0.2815, "step": 12197 }, { "epoch": 1.8665646518745218, "grad_norm": 2.067821743921238, "learning_rate": 2.3258356237450208e-07, "loss": 0.2692, "step": 12198 }, { "epoch": 1.8667176740627391, "grad_norm": 3.1778578650717884, "learning_rate": 2.3205249130943908e-07, "loss": 0.3116, "step": 12199 }, { "epoch": 1.8668706962509565, "grad_norm": 2.2301021426943506, "learning_rate": 2.3152202013224434e-07, "loss": 0.2504, "step": 12200 }, { "epoch": 1.8670237184391736, "grad_norm": 1.783192454316723, "learning_rate": 2.3099214887549626e-07, "loss": 0.1713, "step": 12201 }, { "epoch": 1.867176740627391, "grad_norm": 2.0357835170166374, "learning_rate": 2.3046287757173768e-07, "loss": 0.2838, "step": 12202 }, { "epoch": 1.8673297628156083, "grad_norm": 1.9008329193246671, "learning_rate": 2.2993420625347263e-07, "loss": 0.2363, "step": 12203 }, { "epoch": 1.8674827850038256, "grad_norm": 2.164894992849952, "learning_rate": 2.2940613495316844e-07, "loss": 0.2903, "step": 12204 }, { "epoch": 1.867635807192043, "grad_norm": 1.818748508477644, "learning_rate": 2.2887866370325696e-07, "loss": 0.1966, "step": 12205 }, { "epoch": 1.86778882938026, "grad_norm": 2.5998851775941416, "learning_rate": 2.2835179253613005e-07, "loss": 0.2729, "step": 12206 }, { "epoch": 1.8679418515684776, "grad_norm": 2.2297654340062523, "learning_rate": 2.2782552148414738e-07, "loss": 0.3441, "step": 12207 }, { "epoch": 1.8680948737566947, "grad_norm": 2.147215993679389, "learning_rate": 2.2729985057962866e-07, "loss": 0.2219, "step": 12208 }, { "epoch": 1.868247895944912, "grad_norm": 2.015334317509236, "learning_rate": 2.2677477985485697e-07, "loss": 0.2214, "step": 12209 }, { "epoch": 1.8684009181331294, "grad_norm": 2.0696416363691137, "learning_rate": 2.262503093420787e-07, "loss": 0.2825, "step": 12210 }, { "epoch": 1.8685539403213465, "grad_norm": 2.0538848905826192, "learning_rate": 2.2572643907350477e-07, "loss": 0.2514, "step": 12211 }, { "epoch": 1.868706962509564, "grad_norm": 2.0587824354921422, "learning_rate": 2.2520316908130612e-07, "loss": 0.2369, "step": 12212 }, { "epoch": 1.8688599846977811, "grad_norm": 2.2401753137094067, "learning_rate": 2.2468049939762038e-07, "loss": 0.3055, "step": 12213 }, { "epoch": 1.8690130068859985, "grad_norm": 1.9765528689673304, "learning_rate": 2.241584300545485e-07, "loss": 0.2514, "step": 12214 }, { "epoch": 1.8691660290742158, "grad_norm": 2.16610698984525, "learning_rate": 2.2363696108414822e-07, "loss": 0.2618, "step": 12215 }, { "epoch": 1.869319051262433, "grad_norm": 2.272639274412879, "learning_rate": 2.2311609251844834e-07, "loss": 0.3037, "step": 12216 }, { "epoch": 1.8694720734506505, "grad_norm": 2.2152036642185946, "learning_rate": 2.2259582438943773e-07, "loss": 0.3251, "step": 12217 }, { "epoch": 1.8696250956388676, "grad_norm": 2.0843874546884247, "learning_rate": 2.2207615672906523e-07, "loss": 0.2705, "step": 12218 }, { "epoch": 1.869778117827085, "grad_norm": 2.192065193696456, "learning_rate": 2.215570895692476e-07, "loss": 0.274, "step": 12219 }, { "epoch": 1.8699311400153023, "grad_norm": 2.252514725359488, "learning_rate": 2.2103862294186374e-07, "loss": 0.3029, "step": 12220 }, { "epoch": 1.8700841622035194, "grad_norm": 1.7304474836249897, "learning_rate": 2.2052075687875262e-07, "loss": 0.2527, "step": 12221 }, { "epoch": 1.870237184391737, "grad_norm": 2.319453397662655, "learning_rate": 2.2000349141171995e-07, "loss": 0.3471, "step": 12222 }, { "epoch": 1.870390206579954, "grad_norm": 2.096261818076652, "learning_rate": 2.194868265725325e-07, "loss": 0.2351, "step": 12223 }, { "epoch": 1.8705432287681714, "grad_norm": 1.7582270248063583, "learning_rate": 2.1897076239291936e-07, "loss": 0.3032, "step": 12224 }, { "epoch": 1.8706962509563887, "grad_norm": 2.144383125659378, "learning_rate": 2.184552989045763e-07, "loss": 0.2726, "step": 12225 }, { "epoch": 1.8708492731446058, "grad_norm": 4.532884435368886, "learning_rate": 2.1794043613916015e-07, "loss": 0.3022, "step": 12226 }, { "epoch": 1.8710022953328234, "grad_norm": 2.559387386062656, "learning_rate": 2.1742617412828682e-07, "loss": 0.3715, "step": 12227 }, { "epoch": 1.8711553175210405, "grad_norm": 2.162713822623037, "learning_rate": 2.169125129035432e-07, "loss": 0.2815, "step": 12228 }, { "epoch": 1.8713083397092578, "grad_norm": 2.2515483930461526, "learning_rate": 2.163994524964741e-07, "loss": 0.3593, "step": 12229 }, { "epoch": 1.8714613618974751, "grad_norm": 1.8777623529142353, "learning_rate": 2.158869929385876e-07, "loss": 0.21, "step": 12230 }, { "epoch": 1.8716143840856925, "grad_norm": 2.0469775737816946, "learning_rate": 2.1537513426135637e-07, "loss": 0.2926, "step": 12231 }, { "epoch": 1.8717674062739098, "grad_norm": 2.303687292526856, "learning_rate": 2.1486387649621632e-07, "loss": 0.324, "step": 12232 }, { "epoch": 1.871920428462127, "grad_norm": 2.3834650957610792, "learning_rate": 2.1435321967456567e-07, "loss": 0.3276, "step": 12233 }, { "epoch": 1.8720734506503443, "grad_norm": 2.2343221661881434, "learning_rate": 2.1384316382776493e-07, "loss": 0.3089, "step": 12234 }, { "epoch": 1.8722264728385616, "grad_norm": 1.8172686458735308, "learning_rate": 2.1333370898713902e-07, "loss": 0.2393, "step": 12235 }, { "epoch": 1.872379495026779, "grad_norm": 2.608854617407368, "learning_rate": 2.1282485518397622e-07, "loss": 0.3307, "step": 12236 }, { "epoch": 1.8725325172149963, "grad_norm": 2.50025253282384, "learning_rate": 2.1231660244952713e-07, "loss": 0.2851, "step": 12237 }, { "epoch": 1.8726855394032134, "grad_norm": 2.0332244215536095, "learning_rate": 2.1180895081500453e-07, "loss": 0.2816, "step": 12238 }, { "epoch": 1.872838561591431, "grad_norm": 1.8344196972742979, "learning_rate": 2.113019003115857e-07, "loss": 0.2178, "step": 12239 }, { "epoch": 1.872991583779648, "grad_norm": 2.1325275388576395, "learning_rate": 2.107954509704102e-07, "loss": 0.2919, "step": 12240 }, { "epoch": 1.8731446059678654, "grad_norm": 2.03238396828965, "learning_rate": 2.1028960282258204e-07, "loss": 0.312, "step": 12241 }, { "epoch": 1.8732976281560827, "grad_norm": 2.134605895045237, "learning_rate": 2.0978435589916635e-07, "loss": 0.2619, "step": 12242 }, { "epoch": 1.8734506503442998, "grad_norm": 2.0790319221111764, "learning_rate": 2.0927971023119498e-07, "loss": 0.2806, "step": 12243 }, { "epoch": 1.8736036725325174, "grad_norm": 2.1151547837451323, "learning_rate": 2.0877566584965646e-07, "loss": 0.2672, "step": 12244 }, { "epoch": 1.8737566947207345, "grad_norm": 1.8800490838528072, "learning_rate": 2.082722227855083e-07, "loss": 0.2575, "step": 12245 }, { "epoch": 1.8739097169089518, "grad_norm": 1.9156558849897791, "learning_rate": 2.0776938106966903e-07, "loss": 0.2848, "step": 12246 }, { "epoch": 1.8740627390971691, "grad_norm": 2.1518764006308797, "learning_rate": 2.0726714073301845e-07, "loss": 0.278, "step": 12247 }, { "epoch": 1.8742157612853863, "grad_norm": 2.115861447241316, "learning_rate": 2.0676550180640187e-07, "loss": 0.3185, "step": 12248 }, { "epoch": 1.8743687834736038, "grad_norm": 2.426652595668358, "learning_rate": 2.0626446432062798e-07, "loss": 0.2452, "step": 12249 }, { "epoch": 1.874521805661821, "grad_norm": 2.011114310109736, "learning_rate": 2.0576402830646548e-07, "loss": 0.2554, "step": 12250 }, { "epoch": 1.8746748278500383, "grad_norm": 2.441039647615812, "learning_rate": 2.052641937946509e-07, "loss": 0.3226, "step": 12251 }, { "epoch": 1.8748278500382556, "grad_norm": 2.2657634566140827, "learning_rate": 2.0476496081587972e-07, "loss": 0.2532, "step": 12252 }, { "epoch": 1.8749808722264727, "grad_norm": 2.2793594074333727, "learning_rate": 2.042663294008096e-07, "loss": 0.3178, "step": 12253 }, { "epoch": 1.8751338944146902, "grad_norm": 2.3583753493859168, "learning_rate": 2.0376829958006606e-07, "loss": 0.3027, "step": 12254 }, { "epoch": 1.8752869166029074, "grad_norm": 2.3976898464771317, "learning_rate": 2.0327087138423464e-07, "loss": 0.3205, "step": 12255 }, { "epoch": 1.8754399387911247, "grad_norm": 2.3833629536154044, "learning_rate": 2.027740448438631e-07, "loss": 0.2334, "step": 12256 }, { "epoch": 1.875592960979342, "grad_norm": 2.5591541641816455, "learning_rate": 2.0227781998946483e-07, "loss": 0.2747, "step": 12257 }, { "epoch": 1.8757459831675591, "grad_norm": 1.9460841714107107, "learning_rate": 2.0178219685151544e-07, "loss": 0.2954, "step": 12258 }, { "epoch": 1.8758990053557767, "grad_norm": 2.130399857557917, "learning_rate": 2.0128717546045174e-07, "loss": 0.3298, "step": 12259 }, { "epoch": 1.8760520275439938, "grad_norm": 2.054983131339847, "learning_rate": 2.007927558466749e-07, "loss": 0.2231, "step": 12260 }, { "epoch": 1.8762050497322111, "grad_norm": 1.9846915653480426, "learning_rate": 2.0029893804054956e-07, "loss": 0.2199, "step": 12261 }, { "epoch": 1.8763580719204285, "grad_norm": 2.1665049130134575, "learning_rate": 1.9980572207240367e-07, "loss": 0.3055, "step": 12262 }, { "epoch": 1.8765110941086458, "grad_norm": 2.0578045313911857, "learning_rate": 1.9931310797252635e-07, "loss": 0.2991, "step": 12263 }, { "epoch": 1.8766641162968631, "grad_norm": 1.7469439315722723, "learning_rate": 1.9882109577117337e-07, "loss": 0.2242, "step": 12264 }, { "epoch": 1.8768171384850802, "grad_norm": 2.174488307923911, "learning_rate": 1.9832968549855724e-07, "loss": 0.3399, "step": 12265 }, { "epoch": 1.8769701606732976, "grad_norm": 1.947263660138171, "learning_rate": 1.978388771848594e-07, "loss": 0.238, "step": 12266 }, { "epoch": 1.877123182861515, "grad_norm": 2.155219460753691, "learning_rate": 1.9734867086022458e-07, "loss": 0.2746, "step": 12267 }, { "epoch": 1.8772762050497322, "grad_norm": 2.042073767668411, "learning_rate": 1.9685906655475428e-07, "loss": 0.2658, "step": 12268 }, { "epoch": 1.8774292272379496, "grad_norm": 1.8416151846022213, "learning_rate": 1.9637006429851778e-07, "loss": 0.214, "step": 12269 }, { "epoch": 1.8775822494261667, "grad_norm": 2.3539053420151492, "learning_rate": 1.9588166412154997e-07, "loss": 0.3265, "step": 12270 }, { "epoch": 1.8777352716143842, "grad_norm": 2.4376515005557637, "learning_rate": 1.9539386605384125e-07, "loss": 0.2822, "step": 12271 }, { "epoch": 1.8778882938026014, "grad_norm": 1.8886652401346042, "learning_rate": 1.9490667012535215e-07, "loss": 0.2249, "step": 12272 }, { "epoch": 1.8780413159908187, "grad_norm": 2.329875267873853, "learning_rate": 1.9442007636600312e-07, "loss": 0.2711, "step": 12273 }, { "epoch": 1.878194338179036, "grad_norm": 2.189273001021051, "learning_rate": 1.9393408480567477e-07, "loss": 0.2499, "step": 12274 }, { "epoch": 1.8783473603672531, "grad_norm": 2.439968634578106, "learning_rate": 1.934486954742165e-07, "loss": 0.3041, "step": 12275 }, { "epoch": 1.8785003825554707, "grad_norm": 1.9757854141283482, "learning_rate": 1.9296390840143897e-07, "loss": 0.2711, "step": 12276 }, { "epoch": 1.8786534047436878, "grad_norm": 2.0689238149257476, "learning_rate": 1.9247972361711053e-07, "loss": 0.2598, "step": 12277 }, { "epoch": 1.8788064269319051, "grad_norm": 2.109148148749415, "learning_rate": 1.9199614115097076e-07, "loss": 0.2545, "step": 12278 }, { "epoch": 1.8789594491201225, "grad_norm": 2.4297663317572438, "learning_rate": 1.91513161032717e-07, "loss": 0.2645, "step": 12279 }, { "epoch": 1.8791124713083396, "grad_norm": 2.1144787392422755, "learning_rate": 1.9103078329201108e-07, "loss": 0.2495, "step": 12280 }, { "epoch": 1.8792654934965571, "grad_norm": 2.559522126569114, "learning_rate": 1.905490079584782e-07, "loss": 0.2995, "step": 12281 }, { "epoch": 1.8794185156847742, "grad_norm": 2.186023542033774, "learning_rate": 1.9006783506170467e-07, "loss": 0.288, "step": 12282 }, { "epoch": 1.8795715378729916, "grad_norm": 2.3879072385137405, "learning_rate": 1.8958726463124243e-07, "loss": 0.28, "step": 12283 }, { "epoch": 1.879724560061209, "grad_norm": 2.342145125582227, "learning_rate": 1.8910729669660454e-07, "loss": 0.2903, "step": 12284 }, { "epoch": 1.879877582249426, "grad_norm": 2.2675058883591985, "learning_rate": 1.8862793128726853e-07, "loss": 0.2908, "step": 12285 }, { "epoch": 1.8800306044376436, "grad_norm": 2.1204252540865367, "learning_rate": 1.8814916843267306e-07, "loss": 0.3024, "step": 12286 }, { "epoch": 1.8801836266258607, "grad_norm": 2.1179211787734027, "learning_rate": 1.876710081622224e-07, "loss": 0.2748, "step": 12287 }, { "epoch": 1.880336648814078, "grad_norm": 2.3750767038120775, "learning_rate": 1.871934505052797e-07, "loss": 0.3196, "step": 12288 }, { "epoch": 1.8804896710022954, "grad_norm": 2.156315708260614, "learning_rate": 1.86716495491176e-07, "loss": 0.2977, "step": 12289 }, { "epoch": 1.8806426931905125, "grad_norm": 2.0621837306649367, "learning_rate": 1.8624014314920114e-07, "loss": 0.3236, "step": 12290 }, { "epoch": 1.88079571537873, "grad_norm": 1.917989006367677, "learning_rate": 1.8576439350861175e-07, "loss": 0.2343, "step": 12291 }, { "epoch": 1.8809487375669471, "grad_norm": 1.9783581142507576, "learning_rate": 1.8528924659862447e-07, "loss": 0.2416, "step": 12292 }, { "epoch": 1.8811017597551645, "grad_norm": 2.0896830236966557, "learning_rate": 1.8481470244841925e-07, "loss": 0.2807, "step": 12293 }, { "epoch": 1.8812547819433818, "grad_norm": 2.09622278574327, "learning_rate": 1.843407610871406e-07, "loss": 0.2848, "step": 12294 }, { "epoch": 1.881407804131599, "grad_norm": 1.9323158480708773, "learning_rate": 1.838674225438941e-07, "loss": 0.2593, "step": 12295 }, { "epoch": 1.8815608263198165, "grad_norm": 2.2310584238434843, "learning_rate": 1.8339468684775209e-07, "loss": 0.2542, "step": 12296 }, { "epoch": 1.8817138485080336, "grad_norm": 2.0500256612721874, "learning_rate": 1.8292255402774462e-07, "loss": 0.2342, "step": 12297 }, { "epoch": 1.881866870696251, "grad_norm": 2.115013908907418, "learning_rate": 1.824510241128663e-07, "loss": 0.2748, "step": 12298 }, { "epoch": 1.8820198928844682, "grad_norm": 1.907028475113633, "learning_rate": 1.8198009713207843e-07, "loss": 0.2571, "step": 12299 }, { "epoch": 1.8821729150726856, "grad_norm": 2.071888069560488, "learning_rate": 1.8150977311430007e-07, "loss": 0.3025, "step": 12300 }, { "epoch": 1.882325937260903, "grad_norm": 1.9734825940094098, "learning_rate": 1.8104005208841702e-07, "loss": 0.296, "step": 12301 }, { "epoch": 1.88247895944912, "grad_norm": 2.07524085707333, "learning_rate": 1.805709340832773e-07, "loss": 0.2904, "step": 12302 }, { "epoch": 1.8826319816373374, "grad_norm": 2.006754654899638, "learning_rate": 1.80102419127689e-07, "loss": 0.2159, "step": 12303 }, { "epoch": 1.8827850038255547, "grad_norm": 2.0742510517617077, "learning_rate": 1.7963450725042687e-07, "loss": 0.2474, "step": 12304 }, { "epoch": 1.882938026013772, "grad_norm": 2.082631008210018, "learning_rate": 1.7916719848022902e-07, "loss": 0.2379, "step": 12305 }, { "epoch": 1.8830910482019894, "grad_norm": 2.032130659886695, "learning_rate": 1.787004928457903e-07, "loss": 0.2687, "step": 12306 }, { "epoch": 1.8832440703902065, "grad_norm": 2.1550406519384424, "learning_rate": 1.782343903757766e-07, "loss": 0.2834, "step": 12307 }, { "epoch": 1.883397092578424, "grad_norm": 2.22732515632349, "learning_rate": 1.777688910988118e-07, "loss": 0.3246, "step": 12308 }, { "epoch": 1.8835501147666411, "grad_norm": 2.4500515480082665, "learning_rate": 1.7730399504348404e-07, "loss": 0.3195, "step": 12309 }, { "epoch": 1.8837031369548585, "grad_norm": 2.268660747881073, "learning_rate": 1.76839702238345e-07, "loss": 0.3028, "step": 12310 }, { "epoch": 1.8838561591430758, "grad_norm": 1.6763232823881535, "learning_rate": 1.7637601271190852e-07, "loss": 0.213, "step": 12311 }, { "epoch": 1.884009181331293, "grad_norm": 2.132550267954547, "learning_rate": 1.7591292649265069e-07, "loss": 0.2764, "step": 12312 }, { "epoch": 1.8841622035195105, "grad_norm": 3.1774836443797185, "learning_rate": 1.754504436090121e-07, "loss": 0.3129, "step": 12313 }, { "epoch": 1.8843152257077276, "grad_norm": 2.3320424100195085, "learning_rate": 1.7498856408939558e-07, "loss": 0.2989, "step": 12314 }, { "epoch": 1.884468247895945, "grad_norm": 2.0604693493677138, "learning_rate": 1.7452728796216732e-07, "loss": 0.3438, "step": 12315 }, { "epoch": 1.8846212700841622, "grad_norm": 2.090110244983448, "learning_rate": 1.7406661525565582e-07, "loss": 0.2364, "step": 12316 }, { "epoch": 1.8847742922723794, "grad_norm": 2.235882951459802, "learning_rate": 1.7360654599815285e-07, "loss": 0.3374, "step": 12317 }, { "epoch": 1.884927314460597, "grad_norm": 2.080730815047136, "learning_rate": 1.731470802179125e-07, "loss": 0.3256, "step": 12318 }, { "epoch": 1.885080336648814, "grad_norm": 2.3867969064570334, "learning_rate": 1.7268821794315328e-07, "loss": 0.2551, "step": 12319 }, { "epoch": 1.8852333588370314, "grad_norm": 2.2624261138843966, "learning_rate": 1.7222995920205488e-07, "loss": 0.3053, "step": 12320 }, { "epoch": 1.8853863810252487, "grad_norm": 2.095280750448679, "learning_rate": 1.7177230402276147e-07, "loss": 0.2418, "step": 12321 }, { "epoch": 1.8855394032134658, "grad_norm": 1.9204988322442775, "learning_rate": 1.7131525243337944e-07, "loss": 0.2802, "step": 12322 }, { "epoch": 1.8856924254016834, "grad_norm": 2.3341667000082715, "learning_rate": 1.7085880446197633e-07, "loss": 0.2716, "step": 12323 }, { "epoch": 1.8858454475899005, "grad_norm": 2.4746042589685717, "learning_rate": 1.7040296013658643e-07, "loss": 0.3252, "step": 12324 }, { "epoch": 1.8859984697781178, "grad_norm": 2.1480370697284847, "learning_rate": 1.6994771948520506e-07, "loss": 0.2229, "step": 12325 }, { "epoch": 1.8861514919663351, "grad_norm": 1.8615177634968916, "learning_rate": 1.694930825357899e-07, "loss": 0.2294, "step": 12326 }, { "epoch": 1.8863045141545522, "grad_norm": 2.215497558948797, "learning_rate": 1.6903904931625968e-07, "loss": 0.2807, "step": 12327 }, { "epoch": 1.8864575363427698, "grad_norm": 2.191961181905781, "learning_rate": 1.6858561985450327e-07, "loss": 0.2419, "step": 12328 }, { "epoch": 1.886610558530987, "grad_norm": 1.9739018107667339, "learning_rate": 1.681327941783628e-07, "loss": 0.3075, "step": 12329 }, { "epoch": 1.8867635807192042, "grad_norm": 1.7804060009453269, "learning_rate": 1.6768057231564938e-07, "loss": 0.2203, "step": 12330 }, { "epoch": 1.8869166029074216, "grad_norm": 2.2020925173246333, "learning_rate": 1.6722895429413856e-07, "loss": 0.2131, "step": 12331 }, { "epoch": 1.887069625095639, "grad_norm": 2.1911656545700677, "learning_rate": 1.6677794014156145e-07, "loss": 0.3261, "step": 12332 }, { "epoch": 1.8872226472838562, "grad_norm": 2.007242826500715, "learning_rate": 1.6632752988561929e-07, "loss": 0.2273, "step": 12333 }, { "epoch": 1.8873756694720734, "grad_norm": 1.9420679663669713, "learning_rate": 1.6587772355397437e-07, "loss": 0.2709, "step": 12334 }, { "epoch": 1.8875286916602907, "grad_norm": 2.1440316856194244, "learning_rate": 1.6542852117424791e-07, "loss": 0.239, "step": 12335 }, { "epoch": 1.887681713848508, "grad_norm": 2.155474547422099, "learning_rate": 1.6497992277403008e-07, "loss": 0.2677, "step": 12336 }, { "epoch": 1.8878347360367254, "grad_norm": 1.772226946632601, "learning_rate": 1.6453192838086996e-07, "loss": 0.2311, "step": 12337 }, { "epoch": 1.8879877582249427, "grad_norm": 2.0741420930815706, "learning_rate": 1.6408453802227998e-07, "loss": 0.301, "step": 12338 }, { "epoch": 1.8881407804131598, "grad_norm": 2.3602693015766856, "learning_rate": 1.6363775172573814e-07, "loss": 0.3471, "step": 12339 }, { "epoch": 1.8882938026013774, "grad_norm": 1.8958952695132465, "learning_rate": 1.631915695186803e-07, "loss": 0.2583, "step": 12340 }, { "epoch": 1.8884468247895945, "grad_norm": 2.248151990745066, "learning_rate": 1.6274599142851123e-07, "loss": 0.3297, "step": 12341 }, { "epoch": 1.8885998469778118, "grad_norm": 2.240236669309958, "learning_rate": 1.6230101748259453e-07, "loss": 0.2999, "step": 12342 }, { "epoch": 1.8887528691660291, "grad_norm": 2.068997854305708, "learning_rate": 1.6185664770825727e-07, "loss": 0.2607, "step": 12343 }, { "epoch": 1.8889058913542462, "grad_norm": 2.298467877444234, "learning_rate": 1.6141288213278982e-07, "loss": 0.3697, "step": 12344 }, { "epoch": 1.8890589135424638, "grad_norm": 2.063704879522499, "learning_rate": 1.6096972078344598e-07, "loss": 0.3015, "step": 12345 }, { "epoch": 1.889211935730681, "grad_norm": 2.368876065700203, "learning_rate": 1.6052716368744392e-07, "loss": 0.2898, "step": 12346 }, { "epoch": 1.8893649579188982, "grad_norm": 2.07193482877275, "learning_rate": 1.600852108719597e-07, "loss": 0.2883, "step": 12347 }, { "epoch": 1.8895179801071156, "grad_norm": 2.1398240131474298, "learning_rate": 1.59643862364135e-07, "loss": 0.297, "step": 12348 }, { "epoch": 1.8896710022953327, "grad_norm": 2.2600179058392778, "learning_rate": 1.5920311819107915e-07, "loss": 0.328, "step": 12349 }, { "epoch": 1.8898240244835502, "grad_norm": 2.0289016769988897, "learning_rate": 1.587629783798561e-07, "loss": 0.2667, "step": 12350 }, { "epoch": 1.8899770466717674, "grad_norm": 1.9507615969456944, "learning_rate": 1.5832344295749758e-07, "loss": 0.2921, "step": 12351 }, { "epoch": 1.8901300688599847, "grad_norm": 2.2770940001537148, "learning_rate": 1.5788451195099752e-07, "loss": 0.2753, "step": 12352 }, { "epoch": 1.890283091048202, "grad_norm": 2.434599224167367, "learning_rate": 1.5744618538731106e-07, "loss": 0.3081, "step": 12353 }, { "epoch": 1.8904361132364191, "grad_norm": 2.1500543081305867, "learning_rate": 1.5700846329335995e-07, "loss": 0.2885, "step": 12354 }, { "epoch": 1.8905891354246367, "grad_norm": 1.920834570258339, "learning_rate": 1.5657134569602604e-07, "loss": 0.2705, "step": 12355 }, { "epoch": 1.8907421576128538, "grad_norm": 2.1694939010872174, "learning_rate": 1.5613483262215125e-07, "loss": 0.2975, "step": 12356 }, { "epoch": 1.8908951798010711, "grad_norm": 2.155350044486877, "learning_rate": 1.5569892409854626e-07, "loss": 0.2426, "step": 12357 }, { "epoch": 1.8910482019892885, "grad_norm": 2.3352026536074475, "learning_rate": 1.5526362015198305e-07, "loss": 0.3415, "step": 12358 }, { "epoch": 1.8912012241775056, "grad_norm": 2.425425876866039, "learning_rate": 1.5482892080919132e-07, "loss": 0.3196, "step": 12359 }, { "epoch": 1.8913542463657231, "grad_norm": 2.1513830506914813, "learning_rate": 1.5439482609687196e-07, "loss": 0.2952, "step": 12360 }, { "epoch": 1.8915072685539402, "grad_norm": 2.0535776550069884, "learning_rate": 1.539613360416825e-07, "loss": 0.2172, "step": 12361 }, { "epoch": 1.8916602907421576, "grad_norm": 2.1660181727611514, "learning_rate": 1.5352845067024392e-07, "loss": 0.2655, "step": 12362 }, { "epoch": 1.891813312930375, "grad_norm": 2.0220484053824195, "learning_rate": 1.530961700091438e-07, "loss": 0.235, "step": 12363 }, { "epoch": 1.8919663351185922, "grad_norm": 2.0225572484268746, "learning_rate": 1.5266449408492979e-07, "loss": 0.2296, "step": 12364 }, { "epoch": 1.8921193573068096, "grad_norm": 2.5943193506917366, "learning_rate": 1.522334229241107e-07, "loss": 0.3002, "step": 12365 }, { "epoch": 1.8922723794950267, "grad_norm": 2.314365201977131, "learning_rate": 1.5180295655316312e-07, "loss": 0.3017, "step": 12366 }, { "epoch": 1.892425401683244, "grad_norm": 2.0155411326429147, "learning_rate": 1.513730949985215e-07, "loss": 0.268, "step": 12367 }, { "epoch": 1.8925784238714614, "grad_norm": 1.936628601404376, "learning_rate": 1.5094383828658687e-07, "loss": 0.2173, "step": 12368 }, { "epoch": 1.8927314460596787, "grad_norm": 2.6655173289279808, "learning_rate": 1.5051518644372044e-07, "loss": 0.3635, "step": 12369 }, { "epoch": 1.892884468247896, "grad_norm": 1.9786815581966082, "learning_rate": 1.5008713949624777e-07, "loss": 0.3102, "step": 12370 }, { "epoch": 1.8930374904361131, "grad_norm": 2.1595530135445213, "learning_rate": 1.4965969747045673e-07, "loss": 0.3011, "step": 12371 }, { "epoch": 1.8931905126243307, "grad_norm": 2.203393747451202, "learning_rate": 1.4923286039259855e-07, "loss": 0.2665, "step": 12372 }, { "epoch": 1.8933435348125478, "grad_norm": 2.1359510563270736, "learning_rate": 1.488066282888878e-07, "loss": 0.3154, "step": 12373 }, { "epoch": 1.8934965570007651, "grad_norm": 1.952958864492451, "learning_rate": 1.4838100118549803e-07, "loss": 0.2494, "step": 12374 }, { "epoch": 1.8936495791889825, "grad_norm": 2.7907391263373866, "learning_rate": 1.4795597910857274e-07, "loss": 0.3871, "step": 12375 }, { "epoch": 1.8938026013771996, "grad_norm": 2.601807768733684, "learning_rate": 1.4753156208421104e-07, "loss": 0.2742, "step": 12376 }, { "epoch": 1.8939556235654171, "grad_norm": 2.0452811830427478, "learning_rate": 1.4710775013847879e-07, "loss": 0.2629, "step": 12377 }, { "epoch": 1.8941086457536342, "grad_norm": 2.1949730369156284, "learning_rate": 1.4668454329740621e-07, "loss": 0.3188, "step": 12378 }, { "epoch": 1.8942616679418516, "grad_norm": 2.1266212476482753, "learning_rate": 1.4626194158698149e-07, "loss": 0.293, "step": 12379 }, { "epoch": 1.894414690130069, "grad_norm": 2.3910652112091997, "learning_rate": 1.4583994503315713e-07, "loss": 0.3309, "step": 12380 }, { "epoch": 1.894567712318286, "grad_norm": 2.195165911640832, "learning_rate": 1.4541855366185464e-07, "loss": 0.261, "step": 12381 }, { "epoch": 1.8947207345065036, "grad_norm": 2.2227897094612015, "learning_rate": 1.4499776749894668e-07, "loss": 0.2892, "step": 12382 }, { "epoch": 1.8948737566947207, "grad_norm": 1.983371860106799, "learning_rate": 1.4457758657028142e-07, "loss": 0.2333, "step": 12383 }, { "epoch": 1.895026778882938, "grad_norm": 2.166258470216104, "learning_rate": 1.4415801090166048e-07, "loss": 0.2943, "step": 12384 }, { "epoch": 1.8951798010711554, "grad_norm": 2.421166045256063, "learning_rate": 1.43739040518851e-07, "loss": 0.3261, "step": 12385 }, { "epoch": 1.8953328232593725, "grad_norm": 2.5040245258127927, "learning_rate": 1.4332067544758688e-07, "loss": 0.3423, "step": 12386 }, { "epoch": 1.89548584544759, "grad_norm": 2.0306426946320775, "learning_rate": 1.4290291571355975e-07, "loss": 0.2614, "step": 12387 }, { "epoch": 1.8956388676358071, "grad_norm": 1.902404848644786, "learning_rate": 1.4248576134242353e-07, "loss": 0.235, "step": 12388 }, { "epoch": 1.8957918898240245, "grad_norm": 2.030986303913293, "learning_rate": 1.420692123598011e-07, "loss": 0.2736, "step": 12389 }, { "epoch": 1.8959449120122418, "grad_norm": 2.281148057551701, "learning_rate": 1.4165326879127195e-07, "loss": 0.357, "step": 12390 }, { "epoch": 1.896097934200459, "grad_norm": 2.1204540996922163, "learning_rate": 1.4123793066238233e-07, "loss": 0.2949, "step": 12391 }, { "epoch": 1.8962509563886765, "grad_norm": 2.023496130387062, "learning_rate": 1.4082319799863963e-07, "loss": 0.276, "step": 12392 }, { "epoch": 1.8964039785768936, "grad_norm": 2.1823663321503886, "learning_rate": 1.4040907082551237e-07, "loss": 0.3208, "step": 12393 }, { "epoch": 1.896557000765111, "grad_norm": 2.002947903538194, "learning_rate": 1.3999554916843571e-07, "loss": 0.2331, "step": 12394 }, { "epoch": 1.8967100229533282, "grad_norm": 2.046209375144362, "learning_rate": 1.3958263305280494e-07, "loss": 0.2501, "step": 12395 }, { "epoch": 1.8968630451415456, "grad_norm": 1.9425497013925002, "learning_rate": 1.3917032250397867e-07, "loss": 0.2359, "step": 12396 }, { "epoch": 1.897016067329763, "grad_norm": 2.0608664471035745, "learning_rate": 1.387586175472788e-07, "loss": 0.2626, "step": 12397 }, { "epoch": 1.89716908951798, "grad_norm": 2.1798385264217734, "learning_rate": 1.3834751820798964e-07, "loss": 0.2987, "step": 12398 }, { "epoch": 1.8973221117061974, "grad_norm": 1.976167049765422, "learning_rate": 1.3793702451135872e-07, "loss": 0.2521, "step": 12399 }, { "epoch": 1.8974751338944147, "grad_norm": 2.1064510108566052, "learning_rate": 1.3752713648259475e-07, "loss": 0.3585, "step": 12400 }, { "epoch": 1.897628156082632, "grad_norm": 1.8467627262346928, "learning_rate": 1.3711785414687207e-07, "loss": 0.2776, "step": 12401 }, { "epoch": 1.8977811782708494, "grad_norm": 2.0021520489916513, "learning_rate": 1.367091775293261e-07, "loss": 0.2253, "step": 12402 }, { "epoch": 1.8979342004590665, "grad_norm": 2.004962053610836, "learning_rate": 1.3630110665505347e-07, "loss": 0.2494, "step": 12403 }, { "epoch": 1.898087222647284, "grad_norm": 2.1024242844915, "learning_rate": 1.3589364154911855e-07, "loss": 0.2501, "step": 12404 }, { "epoch": 1.8982402448355011, "grad_norm": 1.974976585706706, "learning_rate": 1.3548678223654354e-07, "loss": 0.2945, "step": 12405 }, { "epoch": 1.8983932670237185, "grad_norm": 1.822674609909908, "learning_rate": 1.3508052874231514e-07, "loss": 0.2078, "step": 12406 }, { "epoch": 1.8985462892119358, "grad_norm": 2.409596533644061, "learning_rate": 1.346748810913834e-07, "loss": 0.2808, "step": 12407 }, { "epoch": 1.898699311400153, "grad_norm": 2.1866218250648752, "learning_rate": 1.3426983930866055e-07, "loss": 0.2917, "step": 12408 }, { "epoch": 1.8988523335883705, "grad_norm": 2.2318137725230778, "learning_rate": 1.338654034190212e-07, "loss": 0.3389, "step": 12409 }, { "epoch": 1.8990053557765876, "grad_norm": 2.1864107341649834, "learning_rate": 1.3346157344730438e-07, "loss": 0.2668, "step": 12410 }, { "epoch": 1.899158377964805, "grad_norm": 2.0640684514509036, "learning_rate": 1.330583494183102e-07, "loss": 0.3253, "step": 12411 }, { "epoch": 1.8993114001530222, "grad_norm": 2.061874070757145, "learning_rate": 1.326557313568033e-07, "loss": 0.2789, "step": 12412 }, { "epoch": 1.8994644223412394, "grad_norm": 2.262601025711515, "learning_rate": 1.3225371928750842e-07, "loss": 0.3414, "step": 12413 }, { "epoch": 1.899617444529457, "grad_norm": 2.071223047182192, "learning_rate": 1.318523132351157e-07, "loss": 0.2732, "step": 12414 }, { "epoch": 1.899770466717674, "grad_norm": 2.2037935773831743, "learning_rate": 1.3145151322427663e-07, "loss": 0.2175, "step": 12415 }, { "epoch": 1.8999234889058914, "grad_norm": 1.9905111743598205, "learning_rate": 1.3105131927960702e-07, "loss": 0.257, "step": 12416 }, { "epoch": 1.9000765110941087, "grad_norm": 2.0918754035359166, "learning_rate": 1.3065173142568276e-07, "loss": 0.2877, "step": 12417 }, { "epoch": 1.9002295332823258, "grad_norm": 2.1951012882049237, "learning_rate": 1.3025274968704426e-07, "loss": 0.3017, "step": 12418 }, { "epoch": 1.9003825554705434, "grad_norm": 2.047502157807223, "learning_rate": 1.2985437408819524e-07, "loss": 0.2675, "step": 12419 }, { "epoch": 1.9005355776587605, "grad_norm": 2.345449255427472, "learning_rate": 1.2945660465360166e-07, "loss": 0.2968, "step": 12420 }, { "epoch": 1.9006885998469778, "grad_norm": 2.2405813389873455, "learning_rate": 1.2905944140769178e-07, "loss": 0.2276, "step": 12421 }, { "epoch": 1.9008416220351951, "grad_norm": 1.8624021115253389, "learning_rate": 1.2866288437485718e-07, "loss": 0.252, "step": 12422 }, { "epoch": 1.9009946442234122, "grad_norm": 2.0359610897944154, "learning_rate": 1.2826693357945176e-07, "loss": 0.2366, "step": 12423 }, { "epoch": 1.9011476664116298, "grad_norm": 2.2615454945887556, "learning_rate": 1.2787158904579268e-07, "loss": 0.2951, "step": 12424 }, { "epoch": 1.901300688599847, "grad_norm": 2.738627055525505, "learning_rate": 1.274768507981583e-07, "loss": 0.3292, "step": 12425 }, { "epoch": 1.9014537107880642, "grad_norm": 2.153882026555609, "learning_rate": 1.270827188607926e-07, "loss": 0.2416, "step": 12426 }, { "epoch": 1.9016067329762816, "grad_norm": 2.000143651464614, "learning_rate": 1.2668919325789953e-07, "loss": 0.2669, "step": 12427 }, { "epoch": 1.9017597551644987, "grad_norm": 2.1215493546758073, "learning_rate": 1.2629627401364864e-07, "loss": 0.3234, "step": 12428 }, { "epoch": 1.9019127773527162, "grad_norm": 1.9597291534053758, "learning_rate": 1.2590396115216953e-07, "loss": 0.2631, "step": 12429 }, { "epoch": 1.9020657995409334, "grad_norm": 2.429837051524621, "learning_rate": 1.2551225469755512e-07, "loss": 0.3115, "step": 12430 }, { "epoch": 1.9022188217291507, "grad_norm": 2.089905167506059, "learning_rate": 1.25121154673864e-07, "loss": 0.2776, "step": 12431 }, { "epoch": 1.902371843917368, "grad_norm": 1.9529949744574147, "learning_rate": 1.2473066110511244e-07, "loss": 0.2083, "step": 12432 }, { "epoch": 1.9025248661055854, "grad_norm": 2.2045107277586258, "learning_rate": 1.2434077401528245e-07, "loss": 0.2174, "step": 12433 }, { "epoch": 1.9026778882938027, "grad_norm": 2.1168068178123405, "learning_rate": 1.2395149342832035e-07, "loss": 0.2749, "step": 12434 }, { "epoch": 1.9028309104820198, "grad_norm": 2.0035038229922337, "learning_rate": 1.2356281936813152e-07, "loss": 0.2764, "step": 12435 }, { "epoch": 1.9029839326702371, "grad_norm": 1.8645531982106915, "learning_rate": 1.2317475185858797e-07, "loss": 0.2857, "step": 12436 }, { "epoch": 1.9031369548584545, "grad_norm": 1.9683962909981605, "learning_rate": 1.227872909235206e-07, "loss": 0.2156, "step": 12437 }, { "epoch": 1.9032899770466718, "grad_norm": 2.5372871229932756, "learning_rate": 1.2240043658672485e-07, "loss": 0.3423, "step": 12438 }, { "epoch": 1.9034429992348891, "grad_norm": 2.2487446160355384, "learning_rate": 1.220141888719606e-07, "loss": 0.3086, "step": 12439 }, { "epoch": 1.9035960214231062, "grad_norm": 2.4433518403609487, "learning_rate": 1.2162854780294775e-07, "loss": 0.3025, "step": 12440 }, { "epoch": 1.9037490436113238, "grad_norm": 2.285883593805717, "learning_rate": 1.212435134033707e-07, "loss": 0.3215, "step": 12441 }, { "epoch": 1.903902065799541, "grad_norm": 2.07019078472154, "learning_rate": 1.20859085696875e-07, "loss": 0.2195, "step": 12442 }, { "epoch": 1.9040550879877582, "grad_norm": 2.1764385332904204, "learning_rate": 1.204752647070706e-07, "loss": 0.2439, "step": 12443 }, { "epoch": 1.9042081101759756, "grad_norm": 2.0657575704110194, "learning_rate": 1.200920504575287e-07, "loss": 0.3091, "step": 12444 }, { "epoch": 1.9043611323641927, "grad_norm": 2.2142022624529867, "learning_rate": 1.1970944297178377e-07, "loss": 0.3519, "step": 12445 }, { "epoch": 1.9045141545524102, "grad_norm": 2.2201081578650896, "learning_rate": 1.1932744227333481e-07, "loss": 0.2401, "step": 12446 }, { "epoch": 1.9046671767406274, "grad_norm": 2.0766398494803036, "learning_rate": 1.1894604838564083e-07, "loss": 0.2747, "step": 12447 }, { "epoch": 1.9048201989288447, "grad_norm": 1.8669964143468456, "learning_rate": 1.1856526133212421e-07, "loss": 0.2535, "step": 12448 }, { "epoch": 1.904973221117062, "grad_norm": 2.1864682779631717, "learning_rate": 1.1818508113617288e-07, "loss": 0.2969, "step": 12449 }, { "epoch": 1.9051262433052791, "grad_norm": 2.1671985516248315, "learning_rate": 1.1780550782113154e-07, "loss": 0.2791, "step": 12450 }, { "epoch": 1.9052792654934967, "grad_norm": 1.7088933065196328, "learning_rate": 1.1742654141031484e-07, "loss": 0.1846, "step": 12451 }, { "epoch": 1.9054322876817138, "grad_norm": 2.148970892648732, "learning_rate": 1.1704818192699419e-07, "loss": 0.2711, "step": 12452 }, { "epoch": 1.9055853098699311, "grad_norm": 2.100215150528268, "learning_rate": 1.1667042939440765e-07, "loss": 0.252, "step": 12453 }, { "epoch": 1.9057383320581485, "grad_norm": 2.360942987759934, "learning_rate": 1.1629328383575444e-07, "loss": 0.3214, "step": 12454 }, { "epoch": 1.9058913542463656, "grad_norm": 2.1994757368737536, "learning_rate": 1.1591674527419495e-07, "loss": 0.2825, "step": 12455 }, { "epoch": 1.9060443764345831, "grad_norm": 2.1044747531659933, "learning_rate": 1.1554081373285398e-07, "loss": 0.3447, "step": 12456 }, { "epoch": 1.9061973986228002, "grad_norm": 1.779675570955133, "learning_rate": 1.1516548923482196e-07, "loss": 0.2186, "step": 12457 }, { "epoch": 1.9063504208110176, "grad_norm": 2.09708548162683, "learning_rate": 1.1479077180314601e-07, "loss": 0.2884, "step": 12458 }, { "epoch": 1.906503442999235, "grad_norm": 1.909809773435519, "learning_rate": 1.1441666146083885e-07, "loss": 0.2479, "step": 12459 }, { "epoch": 1.906656465187452, "grad_norm": 1.806316291654644, "learning_rate": 1.1404315823087875e-07, "loss": 0.2085, "step": 12460 }, { "epoch": 1.9068094873756696, "grad_norm": 2.127004509384845, "learning_rate": 1.1367026213620181e-07, "loss": 0.2636, "step": 12461 }, { "epoch": 1.9069625095638867, "grad_norm": 2.205530653360376, "learning_rate": 1.1329797319970859e-07, "loss": 0.2809, "step": 12462 }, { "epoch": 1.907115531752104, "grad_norm": 2.41892985391259, "learning_rate": 1.1292629144426637e-07, "loss": 0.3434, "step": 12463 }, { "epoch": 1.9072685539403214, "grad_norm": 2.0336010224200036, "learning_rate": 1.1255521689269577e-07, "loss": 0.2807, "step": 12464 }, { "epoch": 1.9074215761285387, "grad_norm": 2.721142821132342, "learning_rate": 1.1218474956779191e-07, "loss": 0.3746, "step": 12465 }, { "epoch": 1.907574598316756, "grad_norm": 1.8455778126082905, "learning_rate": 1.1181488949230323e-07, "loss": 0.2009, "step": 12466 }, { "epoch": 1.9077276205049731, "grad_norm": 2.164500498682435, "learning_rate": 1.1144563668894381e-07, "loss": 0.3123, "step": 12467 }, { "epoch": 1.9078806426931905, "grad_norm": 2.3175653893300705, "learning_rate": 1.1107699118039328e-07, "loss": 0.2817, "step": 12468 }, { "epoch": 1.9080336648814078, "grad_norm": 2.1705603938031337, "learning_rate": 1.1070895298929019e-07, "loss": 0.2563, "step": 12469 }, { "epoch": 1.9081866870696251, "grad_norm": 2.170225385518329, "learning_rate": 1.1034152213823646e-07, "loss": 0.3054, "step": 12470 }, { "epoch": 1.9083397092578425, "grad_norm": 2.392108868688555, "learning_rate": 1.099746986497996e-07, "loss": 0.3374, "step": 12471 }, { "epoch": 1.9084927314460596, "grad_norm": 1.943591082629844, "learning_rate": 1.0960848254650603e-07, "loss": 0.2415, "step": 12472 }, { "epoch": 1.9086457536342771, "grad_norm": 2.144948863915394, "learning_rate": 1.0924287385084776e-07, "loss": 0.2933, "step": 12473 }, { "epoch": 1.9087987758224942, "grad_norm": 1.9600270754442108, "learning_rate": 1.0887787258527682e-07, "loss": 0.27, "step": 12474 }, { "epoch": 1.9089517980107116, "grad_norm": 2.29484241675231, "learning_rate": 1.0851347877220975e-07, "loss": 0.3108, "step": 12475 }, { "epoch": 1.909104820198929, "grad_norm": 1.8828750051292165, "learning_rate": 1.0814969243402529e-07, "loss": 0.2352, "step": 12476 }, { "epoch": 1.909257842387146, "grad_norm": 2.089128050877077, "learning_rate": 1.0778651359306669e-07, "loss": 0.2714, "step": 12477 }, { "epoch": 1.9094108645753636, "grad_norm": 2.2924888911310894, "learning_rate": 1.074239422716361e-07, "loss": 0.2585, "step": 12478 }, { "epoch": 1.9095638867635807, "grad_norm": 2.1199032370475708, "learning_rate": 1.0706197849200128e-07, "loss": 0.2535, "step": 12479 }, { "epoch": 1.909716908951798, "grad_norm": 1.80239557608064, "learning_rate": 1.0670062227639111e-07, "loss": 0.265, "step": 12480 }, { "epoch": 1.9098699311400154, "grad_norm": 2.147866520218618, "learning_rate": 1.0633987364700004e-07, "loss": 0.223, "step": 12481 }, { "epoch": 1.9100229533282325, "grad_norm": 2.466143189763118, "learning_rate": 1.0597973262598038e-07, "loss": 0.3343, "step": 12482 }, { "epoch": 1.91017597551645, "grad_norm": 2.3340145813910613, "learning_rate": 1.0562019923545108e-07, "loss": 0.3126, "step": 12483 }, { "epoch": 1.9103289977046671, "grad_norm": 2.1269179837487022, "learning_rate": 1.0526127349749227e-07, "loss": 0.2429, "step": 12484 }, { "epoch": 1.9104820198928845, "grad_norm": 2.248127125499551, "learning_rate": 1.049029554341463e-07, "loss": 0.2539, "step": 12485 }, { "epoch": 1.9106350420811018, "grad_norm": 2.0877173544121357, "learning_rate": 1.0454524506742225e-07, "loss": 0.2841, "step": 12486 }, { "epoch": 1.910788064269319, "grad_norm": 2.1014344280122867, "learning_rate": 1.0418814241928366e-07, "loss": 0.2673, "step": 12487 }, { "epoch": 1.9109410864575365, "grad_norm": 2.2504178395361425, "learning_rate": 1.0383164751166409e-07, "loss": 0.2564, "step": 12488 }, { "epoch": 1.9110941086457536, "grad_norm": 1.9582453306000769, "learning_rate": 1.0347576036645824e-07, "loss": 0.2639, "step": 12489 }, { "epoch": 1.911247130833971, "grad_norm": 2.0337728796022656, "learning_rate": 1.0312048100552085e-07, "loss": 0.2854, "step": 12490 }, { "epoch": 1.9114001530221882, "grad_norm": 2.3114295530401257, "learning_rate": 1.0276580945067116e-07, "loss": 0.2843, "step": 12491 }, { "epoch": 1.9115531752104054, "grad_norm": 1.7069199598118328, "learning_rate": 1.0241174572369172e-07, "loss": 0.2143, "step": 12492 }, { "epoch": 1.911706197398623, "grad_norm": 2.099298087764273, "learning_rate": 1.0205828984632626e-07, "loss": 0.2617, "step": 12493 }, { "epoch": 1.91185921958684, "grad_norm": 2.2982638891837803, "learning_rate": 1.0170544184028186e-07, "loss": 0.3266, "step": 12494 }, { "epoch": 1.9120122417750574, "grad_norm": 1.921762199018458, "learning_rate": 1.0135320172723007e-07, "loss": 0.2517, "step": 12495 }, { "epoch": 1.9121652639632747, "grad_norm": 2.0989247222133285, "learning_rate": 1.0100156952880025e-07, "loss": 0.2335, "step": 12496 }, { "epoch": 1.912318286151492, "grad_norm": 2.2323329776738263, "learning_rate": 1.0065054526658957e-07, "loss": 0.2551, "step": 12497 }, { "epoch": 1.9124713083397094, "grad_norm": 2.2590344295387066, "learning_rate": 1.0030012896215635e-07, "loss": 0.2943, "step": 12498 }, { "epoch": 1.9126243305279265, "grad_norm": 2.2349113812770134, "learning_rate": 9.995032063701781e-08, "loss": 0.2179, "step": 12499 }, { "epoch": 1.9127773527161438, "grad_norm": 2.086848029996208, "learning_rate": 9.960112031266123e-08, "loss": 0.2488, "step": 12500 }, { "epoch": 1.9129303749043611, "grad_norm": 2.5167862029364088, "learning_rate": 9.925252801052943e-08, "loss": 0.2748, "step": 12501 }, { "epoch": 1.9130833970925785, "grad_norm": 1.8074348221085181, "learning_rate": 9.890454375203306e-08, "loss": 0.2377, "step": 12502 }, { "epoch": 1.9132364192807958, "grad_norm": 1.9891945424069828, "learning_rate": 9.855716755854062e-08, "loss": 0.2427, "step": 12503 }, { "epoch": 1.913389441469013, "grad_norm": 1.9930912025356717, "learning_rate": 9.821039945138833e-08, "loss": 0.2679, "step": 12504 }, { "epoch": 1.9135424636572305, "grad_norm": 1.9143467675625112, "learning_rate": 9.78642394518714e-08, "loss": 0.2671, "step": 12505 }, { "epoch": 1.9136954858454476, "grad_norm": 2.3126000901167094, "learning_rate": 9.751868758124839e-08, "loss": 0.3273, "step": 12506 }, { "epoch": 1.913848508033665, "grad_norm": 2.48212979320504, "learning_rate": 9.717374386074118e-08, "loss": 0.3258, "step": 12507 }, { "epoch": 1.9140015302218822, "grad_norm": 1.9366579694374044, "learning_rate": 9.682940831153509e-08, "loss": 0.2183, "step": 12508 }, { "epoch": 1.9141545524100994, "grad_norm": 2.146378976268681, "learning_rate": 9.648568095477539e-08, "loss": 0.3257, "step": 12509 }, { "epoch": 1.914307574598317, "grad_norm": 2.0975727968979614, "learning_rate": 9.614256181157411e-08, "loss": 0.2838, "step": 12510 }, { "epoch": 1.914460596786534, "grad_norm": 2.3790386333219535, "learning_rate": 9.580005090300104e-08, "loss": 0.2916, "step": 12511 }, { "epoch": 1.9146136189747514, "grad_norm": 2.063496598618312, "learning_rate": 9.54581482500927e-08, "loss": 0.2567, "step": 12512 }, { "epoch": 1.9147666411629687, "grad_norm": 2.177710720193587, "learning_rate": 9.511685387384673e-08, "loss": 0.2677, "step": 12513 }, { "epoch": 1.9149196633511858, "grad_norm": 2.3674245980586703, "learning_rate": 9.477616779522191e-08, "loss": 0.3017, "step": 12514 }, { "epoch": 1.9150726855394034, "grad_norm": 2.3237512722434808, "learning_rate": 9.443609003514376e-08, "loss": 0.2724, "step": 12515 }, { "epoch": 1.9152257077276205, "grad_norm": 2.1349857240907815, "learning_rate": 9.409662061449553e-08, "loss": 0.278, "step": 12516 }, { "epoch": 1.9153787299158378, "grad_norm": 1.929247113089893, "learning_rate": 9.375775955412502e-08, "loss": 0.2658, "step": 12517 }, { "epoch": 1.9155317521040551, "grad_norm": 2.078402299391299, "learning_rate": 9.341950687484447e-08, "loss": 0.2476, "step": 12518 }, { "epoch": 1.9156847742922722, "grad_norm": 2.150314478366211, "learning_rate": 9.308186259742724e-08, "loss": 0.292, "step": 12519 }, { "epoch": 1.9158377964804898, "grad_norm": 2.2735395559159626, "learning_rate": 9.274482674260787e-08, "loss": 0.295, "step": 12520 }, { "epoch": 1.915990818668707, "grad_norm": 2.0485204164380066, "learning_rate": 9.240839933108647e-08, "loss": 0.2815, "step": 12521 }, { "epoch": 1.9161438408569242, "grad_norm": 1.9152040534504782, "learning_rate": 9.20725803835243e-08, "loss": 0.2533, "step": 12522 }, { "epoch": 1.9162968630451416, "grad_norm": 2.280007794782544, "learning_rate": 9.173736992054483e-08, "loss": 0.2678, "step": 12523 }, { "epoch": 1.9164498852333587, "grad_norm": 2.5147731875289656, "learning_rate": 9.140276796273495e-08, "loss": 0.3013, "step": 12524 }, { "epoch": 1.9166029074215762, "grad_norm": 2.1676092373292843, "learning_rate": 9.106877453064267e-08, "loss": 0.2655, "step": 12525 }, { "epoch": 1.9167559296097934, "grad_norm": 2.3267045294522135, "learning_rate": 9.073538964478267e-08, "loss": 0.2656, "step": 12526 }, { "epoch": 1.9169089517980107, "grad_norm": 2.089513835534899, "learning_rate": 9.040261332562639e-08, "loss": 0.2434, "step": 12527 }, { "epoch": 1.917061973986228, "grad_norm": 2.163041959965656, "learning_rate": 9.0070445593613e-08, "loss": 0.2686, "step": 12528 }, { "epoch": 1.9172149961744451, "grad_norm": 1.979792412059793, "learning_rate": 8.973888646914064e-08, "loss": 0.241, "step": 12529 }, { "epoch": 1.9173680183626627, "grad_norm": 1.8840145007212765, "learning_rate": 8.940793597257303e-08, "loss": 0.2338, "step": 12530 }, { "epoch": 1.9175210405508798, "grad_norm": 2.101688674121348, "learning_rate": 8.90775941242339e-08, "loss": 0.2335, "step": 12531 }, { "epoch": 1.9176740627390971, "grad_norm": 2.200367918955295, "learning_rate": 8.874786094441257e-08, "loss": 0.2976, "step": 12532 }, { "epoch": 1.9178270849273145, "grad_norm": 2.0360117662478583, "learning_rate": 8.84187364533573e-08, "loss": 0.2713, "step": 12533 }, { "epoch": 1.9179801071155318, "grad_norm": 1.870087321117023, "learning_rate": 8.809022067128193e-08, "loss": 0.2147, "step": 12534 }, { "epoch": 1.9181331293037491, "grad_norm": 1.9813617183509307, "learning_rate": 8.776231361836141e-08, "loss": 0.2583, "step": 12535 }, { "epoch": 1.9182861514919662, "grad_norm": 2.285906135950109, "learning_rate": 8.743501531473409e-08, "loss": 0.2987, "step": 12536 }, { "epoch": 1.9184391736801836, "grad_norm": 2.0323696479504765, "learning_rate": 8.710832578050166e-08, "loss": 0.2593, "step": 12537 }, { "epoch": 1.918592195868401, "grad_norm": 2.1254409285361473, "learning_rate": 8.678224503572474e-08, "loss": 0.2387, "step": 12538 }, { "epoch": 1.9187452180566182, "grad_norm": 2.023023101695638, "learning_rate": 8.645677310043288e-08, "loss": 0.2443, "step": 12539 }, { "epoch": 1.9188982402448356, "grad_norm": 2.0102458000598773, "learning_rate": 8.613190999461119e-08, "loss": 0.2564, "step": 12540 }, { "epoch": 1.9190512624330527, "grad_norm": 1.8525700822632671, "learning_rate": 8.580765573821148e-08, "loss": 0.2515, "step": 12541 }, { "epoch": 1.9192042846212702, "grad_norm": 2.096632072259417, "learning_rate": 8.548401035115006e-08, "loss": 0.2639, "step": 12542 }, { "epoch": 1.9193573068094874, "grad_norm": 2.1823118733419493, "learning_rate": 8.516097385330102e-08, "loss": 0.2719, "step": 12543 }, { "epoch": 1.9195103289977047, "grad_norm": 2.217747952601633, "learning_rate": 8.483854626450183e-08, "loss": 0.2093, "step": 12544 }, { "epoch": 1.919663351185922, "grad_norm": 2.109037847197852, "learning_rate": 8.451672760455775e-08, "loss": 0.3524, "step": 12545 }, { "epoch": 1.9198163733741391, "grad_norm": 2.2662799826187023, "learning_rate": 8.419551789322966e-08, "loss": 0.293, "step": 12546 }, { "epoch": 1.9199693955623567, "grad_norm": 2.123070326666157, "learning_rate": 8.38749171502462e-08, "loss": 0.2602, "step": 12547 }, { "epoch": 1.9201224177505738, "grad_norm": 2.0546614358340296, "learning_rate": 8.355492539529719e-08, "loss": 0.305, "step": 12548 }, { "epoch": 1.9202754399387911, "grad_norm": 2.072183050978695, "learning_rate": 8.323554264803247e-08, "loss": 0.2799, "step": 12549 }, { "epoch": 1.9204284621270085, "grad_norm": 2.3442769629175904, "learning_rate": 8.291676892806743e-08, "loss": 0.3164, "step": 12550 }, { "epoch": 1.9205814843152256, "grad_norm": 2.035394827803196, "learning_rate": 8.25986042549809e-08, "loss": 0.2435, "step": 12551 }, { "epoch": 1.9207345065034431, "grad_norm": 2.1027386412155615, "learning_rate": 8.228104864830943e-08, "loss": 0.3025, "step": 12552 }, { "epoch": 1.9208875286916602, "grad_norm": 2.094493796125041, "learning_rate": 8.196410212755856e-08, "loss": 0.2693, "step": 12553 }, { "epoch": 1.9210405508798776, "grad_norm": 2.4802220142511264, "learning_rate": 8.164776471219049e-08, "loss": 0.3032, "step": 12554 }, { "epoch": 1.921193573068095, "grad_norm": 1.9498766283903592, "learning_rate": 8.133203642163523e-08, "loss": 0.237, "step": 12555 }, { "epoch": 1.921346595256312, "grad_norm": 2.2455347228710574, "learning_rate": 8.10169172752806e-08, "loss": 0.2767, "step": 12556 }, { "epoch": 1.9214996174445296, "grad_norm": 1.957315184740743, "learning_rate": 8.070240729248114e-08, "loss": 0.2109, "step": 12557 }, { "epoch": 1.9216526396327467, "grad_norm": 1.965643467306098, "learning_rate": 8.038850649255137e-08, "loss": 0.2379, "step": 12558 }, { "epoch": 1.921805661820964, "grad_norm": 2.19852476658279, "learning_rate": 8.00752148947681e-08, "loss": 0.2581, "step": 12559 }, { "epoch": 1.9219586840091814, "grad_norm": 2.004743748037509, "learning_rate": 7.976253251837374e-08, "loss": 0.301, "step": 12560 }, { "epoch": 1.9221117061973985, "grad_norm": 2.2203783236108947, "learning_rate": 7.945045938257067e-08, "loss": 0.2943, "step": 12561 }, { "epoch": 1.922264728385616, "grad_norm": 2.355138775293594, "learning_rate": 7.91389955065236e-08, "loss": 0.5327, "step": 12562 }, { "epoch": 1.9224177505738331, "grad_norm": 2.2869360368009946, "learning_rate": 7.882814090936164e-08, "loss": 0.3156, "step": 12563 }, { "epoch": 1.9225707727620505, "grad_norm": 2.269187238957002, "learning_rate": 7.85178956101762e-08, "loss": 0.3369, "step": 12564 }, { "epoch": 1.9227237949502678, "grad_norm": 2.260139178110384, "learning_rate": 7.82082596280187e-08, "loss": 0.2689, "step": 12565 }, { "epoch": 1.9228768171384851, "grad_norm": 2.307748324586521, "learning_rate": 7.789923298190616e-08, "loss": 0.317, "step": 12566 }, { "epoch": 1.9230298393267025, "grad_norm": 1.878965997966544, "learning_rate": 7.759081569081784e-08, "loss": 0.2762, "step": 12567 }, { "epoch": 1.9231828615149196, "grad_norm": 2.121729343823374, "learning_rate": 7.728300777369412e-08, "loss": 0.2658, "step": 12568 }, { "epoch": 1.923335883703137, "grad_norm": 2.1638380361755796, "learning_rate": 7.697580924943881e-08, "loss": 0.2773, "step": 12569 }, { "epoch": 1.9234889058913542, "grad_norm": 2.1895033980428797, "learning_rate": 7.666922013691791e-08, "loss": 0.1938, "step": 12570 }, { "epoch": 1.9236419280795716, "grad_norm": 2.3956553016334556, "learning_rate": 7.63632404549608e-08, "loss": 0.3345, "step": 12571 }, { "epoch": 1.923794950267789, "grad_norm": 1.7449932799207868, "learning_rate": 7.605787022235912e-08, "loss": 0.2398, "step": 12572 }, { "epoch": 1.923947972456006, "grad_norm": 2.2376418795698387, "learning_rate": 7.575310945786452e-08, "loss": 0.284, "step": 12573 }, { "epoch": 1.9241009946442236, "grad_norm": 1.9035723603217567, "learning_rate": 7.54489581801976e-08, "loss": 0.2773, "step": 12574 }, { "epoch": 1.9242540168324407, "grad_norm": 2.2575554720495394, "learning_rate": 7.514541640803342e-08, "loss": 0.2984, "step": 12575 }, { "epoch": 1.924407039020658, "grad_norm": 1.981363852999967, "learning_rate": 7.484248416001594e-08, "loss": 0.2508, "step": 12576 }, { "epoch": 1.9245600612088754, "grad_norm": 1.9614331502072087, "learning_rate": 7.454016145474918e-08, "loss": 0.2651, "step": 12577 }, { "epoch": 1.9247130833970925, "grad_norm": 2.1497780699564566, "learning_rate": 7.423844831079941e-08, "loss": 0.347, "step": 12578 }, { "epoch": 1.92486610558531, "grad_norm": 2.240514112168765, "learning_rate": 7.393734474669623e-08, "loss": 0.2748, "step": 12579 }, { "epoch": 1.9250191277735271, "grad_norm": 1.8984880370754438, "learning_rate": 7.363685078093264e-08, "loss": 0.2359, "step": 12580 }, { "epoch": 1.9251721499617445, "grad_norm": 1.9864328339745811, "learning_rate": 7.333696643196164e-08, "loss": 0.236, "step": 12581 }, { "epoch": 1.9253251721499618, "grad_norm": 2.234747741359613, "learning_rate": 7.303769171820074e-08, "loss": 0.252, "step": 12582 }, { "epoch": 1.925478194338179, "grad_norm": 1.9754636671622134, "learning_rate": 7.273902665802967e-08, "loss": 0.2453, "step": 12583 }, { "epoch": 1.9256312165263965, "grad_norm": 2.146107414003439, "learning_rate": 7.244097126979044e-08, "loss": 0.2792, "step": 12584 }, { "epoch": 1.9257842387146136, "grad_norm": 1.9724035356572298, "learning_rate": 7.214352557178838e-08, "loss": 0.2872, "step": 12585 }, { "epoch": 1.925937260902831, "grad_norm": 1.757835085743604, "learning_rate": 7.184668958229003e-08, "loss": 0.2815, "step": 12586 }, { "epoch": 1.9260902830910482, "grad_norm": 2.1502075931636013, "learning_rate": 7.15504633195263e-08, "loss": 0.2656, "step": 12587 }, { "epoch": 1.9262433052792653, "grad_norm": 2.0155875426810343, "learning_rate": 7.125484680168826e-08, "loss": 0.27, "step": 12588 }, { "epoch": 1.926396327467483, "grad_norm": 1.8428482707965728, "learning_rate": 7.095984004693246e-08, "loss": 0.2136, "step": 12589 }, { "epoch": 1.9265493496557, "grad_norm": 2.2304850376404213, "learning_rate": 7.066544307337442e-08, "loss": 0.2676, "step": 12590 }, { "epoch": 1.9267023718439173, "grad_norm": 2.5671786865674706, "learning_rate": 7.037165589909523e-08, "loss": 0.316, "step": 12591 }, { "epoch": 1.9268553940321347, "grad_norm": 2.2924701688919007, "learning_rate": 7.007847854213822e-08, "loss": 0.2505, "step": 12592 }, { "epoch": 1.9270084162203518, "grad_norm": 2.0974154895266355, "learning_rate": 6.978591102050791e-08, "loss": 0.309, "step": 12593 }, { "epoch": 1.9271614384085693, "grad_norm": 2.1726512258841764, "learning_rate": 6.949395335217102e-08, "loss": 0.3529, "step": 12594 }, { "epoch": 1.9273144605967865, "grad_norm": 2.4202430555156775, "learning_rate": 6.920260555506098e-08, "loss": 0.3122, "step": 12595 }, { "epoch": 1.9274674827850038, "grad_norm": 2.542246552788122, "learning_rate": 6.891186764706681e-08, "loss": 0.2913, "step": 12596 }, { "epoch": 1.9276205049732211, "grad_norm": 2.3066907503970624, "learning_rate": 6.862173964604535e-08, "loss": 0.277, "step": 12597 }, { "epoch": 1.9277735271614385, "grad_norm": 2.4028158038821417, "learning_rate": 6.833222156981567e-08, "loss": 0.2989, "step": 12598 }, { "epoch": 1.9279265493496558, "grad_norm": 2.4353789679467313, "learning_rate": 6.804331343615577e-08, "loss": 0.2893, "step": 12599 }, { "epoch": 1.928079571537873, "grad_norm": 2.2241183604402632, "learning_rate": 6.775501526281037e-08, "loss": 0.272, "step": 12600 }, { "epoch": 1.9282325937260902, "grad_norm": 2.430754290427599, "learning_rate": 6.746732706748637e-08, "loss": 0.2673, "step": 12601 }, { "epoch": 1.9283856159143076, "grad_norm": 2.281050405523614, "learning_rate": 6.718024886784858e-08, "loss": 0.2809, "step": 12602 }, { "epoch": 1.928538638102525, "grad_norm": 2.1745371935601066, "learning_rate": 6.689378068152841e-08, "loss": 0.2604, "step": 12603 }, { "epoch": 1.9286916602907422, "grad_norm": 2.1098243004454025, "learning_rate": 6.660792252612069e-08, "loss": 0.2782, "step": 12604 }, { "epoch": 1.9288446824789593, "grad_norm": 2.298117415399452, "learning_rate": 6.632267441918027e-08, "loss": 0.2785, "step": 12605 }, { "epoch": 1.928997704667177, "grad_norm": 2.486226541978198, "learning_rate": 6.603803637822537e-08, "loss": 0.3168, "step": 12606 }, { "epoch": 1.929150726855394, "grad_norm": 2.236276600745981, "learning_rate": 6.575400842073643e-08, "loss": 0.293, "step": 12607 }, { "epoch": 1.9293037490436113, "grad_norm": 2.3638793794792217, "learning_rate": 6.547059056415617e-08, "loss": 0.2954, "step": 12608 }, { "epoch": 1.9294567712318287, "grad_norm": 2.2428113757322494, "learning_rate": 6.518778282589067e-08, "loss": 0.2923, "step": 12609 }, { "epoch": 1.9296097934200458, "grad_norm": 2.5389227577644284, "learning_rate": 6.490558522330936e-08, "loss": 0.3074, "step": 12610 }, { "epoch": 1.9297628156082633, "grad_norm": 2.090509434099971, "learning_rate": 6.462399777374284e-08, "loss": 0.2688, "step": 12611 }, { "epoch": 1.9299158377964805, "grad_norm": 2.1129234004043957, "learning_rate": 6.434302049448393e-08, "loss": 0.2829, "step": 12612 }, { "epoch": 1.9300688599846978, "grad_norm": 1.8011178532916772, "learning_rate": 6.406265340278772e-08, "loss": 0.2099, "step": 12613 }, { "epoch": 1.9302218821729151, "grad_norm": 2.264044979485212, "learning_rate": 6.378289651587377e-08, "loss": 0.2486, "step": 12614 }, { "epoch": 1.9303749043611322, "grad_norm": 2.1444665625136534, "learning_rate": 6.350374985092389e-08, "loss": 0.2625, "step": 12615 }, { "epoch": 1.9305279265493498, "grad_norm": 1.8452951812548426, "learning_rate": 6.322521342507993e-08, "loss": 0.2091, "step": 12616 }, { "epoch": 1.930680948737567, "grad_norm": 2.2619235464051566, "learning_rate": 6.294728725544708e-08, "loss": 0.2916, "step": 12617 }, { "epoch": 1.9308339709257842, "grad_norm": 1.8293153143516894, "learning_rate": 6.266997135909725e-08, "loss": 0.2575, "step": 12618 }, { "epoch": 1.9309869931140016, "grad_norm": 1.9826796507956632, "learning_rate": 6.239326575305904e-08, "loss": 0.2715, "step": 12619 }, { "epoch": 1.9311400153022187, "grad_norm": 2.3014999251705257, "learning_rate": 6.211717045432553e-08, "loss": 0.3369, "step": 12620 }, { "epoch": 1.9312930374904362, "grad_norm": 2.2258466809607436, "learning_rate": 6.184168547985536e-08, "loss": 0.299, "step": 12621 }, { "epoch": 1.9314460596786533, "grad_norm": 1.994257024190829, "learning_rate": 6.156681084656501e-08, "loss": 0.2588, "step": 12622 }, { "epoch": 1.9315990818668707, "grad_norm": 2.1592430726452148, "learning_rate": 6.129254657133544e-08, "loss": 0.2516, "step": 12623 }, { "epoch": 1.931752104055088, "grad_norm": 1.931950663977846, "learning_rate": 6.101889267101201e-08, "loss": 0.2307, "step": 12624 }, { "epoch": 1.9319051262433051, "grad_norm": 2.3942192745722406, "learning_rate": 6.074584916239911e-08, "loss": 0.3507, "step": 12625 }, { "epoch": 1.9320581484315227, "grad_norm": 2.098354148192397, "learning_rate": 6.047341606226664e-08, "loss": 0.3061, "step": 12626 }, { "epoch": 1.9322111706197398, "grad_norm": 2.163742519632644, "learning_rate": 6.020159338734566e-08, "loss": 0.2322, "step": 12627 }, { "epoch": 1.9323641928079571, "grad_norm": 2.3218334894180708, "learning_rate": 5.993038115432837e-08, "loss": 0.3174, "step": 12628 }, { "epoch": 1.9325172149961745, "grad_norm": 1.7732066918572658, "learning_rate": 5.965977937987366e-08, "loss": 0.2302, "step": 12629 }, { "epoch": 1.9326702371843916, "grad_norm": 2.000109135862685, "learning_rate": 5.938978808059825e-08, "loss": 0.2409, "step": 12630 }, { "epoch": 1.9328232593726091, "grad_norm": 2.333789338782402, "learning_rate": 5.9120407273082215e-08, "loss": 0.345, "step": 12631 }, { "epoch": 1.9329762815608262, "grad_norm": 1.90728146018191, "learning_rate": 5.8851636973872306e-08, "loss": 0.2432, "step": 12632 }, { "epoch": 1.9331293037490436, "grad_norm": 2.208001178052822, "learning_rate": 5.8583477199474213e-08, "loss": 0.294, "step": 12633 }, { "epoch": 1.933282325937261, "grad_norm": 2.354523824473684, "learning_rate": 5.831592796635477e-08, "loss": 0.3385, "step": 12634 }, { "epoch": 1.9334353481254782, "grad_norm": 1.8312645826200542, "learning_rate": 5.804898929094638e-08, "loss": 0.2109, "step": 12635 }, { "epoch": 1.9335883703136956, "grad_norm": 2.0830635861639366, "learning_rate": 5.77826611896426e-08, "loss": 0.2663, "step": 12636 }, { "epoch": 1.9337413925019127, "grad_norm": 2.419482269865649, "learning_rate": 5.751694367879923e-08, "loss": 0.2717, "step": 12637 }, { "epoch": 1.93389441469013, "grad_norm": 2.0657515526693464, "learning_rate": 5.725183677473656e-08, "loss": 0.2888, "step": 12638 }, { "epoch": 1.9340474368783473, "grad_norm": 2.0596978612467565, "learning_rate": 5.698734049373489e-08, "loss": 0.2544, "step": 12639 }, { "epoch": 1.9342004590665647, "grad_norm": 2.5250076265493955, "learning_rate": 5.6723454852036785e-08, "loss": 0.3538, "step": 12640 }, { "epoch": 1.934353481254782, "grad_norm": 2.2943276103623536, "learning_rate": 5.64601798658504e-08, "loss": 0.2966, "step": 12641 }, { "epoch": 1.9345065034429991, "grad_norm": 2.1423583068445087, "learning_rate": 5.6197515551343895e-08, "loss": 0.3187, "step": 12642 }, { "epoch": 1.9346595256312167, "grad_norm": 2.123351982906415, "learning_rate": 5.593546192464883e-08, "loss": 0.2805, "step": 12643 }, { "epoch": 1.9348125478194338, "grad_norm": 2.1385231757290257, "learning_rate": 5.567401900185787e-08, "loss": 0.2985, "step": 12644 }, { "epoch": 1.9349655700076511, "grad_norm": 2.283449728780997, "learning_rate": 5.541318679902707e-08, "loss": 0.3114, "step": 12645 }, { "epoch": 1.9351185921958685, "grad_norm": 1.9670750492646343, "learning_rate": 5.515296533217696e-08, "loss": 0.2487, "step": 12646 }, { "epoch": 1.9352716143840856, "grad_norm": 2.095455321897977, "learning_rate": 5.4893354617286954e-08, "loss": 0.2793, "step": 12647 }, { "epoch": 1.9354246365723031, "grad_norm": 1.9765462462707317, "learning_rate": 5.4634354670302095e-08, "loss": 0.3064, "step": 12648 }, { "epoch": 1.9355776587605202, "grad_norm": 1.8820560851270636, "learning_rate": 5.437596550712743e-08, "loss": 0.2657, "step": 12649 }, { "epoch": 1.9357306809487376, "grad_norm": 2.3788353271474634, "learning_rate": 5.411818714363248e-08, "loss": 0.3056, "step": 12650 }, { "epoch": 1.935883703136955, "grad_norm": 2.2261586061883953, "learning_rate": 5.3861019595649045e-08, "loss": 0.3066, "step": 12651 }, { "epoch": 1.936036725325172, "grad_norm": 1.785826281369035, "learning_rate": 5.360446287896781e-08, "loss": 0.2866, "step": 12652 }, { "epoch": 1.9361897475133896, "grad_norm": 2.331191024955345, "learning_rate": 5.334851700934951e-08, "loss": 0.3063, "step": 12653 }, { "epoch": 1.9363427697016067, "grad_norm": 1.8719935808142152, "learning_rate": 5.309318200250824e-08, "loss": 0.2523, "step": 12654 }, { "epoch": 1.936495791889824, "grad_norm": 2.223253250317612, "learning_rate": 5.283845787412811e-08, "loss": 0.3068, "step": 12655 }, { "epoch": 1.9366488140780413, "grad_norm": 2.1951752055550853, "learning_rate": 5.258434463985218e-08, "loss": 0.2619, "step": 12656 }, { "epoch": 1.9368018362662585, "grad_norm": 2.059501912533121, "learning_rate": 5.233084231528574e-08, "loss": 0.2819, "step": 12657 }, { "epoch": 1.936954858454476, "grad_norm": 2.2992385321273896, "learning_rate": 5.207795091599743e-08, "loss": 0.2655, "step": 12658 }, { "epoch": 1.9371078806426931, "grad_norm": 2.034712783110768, "learning_rate": 5.182567045751929e-08, "loss": 0.2648, "step": 12659 }, { "epoch": 1.9372609028309105, "grad_norm": 2.0115061960293747, "learning_rate": 5.157400095534337e-08, "loss": 0.2186, "step": 12660 }, { "epoch": 1.9374139250191278, "grad_norm": 1.9064111987400696, "learning_rate": 5.1322942424927303e-08, "loss": 0.2377, "step": 12661 }, { "epoch": 1.937566947207345, "grad_norm": 2.1764256108666236, "learning_rate": 5.107249488168875e-08, "loss": 0.247, "step": 12662 }, { "epoch": 1.9377199693955625, "grad_norm": 1.7123673286799752, "learning_rate": 5.082265834100875e-08, "loss": 0.2426, "step": 12663 }, { "epoch": 1.9378729915837796, "grad_norm": 2.3757105271033483, "learning_rate": 5.0573432818229464e-08, "loss": 0.2783, "step": 12664 }, { "epoch": 1.938026013771997, "grad_norm": 2.0579111088781223, "learning_rate": 5.0324818328659766e-08, "loss": 0.2608, "step": 12665 }, { "epoch": 1.9381790359602142, "grad_norm": 2.160905591204374, "learning_rate": 5.007681488756522e-08, "loss": 0.2706, "step": 12666 }, { "epoch": 1.9383320581484316, "grad_norm": 2.2877271140280393, "learning_rate": 4.982942251017808e-08, "loss": 0.2357, "step": 12667 }, { "epoch": 1.938485080336649, "grad_norm": 2.1542809755239216, "learning_rate": 4.958264121169176e-08, "loss": 0.2625, "step": 12668 }, { "epoch": 1.938638102524866, "grad_norm": 2.059582513808786, "learning_rate": 4.93364710072608e-08, "loss": 0.2883, "step": 12669 }, { "epoch": 1.9387911247130833, "grad_norm": 2.1724063331417636, "learning_rate": 4.9090911912005325e-08, "loss": 0.2804, "step": 12670 }, { "epoch": 1.9389441469013007, "grad_norm": 2.076507363841404, "learning_rate": 4.884596394100438e-08, "loss": 0.2798, "step": 12671 }, { "epoch": 1.939097169089518, "grad_norm": 2.1280474655352286, "learning_rate": 4.8601627109302605e-08, "loss": 0.3037, "step": 12672 }, { "epoch": 1.9392501912777353, "grad_norm": 2.191630607477236, "learning_rate": 4.835790143190466e-08, "loss": 0.2652, "step": 12673 }, { "epoch": 1.9394032134659525, "grad_norm": 2.017617923200027, "learning_rate": 4.8114786923778575e-08, "loss": 0.2702, "step": 12674 }, { "epoch": 1.93955623565417, "grad_norm": 2.1656108843312754, "learning_rate": 4.787228359985685e-08, "loss": 0.2703, "step": 12675 }, { "epoch": 1.9397092578423871, "grad_norm": 2.056305398421697, "learning_rate": 4.7630391475029794e-08, "loss": 0.2748, "step": 12676 }, { "epoch": 1.9398622800306045, "grad_norm": 2.0212531782535934, "learning_rate": 4.7389110564154407e-08, "loss": 0.2648, "step": 12677 }, { "epoch": 1.9400153022188218, "grad_norm": 2.183826196568856, "learning_rate": 4.714844088204884e-08, "loss": 0.2861, "step": 12678 }, { "epoch": 1.940168324407039, "grad_norm": 1.9595429105828446, "learning_rate": 4.6908382443493496e-08, "loss": 0.2806, "step": 12679 }, { "epoch": 1.9403213465952565, "grad_norm": 2.104117403711713, "learning_rate": 4.666893526323102e-08, "loss": 0.3612, "step": 12680 }, { "epoch": 1.9404743687834736, "grad_norm": 2.3085008583768447, "learning_rate": 4.643009935596632e-08, "loss": 0.3073, "step": 12681 }, { "epoch": 1.940627390971691, "grad_norm": 2.146451918561183, "learning_rate": 4.6191874736367656e-08, "loss": 0.2558, "step": 12682 }, { "epoch": 1.9407804131599082, "grad_norm": 2.3192462362711734, "learning_rate": 4.595426141906667e-08, "loss": 0.3116, "step": 12683 }, { "epoch": 1.9409334353481253, "grad_norm": 2.4574745152024793, "learning_rate": 4.57172594186539e-08, "loss": 0.2959, "step": 12684 }, { "epoch": 1.941086457536343, "grad_norm": 1.9916261660219228, "learning_rate": 4.54808687496866e-08, "loss": 0.2328, "step": 12685 }, { "epoch": 1.94123947972456, "grad_norm": 1.8992522773265608, "learning_rate": 4.5245089426680934e-08, "loss": 0.2541, "step": 12686 }, { "epoch": 1.9413925019127773, "grad_norm": 1.8079036077768846, "learning_rate": 4.500992146411753e-08, "loss": 0.2135, "step": 12687 }, { "epoch": 1.9415455241009947, "grad_norm": 1.9889511801750943, "learning_rate": 4.4775364876438185e-08, "loss": 0.2427, "step": 12688 }, { "epoch": 1.9416985462892118, "grad_norm": 1.751479263129326, "learning_rate": 4.454141967805026e-08, "loss": 0.2189, "step": 12689 }, { "epoch": 1.9418515684774293, "grad_norm": 2.0423292553879957, "learning_rate": 4.430808588331892e-08, "loss": 0.257, "step": 12690 }, { "epoch": 1.9420045906656465, "grad_norm": 2.0041499310977504, "learning_rate": 4.407536350657493e-08, "loss": 0.2035, "step": 12691 }, { "epoch": 1.9421576128538638, "grad_norm": 2.312755751955661, "learning_rate": 4.3843252562110193e-08, "loss": 0.2828, "step": 12692 }, { "epoch": 1.9423106350420811, "grad_norm": 1.971954947422424, "learning_rate": 4.361175306418109e-08, "loss": 0.2417, "step": 12693 }, { "epoch": 1.9424636572302982, "grad_norm": 1.831074912801939, "learning_rate": 4.33808650270029e-08, "loss": 0.2528, "step": 12694 }, { "epoch": 1.9426166794185158, "grad_norm": 2.0578830783806525, "learning_rate": 4.3150588464756504e-08, "loss": 0.2771, "step": 12695 }, { "epoch": 1.942769701606733, "grad_norm": 1.961835942990468, "learning_rate": 4.2920923391583937e-08, "loss": 0.221, "step": 12696 }, { "epoch": 1.9429227237949502, "grad_norm": 1.9705490121987508, "learning_rate": 4.269186982159057e-08, "loss": 0.2571, "step": 12697 }, { "epoch": 1.9430757459831676, "grad_norm": 2.0673965559846312, "learning_rate": 4.246342776884182e-08, "loss": 0.2342, "step": 12698 }, { "epoch": 1.943228768171385, "grad_norm": 2.027833565441114, "learning_rate": 4.223559724736759e-08, "loss": 0.2193, "step": 12699 }, { "epoch": 1.9433817903596022, "grad_norm": 1.928042773055958, "learning_rate": 4.200837827116111e-08, "loss": 0.2249, "step": 12700 }, { "epoch": 1.9435348125478193, "grad_norm": 2.0145095411771186, "learning_rate": 4.1781770854175674e-08, "loss": 0.2194, "step": 12701 }, { "epoch": 1.9436878347360367, "grad_norm": 2.0755925996216606, "learning_rate": 4.1555775010329034e-08, "loss": 0.2577, "step": 12702 }, { "epoch": 1.943840856924254, "grad_norm": 2.1088247690311714, "learning_rate": 4.1330390753500094e-08, "loss": 0.28, "step": 12703 }, { "epoch": 1.9439938791124713, "grad_norm": 2.070960100346576, "learning_rate": 4.110561809752889e-08, "loss": 0.2509, "step": 12704 }, { "epoch": 1.9441469013006887, "grad_norm": 2.0891100512185603, "learning_rate": 4.0881457056222153e-08, "loss": 0.2513, "step": 12705 }, { "epoch": 1.9442999234889058, "grad_norm": 2.5962353577847566, "learning_rate": 4.065790764334554e-08, "loss": 0.3419, "step": 12706 }, { "epoch": 1.9444529456771233, "grad_norm": 1.9640071319967864, "learning_rate": 4.043496987262696e-08, "loss": 0.2708, "step": 12707 }, { "epoch": 1.9446059678653405, "grad_norm": 2.179001565244998, "learning_rate": 4.021264375775991e-08, "loss": 0.3385, "step": 12708 }, { "epoch": 1.9447589900535578, "grad_norm": 2.1426220088913954, "learning_rate": 3.99909293123979e-08, "loss": 0.2687, "step": 12709 }, { "epoch": 1.9449120122417751, "grad_norm": 1.8754076871641312, "learning_rate": 3.97698265501556e-08, "loss": 0.2513, "step": 12710 }, { "epoch": 1.9450650344299922, "grad_norm": 2.042815422922247, "learning_rate": 3.954933548461326e-08, "loss": 0.2162, "step": 12711 }, { "epoch": 1.9452180566182098, "grad_norm": 1.8552967583474373, "learning_rate": 3.932945612931227e-08, "loss": 0.2369, "step": 12712 }, { "epoch": 1.945371078806427, "grad_norm": 1.907696643956236, "learning_rate": 3.911018849775405e-08, "loss": 0.2624, "step": 12713 }, { "epoch": 1.9455241009946442, "grad_norm": 2.0798771441829116, "learning_rate": 3.8891532603407834e-08, "loss": 0.2829, "step": 12714 }, { "epoch": 1.9456771231828616, "grad_norm": 2.3790723531215257, "learning_rate": 3.867348845970065e-08, "loss": 0.3033, "step": 12715 }, { "epoch": 1.9458301453710787, "grad_norm": 1.8852530500177367, "learning_rate": 3.84560560800229e-08, "loss": 0.228, "step": 12716 }, { "epoch": 1.9459831675592962, "grad_norm": 2.1820151584706613, "learning_rate": 3.8239235477728344e-08, "loss": 0.3082, "step": 12717 }, { "epoch": 1.9461361897475133, "grad_norm": 2.2140014660667933, "learning_rate": 3.8023026666133e-08, "loss": 0.2537, "step": 12718 }, { "epoch": 1.9462892119357307, "grad_norm": 2.2659188032539195, "learning_rate": 3.7807429658516246e-08, "loss": 0.2632, "step": 12719 }, { "epoch": 1.946442234123948, "grad_norm": 2.5293218796462464, "learning_rate": 3.7592444468116384e-08, "loss": 0.2743, "step": 12720 }, { "epoch": 1.9465952563121651, "grad_norm": 1.8671996561905655, "learning_rate": 3.7378071108138405e-08, "loss": 0.2409, "step": 12721 }, { "epoch": 1.9467482785003827, "grad_norm": 2.239141860501856, "learning_rate": 3.716430959174622e-08, "loss": 0.3049, "step": 12722 }, { "epoch": 1.9469013006885998, "grad_norm": 1.909309810127325, "learning_rate": 3.6951159932069323e-08, "loss": 0.2702, "step": 12723 }, { "epoch": 1.9470543228768171, "grad_norm": 2.1027549057800097, "learning_rate": 3.673862214219726e-08, "loss": 0.2468, "step": 12724 }, { "epoch": 1.9472073450650345, "grad_norm": 1.864832404974843, "learning_rate": 3.65266962351829e-08, "loss": 0.223, "step": 12725 }, { "epoch": 1.9473603672532516, "grad_norm": 1.7866644661828868, "learning_rate": 3.6315382224041406e-08, "loss": 0.2208, "step": 12726 }, { "epoch": 1.9475133894414691, "grad_norm": 2.125342586374641, "learning_rate": 3.610468012175017e-08, "loss": 0.3016, "step": 12727 }, { "epoch": 1.9476664116296862, "grad_norm": 2.676246515950065, "learning_rate": 3.5894589941248835e-08, "loss": 0.3001, "step": 12728 }, { "epoch": 1.9478194338179036, "grad_norm": 2.1133835150212024, "learning_rate": 3.568511169544153e-08, "loss": 0.3017, "step": 12729 }, { "epoch": 1.947972456006121, "grad_norm": 1.9531071847013737, "learning_rate": 3.547624539719241e-08, "loss": 0.2712, "step": 12730 }, { "epoch": 1.9481254781943382, "grad_norm": 2.3816136624101403, "learning_rate": 3.526799105932788e-08, "loss": 0.3098, "step": 12731 }, { "epoch": 1.9482785003825556, "grad_norm": 2.0472889114981436, "learning_rate": 3.506034869463881e-08, "loss": 0.2275, "step": 12732 }, { "epoch": 1.9484315225707727, "grad_norm": 2.005149594082394, "learning_rate": 3.4853318315876126e-08, "loss": 0.28, "step": 12733 }, { "epoch": 1.94858454475899, "grad_norm": 2.307715143977197, "learning_rate": 3.4646899935755205e-08, "loss": 0.3267, "step": 12734 }, { "epoch": 1.9487375669472073, "grad_norm": 2.6018657290133205, "learning_rate": 3.4441093566953685e-08, "loss": 0.2484, "step": 12735 }, { "epoch": 1.9488905891354247, "grad_norm": 2.2228026058532255, "learning_rate": 3.4235899222108126e-08, "loss": 0.2843, "step": 12736 }, { "epoch": 1.949043611323642, "grad_norm": 2.278609420116441, "learning_rate": 3.4031316913824e-08, "loss": 0.2884, "step": 12737 }, { "epoch": 1.9491966335118591, "grad_norm": 2.3154914775864954, "learning_rate": 3.3827346654663473e-08, "loss": 0.324, "step": 12738 }, { "epoch": 1.9493496557000765, "grad_norm": 2.6375485637022438, "learning_rate": 3.36239884571532e-08, "loss": 0.2916, "step": 12739 }, { "epoch": 1.9495026778882938, "grad_norm": 1.9118207499010522, "learning_rate": 3.342124233378319e-08, "loss": 0.2729, "step": 12740 }, { "epoch": 1.9496557000765111, "grad_norm": 1.9701216325844941, "learning_rate": 3.3219108297003476e-08, "loss": 0.2535, "step": 12741 }, { "epoch": 1.9498087222647285, "grad_norm": 2.2188220603731286, "learning_rate": 3.3017586359228584e-08, "loss": 0.2926, "step": 12742 }, { "epoch": 1.9499617444529456, "grad_norm": 1.828588995912328, "learning_rate": 3.281667653283416e-08, "loss": 0.2301, "step": 12743 }, { "epoch": 1.9501147666411631, "grad_norm": 2.072114708886476, "learning_rate": 3.2616378830160336e-08, "loss": 0.2664, "step": 12744 }, { "epoch": 1.9502677888293802, "grad_norm": 2.09207507253338, "learning_rate": 3.241669326350727e-08, "loss": 0.2392, "step": 12745 }, { "epoch": 1.9504208110175976, "grad_norm": 2.222014245372224, "learning_rate": 3.221761984513849e-08, "loss": 0.3022, "step": 12746 }, { "epoch": 1.950573833205815, "grad_norm": 2.4473576328151823, "learning_rate": 3.2019158587279773e-08, "loss": 0.3092, "step": 12747 }, { "epoch": 1.950726855394032, "grad_norm": 2.1372539044169256, "learning_rate": 3.1821309502119146e-08, "loss": 0.2586, "step": 12748 }, { "epoch": 1.9508798775822496, "grad_norm": 2.2224469014528383, "learning_rate": 3.1624072601808e-08, "loss": 0.3213, "step": 12749 }, { "epoch": 1.9510328997704667, "grad_norm": 2.268220714282305, "learning_rate": 3.142744789845997e-08, "loss": 0.2969, "step": 12750 }, { "epoch": 1.951185921958684, "grad_norm": 2.2497531369255426, "learning_rate": 3.123143540414875e-08, "loss": 0.2996, "step": 12751 }, { "epoch": 1.9513389441469013, "grad_norm": 2.0618539697503935, "learning_rate": 3.1036035130912464e-08, "loss": 0.3029, "step": 12752 }, { "epoch": 1.9514919663351185, "grad_norm": 1.9194248768655344, "learning_rate": 3.0841247090752646e-08, "loss": 0.2567, "step": 12753 }, { "epoch": 1.951644988523336, "grad_norm": 2.2237292380811384, "learning_rate": 3.064707129563194e-08, "loss": 0.2751, "step": 12754 }, { "epoch": 1.9517980107115531, "grad_norm": 2.1877555347738062, "learning_rate": 3.045350775747413e-08, "loss": 0.2889, "step": 12755 }, { "epoch": 1.9519510328997705, "grad_norm": 2.1896793457465074, "learning_rate": 3.02605564881675e-08, "loss": 0.2728, "step": 12756 }, { "epoch": 1.9521040550879878, "grad_norm": 1.6954979190860753, "learning_rate": 3.006821749956146e-08, "loss": 0.2552, "step": 12757 }, { "epoch": 1.952257077276205, "grad_norm": 2.1512376385993712, "learning_rate": 2.9876490803469884e-08, "loss": 0.2612, "step": 12758 }, { "epoch": 1.9524100994644225, "grad_norm": 2.4529291510609856, "learning_rate": 2.968537641166669e-08, "loss": 0.3468, "step": 12759 }, { "epoch": 1.9525631216526396, "grad_norm": 2.25080392769247, "learning_rate": 2.9494874335886935e-08, "loss": 0.2853, "step": 12760 }, { "epoch": 1.952716143840857, "grad_norm": 2.520938479932384, "learning_rate": 2.9304984587833485e-08, "loss": 0.3053, "step": 12761 }, { "epoch": 1.9528691660290742, "grad_norm": 2.014803782393137, "learning_rate": 2.9115707179165898e-08, "loss": 0.2684, "step": 12762 }, { "epoch": 1.9530221882172913, "grad_norm": 2.2036033789059104, "learning_rate": 2.8927042121508208e-08, "loss": 0.2638, "step": 12763 }, { "epoch": 1.953175210405509, "grad_norm": 2.2003394728145618, "learning_rate": 2.8738989426448928e-08, "loss": 0.3004, "step": 12764 }, { "epoch": 1.953328232593726, "grad_norm": 1.9303823675832934, "learning_rate": 2.855154910553548e-08, "loss": 0.2712, "step": 12765 }, { "epoch": 1.9534812547819433, "grad_norm": 2.1463555044250775, "learning_rate": 2.8364721170280883e-08, "loss": 0.2339, "step": 12766 }, { "epoch": 1.9536342769701607, "grad_norm": 2.2150266187516934, "learning_rate": 2.817850563215707e-08, "loss": 0.3033, "step": 12767 }, { "epoch": 1.953787299158378, "grad_norm": 2.258512569394117, "learning_rate": 2.799290250260156e-08, "loss": 0.2643, "step": 12768 }, { "epoch": 1.9539403213465953, "grad_norm": 2.300631573365148, "learning_rate": 2.780791179301301e-08, "loss": 0.3314, "step": 12769 }, { "epoch": 1.9540933435348125, "grad_norm": 2.148802915378045, "learning_rate": 2.7623533514751223e-08, "loss": 0.2894, "step": 12770 }, { "epoch": 1.9542463657230298, "grad_norm": 2.5611381404597, "learning_rate": 2.7439767679140474e-08, "loss": 0.2613, "step": 12771 }, { "epoch": 1.9543993879112471, "grad_norm": 1.7155427881312075, "learning_rate": 2.7256614297467287e-08, "loss": 0.2885, "step": 12772 }, { "epoch": 1.9545524100994645, "grad_norm": 1.8547576958237793, "learning_rate": 2.707407338097823e-08, "loss": 0.253, "step": 12773 }, { "epoch": 1.9547054322876818, "grad_norm": 2.401444734532838, "learning_rate": 2.6892144940885435e-08, "loss": 0.3416, "step": 12774 }, { "epoch": 1.954858454475899, "grad_norm": 2.3311045666651102, "learning_rate": 2.6710828988359973e-08, "loss": 0.3475, "step": 12775 }, { "epoch": 1.9550114766641165, "grad_norm": 2.233725116048879, "learning_rate": 2.653012553453849e-08, "loss": 0.2838, "step": 12776 }, { "epoch": 1.9551644988523336, "grad_norm": 2.06475531213535, "learning_rate": 2.635003459051877e-08, "loss": 0.3182, "step": 12777 }, { "epoch": 1.955317521040551, "grad_norm": 2.2259592026875135, "learning_rate": 2.617055616736086e-08, "loss": 0.2861, "step": 12778 }, { "epoch": 1.9554705432287682, "grad_norm": 2.3905401922864216, "learning_rate": 2.5991690276087056e-08, "loss": 0.297, "step": 12779 }, { "epoch": 1.9556235654169853, "grad_norm": 1.7548109646120047, "learning_rate": 2.581343692768079e-08, "loss": 0.1689, "step": 12780 }, { "epoch": 1.955776587605203, "grad_norm": 2.3205852966806804, "learning_rate": 2.5635796133091084e-08, "loss": 0.2712, "step": 12781 }, { "epoch": 1.95592960979342, "grad_norm": 2.076512690272171, "learning_rate": 2.54587679032281e-08, "loss": 0.2737, "step": 12782 }, { "epoch": 1.9560826319816373, "grad_norm": 1.9197513713440644, "learning_rate": 2.528235224896203e-08, "loss": 0.2378, "step": 12783 }, { "epoch": 1.9562356541698547, "grad_norm": 2.137377531971493, "learning_rate": 2.5106549181127536e-08, "loss": 0.2342, "step": 12784 }, { "epoch": 1.9563886763580718, "grad_norm": 2.4352962597543413, "learning_rate": 2.4931358710522657e-08, "loss": 0.2878, "step": 12785 }, { "epoch": 1.9565416985462893, "grad_norm": 2.244106347035568, "learning_rate": 2.4756780847905448e-08, "loss": 0.2981, "step": 12786 }, { "epoch": 1.9566947207345065, "grad_norm": 2.212890440083552, "learning_rate": 2.4582815603998445e-08, "loss": 0.2592, "step": 12787 }, { "epoch": 1.9568477429227238, "grad_norm": 2.0677355320744435, "learning_rate": 2.440946298948421e-08, "loss": 0.2614, "step": 12788 }, { "epoch": 1.9570007651109411, "grad_norm": 2.185133311922832, "learning_rate": 2.4236723015008678e-08, "loss": 0.2937, "step": 12789 }, { "epoch": 1.9571537872991582, "grad_norm": 2.16851766484783, "learning_rate": 2.4064595691182247e-08, "loss": 0.2739, "step": 12790 }, { "epoch": 1.9573068094873758, "grad_norm": 2.0968905865267, "learning_rate": 2.3893081028575348e-08, "loss": 0.2871, "step": 12791 }, { "epoch": 1.957459831675593, "grad_norm": 2.1826172797738552, "learning_rate": 2.3722179037720673e-08, "loss": 0.287, "step": 12792 }, { "epoch": 1.9576128538638102, "grad_norm": 2.0634184984657478, "learning_rate": 2.3551889729114264e-08, "loss": 0.2339, "step": 12793 }, { "epoch": 1.9577658760520276, "grad_norm": 2.3855698367985756, "learning_rate": 2.3382213113214426e-08, "loss": 0.2683, "step": 12794 }, { "epoch": 1.9579188982402447, "grad_norm": 2.2611003903495996, "learning_rate": 2.321314920044171e-08, "loss": 0.2936, "step": 12795 }, { "epoch": 1.9580719204284622, "grad_norm": 2.1631865336468046, "learning_rate": 2.3044698001178923e-08, "loss": 0.295, "step": 12796 }, { "epoch": 1.9582249426166793, "grad_norm": 2.012992323941021, "learning_rate": 2.287685952577223e-08, "loss": 0.3247, "step": 12797 }, { "epoch": 1.9583779648048967, "grad_norm": 2.2715497490794054, "learning_rate": 2.270963378452673e-08, "loss": 0.2826, "step": 12798 }, { "epoch": 1.958530986993114, "grad_norm": 2.3168577682957023, "learning_rate": 2.2543020787715308e-08, "loss": 0.3328, "step": 12799 }, { "epoch": 1.9586840091813313, "grad_norm": 2.0418637103355786, "learning_rate": 2.2377020545568673e-08, "loss": 0.2509, "step": 12800 }, { "epoch": 1.9588370313695487, "grad_norm": 2.1613693720849323, "learning_rate": 2.221163306828089e-08, "loss": 0.2569, "step": 12801 }, { "epoch": 1.9589900535577658, "grad_norm": 1.972935734881763, "learning_rate": 2.2046858366010503e-08, "loss": 0.2469, "step": 12802 }, { "epoch": 1.9591430757459831, "grad_norm": 1.8252344132384086, "learning_rate": 2.1882696448877193e-08, "loss": 0.2564, "step": 12803 }, { "epoch": 1.9592960979342005, "grad_norm": 1.8533980617714614, "learning_rate": 2.1719147326961786e-08, "loss": 0.2677, "step": 12804 }, { "epoch": 1.9594491201224178, "grad_norm": 1.9106183949752555, "learning_rate": 2.155621101030847e-08, "loss": 0.3246, "step": 12805 }, { "epoch": 1.9596021423106351, "grad_norm": 2.4116686758652293, "learning_rate": 2.1393887508924794e-08, "loss": 0.2986, "step": 12806 }, { "epoch": 1.9597551644988522, "grad_norm": 2.0370580101783755, "learning_rate": 2.123217683277834e-08, "loss": 0.2389, "step": 12807 }, { "epoch": 1.9599081866870698, "grad_norm": 2.3374821126533636, "learning_rate": 2.1071078991801164e-08, "loss": 0.2636, "step": 12808 }, { "epoch": 1.960061208875287, "grad_norm": 1.7950419140596388, "learning_rate": 2.0910593995887573e-08, "loss": 0.2105, "step": 12809 }, { "epoch": 1.9602142310635042, "grad_norm": 2.073353178793678, "learning_rate": 2.075072185489191e-08, "loss": 0.2332, "step": 12810 }, { "epoch": 1.9603672532517216, "grad_norm": 2.4863571935424273, "learning_rate": 2.059146257863409e-08, "loss": 0.308, "step": 12811 }, { "epoch": 1.9605202754399387, "grad_norm": 2.2423145973838268, "learning_rate": 2.0432816176894075e-08, "loss": 0.2668, "step": 12812 }, { "epoch": 1.9606732976281562, "grad_norm": 2.2613131124583283, "learning_rate": 2.0274782659414072e-08, "loss": 0.2899, "step": 12813 }, { "epoch": 1.9608263198163733, "grad_norm": 2.2129547501462046, "learning_rate": 2.0117362035901866e-08, "loss": 0.2971, "step": 12814 }, { "epoch": 1.9609793420045907, "grad_norm": 2.096062489754385, "learning_rate": 1.9960554316024174e-08, "loss": 0.2741, "step": 12815 }, { "epoch": 1.961132364192808, "grad_norm": 1.853363747759997, "learning_rate": 1.9804359509409954e-08, "loss": 0.2461, "step": 12816 }, { "epoch": 1.9612853863810251, "grad_norm": 2.1127207539609905, "learning_rate": 1.9648777625653757e-08, "loss": 0.2984, "step": 12817 }, { "epoch": 1.9614384085692427, "grad_norm": 1.8244916883822468, "learning_rate": 1.9493808674307945e-08, "loss": 0.2208, "step": 12818 }, { "epoch": 1.9615914307574598, "grad_norm": 1.9379785767665714, "learning_rate": 1.9339452664892677e-08, "loss": 0.2565, "step": 12819 }, { "epoch": 1.9617444529456771, "grad_norm": 1.971258507742363, "learning_rate": 1.918570960688593e-08, "loss": 0.2639, "step": 12820 }, { "epoch": 1.9618974751338945, "grad_norm": 2.206834865901176, "learning_rate": 1.9032579509729033e-08, "loss": 0.2336, "step": 12821 }, { "epoch": 1.9620504973221116, "grad_norm": 2.2371951125913694, "learning_rate": 1.88800623828278e-08, "loss": 0.2632, "step": 12822 }, { "epoch": 1.9622035195103291, "grad_norm": 2.214750729742328, "learning_rate": 1.8728158235549187e-08, "loss": 0.3168, "step": 12823 }, { "epoch": 1.9623565416985462, "grad_norm": 1.8243923581833847, "learning_rate": 1.8576867077220174e-08, "loss": 0.2238, "step": 12824 }, { "epoch": 1.9625095638867636, "grad_norm": 2.2820739863423527, "learning_rate": 1.8426188917133324e-08, "loss": 0.2798, "step": 12825 }, { "epoch": 1.962662586074981, "grad_norm": 2.3473441828274417, "learning_rate": 1.8276123764543463e-08, "loss": 0.3383, "step": 12826 }, { "epoch": 1.962815608263198, "grad_norm": 1.810468033827546, "learning_rate": 1.8126671628664328e-08, "loss": 0.2646, "step": 12827 }, { "epoch": 1.9629686304514156, "grad_norm": 2.0161550530630774, "learning_rate": 1.7977832518676354e-08, "loss": 0.2554, "step": 12828 }, { "epoch": 1.9631216526396327, "grad_norm": 2.0057193127755526, "learning_rate": 1.7829606443718893e-08, "loss": 0.2195, "step": 12829 }, { "epoch": 1.96327467482785, "grad_norm": 1.9895992190324854, "learning_rate": 1.7681993412895782e-08, "loss": 0.1972, "step": 12830 }, { "epoch": 1.9634276970160673, "grad_norm": 2.424438500859323, "learning_rate": 1.7534993435273095e-08, "loss": 0.284, "step": 12831 }, { "epoch": 1.9635807192042847, "grad_norm": 2.342021551362485, "learning_rate": 1.738860651987806e-08, "loss": 0.2584, "step": 12832 }, { "epoch": 1.963733741392502, "grad_norm": 1.952229890512349, "learning_rate": 1.724283267570126e-08, "loss": 0.3085, "step": 12833 }, { "epoch": 1.9638867635807191, "grad_norm": 2.042895088129427, "learning_rate": 1.7097671911693315e-08, "loss": 0.236, "step": 12834 }, { "epoch": 1.9640397857689365, "grad_norm": 2.325198304496035, "learning_rate": 1.6953124236772645e-08, "loss": 0.2503, "step": 12835 }, { "epoch": 1.9641928079571538, "grad_norm": 2.1434028335453803, "learning_rate": 1.6809189659813264e-08, "loss": 0.2563, "step": 12836 }, { "epoch": 1.9643458301453711, "grad_norm": 2.7341650968690705, "learning_rate": 1.6665868189655878e-08, "loss": 0.2641, "step": 12837 }, { "epoch": 1.9644988523335885, "grad_norm": 2.1110716721025806, "learning_rate": 1.652315983510344e-08, "loss": 0.2664, "step": 12838 }, { "epoch": 1.9646518745218056, "grad_norm": 2.4763953852189524, "learning_rate": 1.638106460491895e-08, "loss": 0.3259, "step": 12839 }, { "epoch": 1.9648048967100231, "grad_norm": 1.9515477357591082, "learning_rate": 1.6239582507828754e-08, "loss": 0.2274, "step": 12840 }, { "epoch": 1.9649579188982402, "grad_norm": 2.1874600207771144, "learning_rate": 1.6098713552523683e-08, "loss": 0.3048, "step": 12841 }, { "epoch": 1.9651109410864576, "grad_norm": 2.163087834030164, "learning_rate": 1.5958457747652368e-08, "loss": 0.2767, "step": 12842 }, { "epoch": 1.965263963274675, "grad_norm": 2.405722917761059, "learning_rate": 1.5818815101831252e-08, "loss": 0.2629, "step": 12843 }, { "epoch": 1.965416985462892, "grad_norm": 2.3389774387611086, "learning_rate": 1.567978562363459e-08, "loss": 0.2692, "step": 12844 }, { "epoch": 1.9655700076511096, "grad_norm": 1.774637055198869, "learning_rate": 1.5541369321601107e-08, "loss": 0.2302, "step": 12845 }, { "epoch": 1.9657230298393267, "grad_norm": 2.2203580711233815, "learning_rate": 1.5403566204231777e-08, "loss": 0.2742, "step": 12846 }, { "epoch": 1.965876052027544, "grad_norm": 2.0022244855679183, "learning_rate": 1.5266376279988726e-08, "loss": 0.2747, "step": 12847 }, { "epoch": 1.9660290742157613, "grad_norm": 2.086556860314529, "learning_rate": 1.512979955729854e-08, "loss": 0.2604, "step": 12848 }, { "epoch": 1.9661820964039785, "grad_norm": 2.044756387848911, "learning_rate": 1.499383604454896e-08, "loss": 0.2365, "step": 12849 }, { "epoch": 1.966335118592196, "grad_norm": 2.2031325687200654, "learning_rate": 1.4858485750088857e-08, "loss": 0.2612, "step": 12850 }, { "epoch": 1.9664881407804131, "grad_norm": 2.0442598118479873, "learning_rate": 1.4723748682231587e-08, "loss": 0.2223, "step": 12851 }, { "epoch": 1.9666411629686305, "grad_norm": 2.024031388837172, "learning_rate": 1.4589624849250527e-08, "loss": 0.2676, "step": 12852 }, { "epoch": 1.9667941851568478, "grad_norm": 2.042948797618122, "learning_rate": 1.4456114259384647e-08, "loss": 0.2578, "step": 12853 }, { "epoch": 1.966947207345065, "grad_norm": 1.9906287211778895, "learning_rate": 1.4323216920831829e-08, "loss": 0.2132, "step": 12854 }, { "epoch": 1.9671002295332825, "grad_norm": 2.482263149556431, "learning_rate": 1.4190932841755544e-08, "loss": 0.333, "step": 12855 }, { "epoch": 1.9672532517214996, "grad_norm": 1.7243344603142814, "learning_rate": 1.4059262030278186e-08, "loss": 0.2489, "step": 12856 }, { "epoch": 1.967406273909717, "grad_norm": 2.451829415489958, "learning_rate": 1.392820449448662e-08, "loss": 0.2961, "step": 12857 }, { "epoch": 1.9675592960979342, "grad_norm": 2.3167822602608132, "learning_rate": 1.3797760242429958e-08, "loss": 0.2894, "step": 12858 }, { "epoch": 1.9677123182861513, "grad_norm": 2.163481464374622, "learning_rate": 1.3667929282118464e-08, "loss": 0.2569, "step": 12859 }, { "epoch": 1.967865340474369, "grad_norm": 1.8496510780178912, "learning_rate": 1.3538711621526868e-08, "loss": 0.2437, "step": 12860 }, { "epoch": 1.968018362662586, "grad_norm": 2.229870372063878, "learning_rate": 1.3410107268589934e-08, "loss": 0.3158, "step": 12861 }, { "epoch": 1.9681713848508033, "grad_norm": 2.30265422085295, "learning_rate": 1.3282116231205789e-08, "loss": 0.2171, "step": 12862 }, { "epoch": 1.9683244070390207, "grad_norm": 2.053435870710879, "learning_rate": 1.3154738517235921e-08, "loss": 0.2806, "step": 12863 }, { "epoch": 1.9684774292272378, "grad_norm": 2.1300685488173254, "learning_rate": 1.3027974134501853e-08, "loss": 0.3023, "step": 12864 }, { "epoch": 1.9686304514154553, "grad_norm": 2.094110877470264, "learning_rate": 1.2901823090789577e-08, "loss": 0.2508, "step": 12865 }, { "epoch": 1.9687834736036725, "grad_norm": 2.003064762109647, "learning_rate": 1.2776285393845122e-08, "loss": 0.258, "step": 12866 }, { "epoch": 1.9689364957918898, "grad_norm": 2.4322997253204144, "learning_rate": 1.2651361051380095e-08, "loss": 0.3001, "step": 12867 }, { "epoch": 1.9690895179801071, "grad_norm": 2.0065478541875463, "learning_rate": 1.2527050071065028e-08, "loss": 0.3071, "step": 12868 }, { "epoch": 1.9692425401683245, "grad_norm": 1.9145659667385493, "learning_rate": 1.2403352460536034e-08, "loss": 0.267, "step": 12869 }, { "epoch": 1.9693955623565418, "grad_norm": 1.998726280086352, "learning_rate": 1.2280268227388148e-08, "loss": 0.2625, "step": 12870 }, { "epoch": 1.969548584544759, "grad_norm": 1.7810985547272813, "learning_rate": 1.215779737918088e-08, "loss": 0.2553, "step": 12871 }, { "epoch": 1.9697016067329762, "grad_norm": 2.0506737515397133, "learning_rate": 1.20359399234371e-08, "loss": 0.2376, "step": 12872 }, { "epoch": 1.9698546289211936, "grad_norm": 2.4243875323116053, "learning_rate": 1.1914695867638603e-08, "loss": 0.3341, "step": 12873 }, { "epoch": 1.970007651109411, "grad_norm": 1.930622018969895, "learning_rate": 1.1794065219231654e-08, "loss": 0.2257, "step": 12874 }, { "epoch": 1.9701606732976282, "grad_norm": 1.864437419947269, "learning_rate": 1.167404798562588e-08, "loss": 0.2139, "step": 12875 }, { "epoch": 1.9703136954858453, "grad_norm": 1.897571077812364, "learning_rate": 1.1554644174192053e-08, "loss": 0.2248, "step": 12876 }, { "epoch": 1.970466717674063, "grad_norm": 1.9202972446689073, "learning_rate": 1.1435853792260976e-08, "loss": 0.2011, "step": 12877 }, { "epoch": 1.97061973986228, "grad_norm": 2.110629293594774, "learning_rate": 1.1317676847131254e-08, "loss": 0.3001, "step": 12878 }, { "epoch": 1.9707727620504973, "grad_norm": 2.392523432977483, "learning_rate": 1.1200113346058195e-08, "loss": 0.3097, "step": 12879 }, { "epoch": 1.9709257842387147, "grad_norm": 2.2066538540809435, "learning_rate": 1.1083163296262689e-08, "loss": 0.2743, "step": 12880 }, { "epoch": 1.9710788064269318, "grad_norm": 1.9974484118386584, "learning_rate": 1.096682670492677e-08, "loss": 0.2149, "step": 12881 }, { "epoch": 1.9712318286151493, "grad_norm": 2.0342333461670425, "learning_rate": 1.0851103579194722e-08, "loss": 0.2746, "step": 12882 }, { "epoch": 1.9713848508033665, "grad_norm": 2.3163031751434295, "learning_rate": 1.0735993926175304e-08, "loss": 0.2656, "step": 12883 }, { "epoch": 1.9715378729915838, "grad_norm": 1.9829201798149283, "learning_rate": 1.0621497752936194e-08, "loss": 0.2601, "step": 12884 }, { "epoch": 1.9716908951798011, "grad_norm": 2.1571377906044735, "learning_rate": 1.0507615066509547e-08, "loss": 0.2833, "step": 12885 }, { "epoch": 1.9718439173680182, "grad_norm": 2.07040945084194, "learning_rate": 1.0394345873889766e-08, "loss": 0.2879, "step": 12886 }, { "epoch": 1.9719969395562358, "grad_norm": 2.2778466632041887, "learning_rate": 1.0281690182032399e-08, "loss": 0.2694, "step": 12887 }, { "epoch": 1.972149961744453, "grad_norm": 1.9727267744304013, "learning_rate": 1.0169647997856358e-08, "loss": 0.2375, "step": 12888 }, { "epoch": 1.9723029839326702, "grad_norm": 2.0960160436743003, "learning_rate": 1.0058219328242802e-08, "loss": 0.3329, "step": 12889 }, { "epoch": 1.9724560061208876, "grad_norm": 2.046906516879214, "learning_rate": 9.947404180035147e-09, "loss": 0.2837, "step": 12890 }, { "epoch": 1.9726090283091047, "grad_norm": 2.3133831775980847, "learning_rate": 9.83720256003795e-09, "loss": 0.3173, "step": 12891 }, { "epoch": 1.9727620504973222, "grad_norm": 2.025923377837509, "learning_rate": 9.727614475020241e-09, "loss": 0.2715, "step": 12892 }, { "epoch": 1.9729150726855393, "grad_norm": 2.1217931982552334, "learning_rate": 9.618639931712193e-09, "loss": 0.2897, "step": 12893 }, { "epoch": 1.9730680948737567, "grad_norm": 2.3348882198976333, "learning_rate": 9.510278936806227e-09, "loss": 0.3275, "step": 12894 }, { "epoch": 1.973221117061974, "grad_norm": 2.5591447995277403, "learning_rate": 9.402531496957024e-09, "loss": 0.3593, "step": 12895 }, { "epoch": 1.9733741392501911, "grad_norm": 2.0794411023520243, "learning_rate": 9.29539761878151e-09, "loss": 0.2514, "step": 12896 }, { "epoch": 1.9735271614384087, "grad_norm": 2.097465611753309, "learning_rate": 9.188877308858867e-09, "loss": 0.2895, "step": 12897 }, { "epoch": 1.9736801836266258, "grad_norm": 2.0471184159575007, "learning_rate": 9.082970573732752e-09, "loss": 0.2243, "step": 12898 }, { "epoch": 1.9738332058148431, "grad_norm": 2.1026098669748086, "learning_rate": 8.97767741990574e-09, "loss": 0.2623, "step": 12899 }, { "epoch": 1.9739862280030605, "grad_norm": 2.007922658885538, "learning_rate": 8.872997853843767e-09, "loss": 0.2379, "step": 12900 }, { "epoch": 1.9741392501912778, "grad_norm": 2.1522852184706345, "learning_rate": 8.76893188197725e-09, "loss": 0.2646, "step": 12901 }, { "epoch": 1.9742922723794951, "grad_norm": 1.7457113137200364, "learning_rate": 8.665479510696629e-09, "loss": 0.2275, "step": 12902 }, { "epoch": 1.9744452945677122, "grad_norm": 2.3852903678590494, "learning_rate": 8.562640746354601e-09, "loss": 0.2778, "step": 12903 }, { "epoch": 1.9745983167559296, "grad_norm": 2.0897669244676216, "learning_rate": 8.460415595268334e-09, "loss": 0.2541, "step": 12904 }, { "epoch": 1.974751338944147, "grad_norm": 2.21857340447905, "learning_rate": 8.358804063715032e-09, "loss": 0.2525, "step": 12905 }, { "epoch": 1.9749043611323642, "grad_norm": 1.9060225156510942, "learning_rate": 8.257806157934145e-09, "loss": 0.276, "step": 12906 }, { "epoch": 1.9750573833205816, "grad_norm": 2.062959820646989, "learning_rate": 8.157421884129602e-09, "loss": 0.2851, "step": 12907 }, { "epoch": 1.9752104055087987, "grad_norm": 1.7379375669518808, "learning_rate": 8.057651248466469e-09, "loss": 0.2096, "step": 12908 }, { "epoch": 1.9753634276970162, "grad_norm": 1.8775977832890811, "learning_rate": 7.958494257072069e-09, "loss": 0.2281, "step": 12909 }, { "epoch": 1.9755164498852333, "grad_norm": 1.8697635311100709, "learning_rate": 7.859950916034865e-09, "loss": 0.2855, "step": 12910 }, { "epoch": 1.9756694720734507, "grad_norm": 2.3981445826317906, "learning_rate": 7.76202123140779e-09, "loss": 0.3044, "step": 12911 }, { "epoch": 1.975822494261668, "grad_norm": 2.168622460349219, "learning_rate": 7.664705209204925e-09, "loss": 0.2939, "step": 12912 }, { "epoch": 1.9759755164498851, "grad_norm": 2.210540309755076, "learning_rate": 7.568002855402602e-09, "loss": 0.3456, "step": 12913 }, { "epoch": 1.9761285386381027, "grad_norm": 2.47623544445302, "learning_rate": 7.471914175940509e-09, "loss": 0.3503, "step": 12914 }, { "epoch": 1.9762815608263198, "grad_norm": 2.0110291162646146, "learning_rate": 7.376439176718375e-09, "loss": 0.2296, "step": 12915 }, { "epoch": 1.9764345830145371, "grad_norm": 2.059626415844223, "learning_rate": 7.2815778636003975e-09, "loss": 0.261, "step": 12916 }, { "epoch": 1.9765876052027544, "grad_norm": 1.9309744737462933, "learning_rate": 7.187330242413026e-09, "loss": 0.2504, "step": 12917 }, { "epoch": 1.9767406273909716, "grad_norm": 2.153764527156966, "learning_rate": 7.093696318943854e-09, "loss": 0.2581, "step": 12918 }, { "epoch": 1.9768936495791891, "grad_norm": 2.184851336719344, "learning_rate": 7.000676098942727e-09, "loss": 0.3208, "step": 12919 }, { "epoch": 1.9770466717674062, "grad_norm": 2.1162936254329954, "learning_rate": 6.9082695881228515e-09, "loss": 0.2874, "step": 12920 }, { "epoch": 1.9771996939556236, "grad_norm": 1.8457304539215134, "learning_rate": 6.816476792159687e-09, "loss": 0.2402, "step": 12921 }, { "epoch": 1.977352716143841, "grad_norm": 2.2107074116766294, "learning_rate": 6.725297716689838e-09, "loss": 0.3502, "step": 12922 }, { "epoch": 1.977505738332058, "grad_norm": 2.312117402985523, "learning_rate": 6.6347323673143786e-09, "loss": 0.3004, "step": 12923 }, { "epoch": 1.9776587605202756, "grad_norm": 1.916658673354669, "learning_rate": 6.544780749593305e-09, "loss": 0.2303, "step": 12924 }, { "epoch": 1.9778117827084927, "grad_norm": 1.8504219486404065, "learning_rate": 6.455442869052197e-09, "loss": 0.2086, "step": 12925 }, { "epoch": 1.97796480489671, "grad_norm": 2.252796300111889, "learning_rate": 6.366718731177779e-09, "loss": 0.3257, "step": 12926 }, { "epoch": 1.9781178270849273, "grad_norm": 2.0448647401610414, "learning_rate": 6.278608341416803e-09, "loss": 0.2517, "step": 12927 }, { "epoch": 1.9782708492731444, "grad_norm": 1.9816261463855467, "learning_rate": 6.1911117051838274e-09, "loss": 0.2709, "step": 12928 }, { "epoch": 1.978423871461362, "grad_norm": 2.1873499122980555, "learning_rate": 6.104228827850111e-09, "loss": 0.287, "step": 12929 }, { "epoch": 1.9785768936495791, "grad_norm": 2.1254405287529763, "learning_rate": 6.0179597147524975e-09, "loss": 0.2381, "step": 12930 }, { "epoch": 1.9787299158377964, "grad_norm": 2.0765132192816216, "learning_rate": 5.932304371187858e-09, "loss": 0.2851, "step": 12931 }, { "epoch": 1.9788829380260138, "grad_norm": 2.2452308925752194, "learning_rate": 5.847262802417542e-09, "loss": 0.2512, "step": 12932 }, { "epoch": 1.9790359602142311, "grad_norm": 2.1688896379609885, "learning_rate": 5.7628350136640365e-09, "loss": 0.3165, "step": 12933 }, { "epoch": 1.9791889824024484, "grad_norm": 2.1543318719909053, "learning_rate": 5.6790210101131945e-09, "loss": 0.2452, "step": 12934 }, { "epoch": 1.9793420045906656, "grad_norm": 1.9687252404414846, "learning_rate": 5.595820796912011e-09, "loss": 0.2407, "step": 12935 }, { "epoch": 1.979495026778883, "grad_norm": 2.2367361565308825, "learning_rate": 5.513234379168619e-09, "loss": 0.2738, "step": 12936 }, { "epoch": 1.9796480489671002, "grad_norm": 2.0907816123855185, "learning_rate": 5.431261761956741e-09, "loss": 0.2742, "step": 12937 }, { "epoch": 1.9798010711553176, "grad_norm": 1.9706136989908423, "learning_rate": 5.349902950310126e-09, "loss": 0.2708, "step": 12938 }, { "epoch": 1.979954093343535, "grad_norm": 1.9392410273061564, "learning_rate": 5.269157949224779e-09, "loss": 0.2548, "step": 12939 }, { "epoch": 1.980107115531752, "grad_norm": 2.3816947627575873, "learning_rate": 5.189026763661176e-09, "loss": 0.2669, "step": 12940 }, { "epoch": 1.9802601377199696, "grad_norm": 2.3186158240362045, "learning_rate": 5.109509398538714e-09, "loss": 0.3214, "step": 12941 }, { "epoch": 1.9804131599081867, "grad_norm": 1.9554878445595376, "learning_rate": 5.030605858740156e-09, "loss": 0.2202, "step": 12942 }, { "epoch": 1.980566182096404, "grad_norm": 2.0322729150228134, "learning_rate": 4.952316149114955e-09, "loss": 0.2662, "step": 12943 }, { "epoch": 1.9807192042846213, "grad_norm": 1.8278177150486519, "learning_rate": 4.874640274467046e-09, "loss": 0.2244, "step": 12944 }, { "epoch": 1.9808722264728384, "grad_norm": 2.174806576540863, "learning_rate": 4.797578239569278e-09, "loss": 0.2741, "step": 12945 }, { "epoch": 1.981025248661056, "grad_norm": 2.2415653534967217, "learning_rate": 4.721130049154532e-09, "loss": 0.2785, "step": 12946 }, { "epoch": 1.9811782708492731, "grad_norm": 2.166734196094784, "learning_rate": 4.64529570791572e-09, "loss": 0.2677, "step": 12947 }, { "epoch": 1.9813312930374904, "grad_norm": 2.0414327018784006, "learning_rate": 4.5700752205113385e-09, "loss": 0.2732, "step": 12948 }, { "epoch": 1.9814843152257078, "grad_norm": 2.256044758470271, "learning_rate": 4.495468591562135e-09, "loss": 0.3323, "step": 12949 }, { "epoch": 1.981637337413925, "grad_norm": 2.1976314322826114, "learning_rate": 4.421475825647781e-09, "loss": 0.2598, "step": 12950 }, { "epoch": 1.9817903596021424, "grad_norm": 1.7322213738511125, "learning_rate": 4.3480969273135276e-09, "loss": 0.2153, "step": 12951 }, { "epoch": 1.9819433817903596, "grad_norm": 1.9892742007837072, "learning_rate": 4.275331901066881e-09, "loss": 0.2685, "step": 12952 }, { "epoch": 1.982096403978577, "grad_norm": 2.201534831571064, "learning_rate": 4.20318075137427e-09, "loss": 0.2703, "step": 12953 }, { "epoch": 1.9822494261667942, "grad_norm": 2.3807635297799283, "learning_rate": 4.1316434826688124e-09, "loss": 0.243, "step": 12954 }, { "epoch": 1.9824024483550113, "grad_norm": 2.2659288950701346, "learning_rate": 4.060720099343662e-09, "loss": 0.3149, "step": 12955 }, { "epoch": 1.982555470543229, "grad_norm": 2.1254319890729083, "learning_rate": 3.990410605753115e-09, "loss": 0.2547, "step": 12956 }, { "epoch": 1.982708492731446, "grad_norm": 1.7746557192110035, "learning_rate": 3.920715006217047e-09, "loss": 0.2284, "step": 12957 }, { "epoch": 1.9828615149196633, "grad_norm": 2.070971432899368, "learning_rate": 3.851633305014257e-09, "loss": 0.2593, "step": 12958 }, { "epoch": 1.9830145371078807, "grad_norm": 2.2118533165855534, "learning_rate": 3.78316550638691e-09, "loss": 0.3016, "step": 12959 }, { "epoch": 1.9831675592960978, "grad_norm": 1.9279899584450073, "learning_rate": 3.715311614541639e-09, "loss": 0.2188, "step": 12960 }, { "epoch": 1.9833205814843153, "grad_norm": 2.071591635449214, "learning_rate": 3.648071633645112e-09, "loss": 0.2602, "step": 12961 }, { "epoch": 1.9834736036725324, "grad_norm": 2.147092374012748, "learning_rate": 3.5814455678262473e-09, "loss": 0.3022, "step": 12962 }, { "epoch": 1.9836266258607498, "grad_norm": 2.022467934629971, "learning_rate": 3.5154334211762174e-09, "loss": 0.2448, "step": 12963 }, { "epoch": 1.9837796480489671, "grad_norm": 2.130643029150222, "learning_rate": 3.4500351977506675e-09, "loss": 0.2165, "step": 12964 }, { "epoch": 1.9839326702371842, "grad_norm": 2.0960930753843914, "learning_rate": 3.3852509015652734e-09, "loss": 0.2506, "step": 12965 }, { "epoch": 1.9840856924254018, "grad_norm": 1.9855073543678754, "learning_rate": 3.3210805365979648e-09, "loss": 0.2233, "step": 12966 }, { "epoch": 1.984238714613619, "grad_norm": 2.051055830940307, "learning_rate": 3.2575241067911435e-09, "loss": 0.2452, "step": 12967 }, { "epoch": 1.9843917368018362, "grad_norm": 2.0931128703850708, "learning_rate": 3.194581616046133e-09, "loss": 0.2692, "step": 12968 }, { "epoch": 1.9845447589900536, "grad_norm": 1.736009426122614, "learning_rate": 3.1322530682309506e-09, "loss": 0.1924, "step": 12969 }, { "epoch": 1.984697781178271, "grad_norm": 2.124895379549314, "learning_rate": 3.0705384671714245e-09, "loss": 0.2864, "step": 12970 }, { "epoch": 1.9848508033664882, "grad_norm": 2.0882242357227283, "learning_rate": 3.0094378166578563e-09, "loss": 0.2996, "step": 12971 }, { "epoch": 1.9850038255547053, "grad_norm": 1.9487330157379934, "learning_rate": 2.9489511204439105e-09, "loss": 0.2617, "step": 12972 }, { "epoch": 1.9851568477429227, "grad_norm": 2.2667445577756444, "learning_rate": 2.8890783822432823e-09, "loss": 0.2701, "step": 12973 }, { "epoch": 1.98530986993114, "grad_norm": 2.3116383396322986, "learning_rate": 2.8298196057330308e-09, "loss": 0.2981, "step": 12974 }, { "epoch": 1.9854628921193573, "grad_norm": 1.7453561995500706, "learning_rate": 2.7711747945524668e-09, "loss": 0.1991, "step": 12975 }, { "epoch": 1.9856159143075747, "grad_norm": 1.9390207186144572, "learning_rate": 2.7131439523042646e-09, "loss": 0.2393, "step": 12976 }, { "epoch": 1.9857689364957918, "grad_norm": 2.3644792875455125, "learning_rate": 2.6557270825511293e-09, "loss": 0.2784, "step": 12977 }, { "epoch": 1.9859219586840093, "grad_norm": 2.178932135966078, "learning_rate": 2.5989241888191296e-09, "loss": 0.343, "step": 12978 }, { "epoch": 1.9860749808722264, "grad_norm": 2.1307936669410132, "learning_rate": 2.542735274597696e-09, "loss": 0.2718, "step": 12979 }, { "epoch": 1.9862280030604438, "grad_norm": 1.9874821763775943, "learning_rate": 2.4871603433374026e-09, "loss": 0.2545, "step": 12980 }, { "epoch": 1.9863810252486611, "grad_norm": 1.790519718630377, "learning_rate": 2.432199398451074e-09, "loss": 0.188, "step": 12981 }, { "epoch": 1.9865340474368782, "grad_norm": 2.3174362383429723, "learning_rate": 2.3778524433137882e-09, "loss": 0.2746, "step": 12982 }, { "epoch": 1.9866870696250958, "grad_norm": 2.346253570180665, "learning_rate": 2.3241194812639868e-09, "loss": 0.3198, "step": 12983 }, { "epoch": 1.986840091813313, "grad_norm": 2.1172501130386716, "learning_rate": 2.2710005156001415e-09, "loss": 0.2755, "step": 12984 }, { "epoch": 1.9869931140015302, "grad_norm": 2.2663121503499677, "learning_rate": 2.218495549586308e-09, "loss": 0.2343, "step": 12985 }, { "epoch": 1.9871461361897476, "grad_norm": 1.9793839185682316, "learning_rate": 2.166604586445464e-09, "loss": 0.2497, "step": 12986 }, { "epoch": 1.9872991583779647, "grad_norm": 2.2530991162584217, "learning_rate": 2.1153276293661708e-09, "loss": 0.2837, "step": 12987 }, { "epoch": 1.9874521805661822, "grad_norm": 2.3052278983511703, "learning_rate": 2.0646646814959093e-09, "loss": 0.3059, "step": 12988 }, { "epoch": 1.9876052027543993, "grad_norm": 2.0974839786486728, "learning_rate": 2.014615745946635e-09, "loss": 0.3217, "step": 12989 }, { "epoch": 1.9877582249426167, "grad_norm": 2.233579314139941, "learning_rate": 1.9651808257925564e-09, "loss": 0.2597, "step": 12990 }, { "epoch": 1.987911247130834, "grad_norm": 1.969583542552495, "learning_rate": 1.9163599240690225e-09, "loss": 0.2457, "step": 12991 }, { "epoch": 1.9880642693190511, "grad_norm": 2.2150331863004236, "learning_rate": 1.868153043774745e-09, "loss": 0.2915, "step": 12992 }, { "epoch": 1.9882172915072687, "grad_norm": 2.0676187007120923, "learning_rate": 1.82056018786958e-09, "loss": 0.2538, "step": 12993 }, { "epoch": 1.9883703136954858, "grad_norm": 1.9421580439547903, "learning_rate": 1.773581359277854e-09, "loss": 0.2766, "step": 12994 }, { "epoch": 1.9885233358837031, "grad_norm": 1.913701865466778, "learning_rate": 1.727216560882816e-09, "loss": 0.2257, "step": 12995 }, { "epoch": 1.9886763580719204, "grad_norm": 2.220818373435268, "learning_rate": 1.6814657955332992e-09, "loss": 0.3052, "step": 12996 }, { "epoch": 1.9888293802601376, "grad_norm": 2.179646529566567, "learning_rate": 1.6363290660392773e-09, "loss": 0.3342, "step": 12997 }, { "epoch": 1.9889824024483551, "grad_norm": 2.435209578355301, "learning_rate": 1.591806375170757e-09, "loss": 0.3043, "step": 12998 }, { "epoch": 1.9891354246365722, "grad_norm": 2.353359008708445, "learning_rate": 1.5478977256644379e-09, "loss": 0.2977, "step": 12999 }, { "epoch": 1.9892884468247896, "grad_norm": 2.2126170930853903, "learning_rate": 1.5046031202159417e-09, "loss": 0.2601, "step": 13000 }, { "epoch": 1.989441469013007, "grad_norm": 2.1203776595014348, "learning_rate": 1.4619225614831424e-09, "loss": 0.2572, "step": 13001 }, { "epoch": 1.9895944912012242, "grad_norm": 2.253613767274853, "learning_rate": 1.4198560520883865e-09, "loss": 0.2622, "step": 13002 }, { "epoch": 1.9897475133894416, "grad_norm": 2.11290047824741, "learning_rate": 1.378403594615163e-09, "loss": 0.3005, "step": 13003 }, { "epoch": 1.9899005355776587, "grad_norm": 2.1312196286465244, "learning_rate": 1.3375651916092135e-09, "loss": 0.2949, "step": 13004 }, { "epoch": 1.990053557765876, "grad_norm": 2.3139439421090287, "learning_rate": 1.297340845578532e-09, "loss": 0.2974, "step": 13005 }, { "epoch": 1.9902065799540933, "grad_norm": 1.9727633886230713, "learning_rate": 1.2577305589933642e-09, "loss": 0.2459, "step": 13006 }, { "epoch": 1.9903596021423107, "grad_norm": 2.0438809630472794, "learning_rate": 1.218734334286209e-09, "loss": 0.2784, "step": 13007 }, { "epoch": 1.990512624330528, "grad_norm": 1.9332274761646762, "learning_rate": 1.1803521738507072e-09, "loss": 0.3422, "step": 13008 }, { "epoch": 1.9906656465187451, "grad_norm": 1.9007967207990857, "learning_rate": 1.1425840800471933e-09, "loss": 0.254, "step": 13009 }, { "epoch": 1.9908186687069627, "grad_norm": 2.1458707099063057, "learning_rate": 1.1054300551927022e-09, "loss": 0.2593, "step": 13010 }, { "epoch": 1.9909716908951798, "grad_norm": 1.9056536998552065, "learning_rate": 1.0688901015687425e-09, "loss": 0.233, "step": 13011 }, { "epoch": 1.9911247130833971, "grad_norm": 2.3772144145607546, "learning_rate": 1.0329642214212953e-09, "loss": 0.313, "step": 13012 }, { "epoch": 1.9912777352716144, "grad_norm": 2.0814580331247927, "learning_rate": 9.976524169552638e-10, "loss": 0.2907, "step": 13013 }, { "epoch": 1.9914307574598316, "grad_norm": 2.0239695611574064, "learning_rate": 9.629546903400232e-10, "loss": 0.2643, "step": 13014 }, { "epoch": 1.9915837796480491, "grad_norm": 2.5593845320658444, "learning_rate": 9.288710437060922e-10, "loss": 0.3326, "step": 13015 }, { "epoch": 1.9917368018362662, "grad_norm": 2.48462633721508, "learning_rate": 8.954014791473509e-10, "loss": 0.2874, "step": 13016 }, { "epoch": 1.9918898240244836, "grad_norm": 1.883559842168018, "learning_rate": 8.62545998717712e-10, "loss": 0.1955, "step": 13017 }, { "epoch": 1.992042846212701, "grad_norm": 1.7267492856950604, "learning_rate": 8.303046044366713e-10, "loss": 0.2448, "step": 13018 }, { "epoch": 1.992195868400918, "grad_norm": 2.033471506716017, "learning_rate": 7.986772982826463e-10, "loss": 0.2754, "step": 13019 }, { "epoch": 1.9923488905891356, "grad_norm": 1.8562161515444404, "learning_rate": 7.676640821996373e-10, "loss": 0.2387, "step": 13020 }, { "epoch": 1.9925019127773527, "grad_norm": 2.150187407301561, "learning_rate": 7.372649580916768e-10, "loss": 0.2375, "step": 13021 }, { "epoch": 1.99265493496557, "grad_norm": 1.9946054431849656, "learning_rate": 7.074799278261602e-10, "loss": 0.2818, "step": 13022 }, { "epoch": 1.9928079571537873, "grad_norm": 1.7498242022545936, "learning_rate": 6.783089932305142e-10, "loss": 0.207, "step": 13023 }, { "epoch": 1.9929609793420044, "grad_norm": 2.1900542288629032, "learning_rate": 6.497521560977494e-10, "loss": 0.278, "step": 13024 }, { "epoch": 1.993114001530222, "grad_norm": 2.1975617817398447, "learning_rate": 6.218094181820177e-10, "loss": 0.237, "step": 13025 }, { "epoch": 1.9932670237184391, "grad_norm": 1.8776794834042783, "learning_rate": 5.944807811986142e-10, "loss": 0.2505, "step": 13026 }, { "epoch": 1.9934200459066564, "grad_norm": 1.9385495670411852, "learning_rate": 5.677662468250855e-10, "loss": 0.2213, "step": 13027 }, { "epoch": 1.9935730680948738, "grad_norm": 2.0533741177883713, "learning_rate": 5.416658167045619e-10, "loss": 0.285, "step": 13028 }, { "epoch": 1.993726090283091, "grad_norm": 2.1770054768883202, "learning_rate": 5.161794924368746e-10, "loss": 0.2512, "step": 13029 }, { "epoch": 1.9938791124713084, "grad_norm": 2.305230017581347, "learning_rate": 4.913072755896586e-10, "loss": 0.3359, "step": 13030 }, { "epoch": 1.9940321346595256, "grad_norm": 2.002526031764342, "learning_rate": 4.670491676894706e-10, "loss": 0.2198, "step": 13031 }, { "epoch": 1.994185156847743, "grad_norm": 2.0269799838385905, "learning_rate": 4.434051702262299e-10, "loss": 0.2523, "step": 13032 }, { "epoch": 1.9943381790359602, "grad_norm": 1.9888908831604968, "learning_rate": 4.203752846521081e-10, "loss": 0.2865, "step": 13033 }, { "epoch": 1.9944912012241776, "grad_norm": 1.9144641539035228, "learning_rate": 3.979595123815294e-10, "loss": 0.2994, "step": 13034 }, { "epoch": 1.994644223412395, "grad_norm": 1.776279721281176, "learning_rate": 3.761578547900602e-10, "loss": 0.1904, "step": 13035 }, { "epoch": 1.994797245600612, "grad_norm": 2.298937100217351, "learning_rate": 3.549703132188498e-10, "loss": 0.2974, "step": 13036 }, { "epoch": 1.9949502677888293, "grad_norm": 1.919240065983227, "learning_rate": 3.343968889668592e-10, "loss": 0.2379, "step": 13037 }, { "epoch": 1.9951032899770467, "grad_norm": 2.0107139718990346, "learning_rate": 3.1443758329752214e-10, "loss": 0.2555, "step": 13038 }, { "epoch": 1.995256312165264, "grad_norm": 2.1252763337912985, "learning_rate": 2.950923974387454e-10, "loss": 0.2751, "step": 13039 }, { "epoch": 1.9954093343534813, "grad_norm": 2.411812539166475, "learning_rate": 2.763613325773573e-10, "loss": 0.3436, "step": 13040 }, { "epoch": 1.9955623565416984, "grad_norm": 1.9107329649484788, "learning_rate": 2.5824438986354895e-10, "loss": 0.2166, "step": 13041 }, { "epoch": 1.995715378729916, "grad_norm": 2.358934465004862, "learning_rate": 2.4074157040976377e-10, "loss": 0.2485, "step": 13042 }, { "epoch": 1.9958684009181331, "grad_norm": 2.215031190581882, "learning_rate": 2.2385287529180788e-10, "loss": 0.2784, "step": 13043 }, { "epoch": 1.9960214231063504, "grad_norm": 2.236157444646783, "learning_rate": 2.075783055466296e-10, "loss": 0.4084, "step": 13044 }, { "epoch": 1.9961744452945678, "grad_norm": 2.0773031268943996, "learning_rate": 1.9191786217342967e-10, "loss": 0.2735, "step": 13045 }, { "epoch": 1.996327467482785, "grad_norm": 1.783273240684617, "learning_rate": 1.768715461336612e-10, "loss": 0.2927, "step": 13046 }, { "epoch": 1.9964804896710024, "grad_norm": 1.9397137676314884, "learning_rate": 1.6243935835213998e-10, "loss": 0.2428, "step": 13047 }, { "epoch": 1.9966335118592196, "grad_norm": 1.8943175353851476, "learning_rate": 1.4862129971371375e-10, "loss": 0.2924, "step": 13048 }, { "epoch": 1.996786534047437, "grad_norm": 2.1136284004737766, "learning_rate": 1.3541737106881336e-10, "loss": 0.2756, "step": 13049 }, { "epoch": 1.9969395562356542, "grad_norm": 1.8973543054141213, "learning_rate": 1.2282757322790162e-10, "loss": 0.2196, "step": 13050 }, { "epoch": 1.9970925784238713, "grad_norm": 2.0657394153839554, "learning_rate": 1.1085190696369374e-10, "loss": 0.2822, "step": 13051 }, { "epoch": 1.997245600612089, "grad_norm": 1.8308499398834328, "learning_rate": 9.94903730122676e-11, "loss": 0.2253, "step": 13052 }, { "epoch": 1.997398622800306, "grad_norm": 1.8283706378478568, "learning_rate": 8.874297207084326e-11, "loss": 0.2385, "step": 13053 }, { "epoch": 1.9975516449885233, "grad_norm": 1.9678986030662173, "learning_rate": 7.860970479889318e-11, "loss": 0.3014, "step": 13054 }, { "epoch": 1.9977046671767407, "grad_norm": 1.8653265671577366, "learning_rate": 6.909057181925249e-11, "loss": 0.2535, "step": 13055 }, { "epoch": 1.9978576893649578, "grad_norm": 2.581058041189355, "learning_rate": 6.018557371811895e-11, "loss": 0.3192, "step": 13056 }, { "epoch": 1.9980107115531753, "grad_norm": 2.005465358101594, "learning_rate": 5.1894711039501836e-11, "loss": 0.219, "step": 13057 }, { "epoch": 1.9981637337413924, "grad_norm": 2.0811354665325625, "learning_rate": 4.421798429521396e-11, "loss": 0.2814, "step": 13058 }, { "epoch": 1.9983167559296098, "grad_norm": 2.1460837784604236, "learning_rate": 3.715539395487966e-11, "loss": 0.2241, "step": 13059 }, { "epoch": 1.9984697781178271, "grad_norm": 2.081856996888235, "learning_rate": 3.0706940452596145e-11, "loss": 0.242, "step": 13060 }, { "epoch": 1.9986228003060442, "grad_norm": 2.227922939830152, "learning_rate": 2.4872624185823258e-11, "loss": 0.2824, "step": 13061 }, { "epoch": 1.9987758224942618, "grad_norm": 2.3071203921932977, "learning_rate": 1.9652445510942586e-11, "loss": 0.2726, "step": 13062 }, { "epoch": 1.998928844682479, "grad_norm": 2.4540772967670152, "learning_rate": 1.5046404748808585e-11, "loss": 0.2448, "step": 13063 }, { "epoch": 1.9990818668706962, "grad_norm": 1.9558988952277159, "learning_rate": 1.1054502183638349e-11, "loss": 0.2816, "step": 13064 }, { "epoch": 1.9992348890589136, "grad_norm": 2.437269378289497, "learning_rate": 7.676738059680944e-12, "loss": 0.366, "step": 13065 }, { "epoch": 1.999387911247131, "grad_norm": 2.309948760100383, "learning_rate": 4.913112584548074e-12, "loss": 0.2803, "step": 13066 }, { "epoch": 1.9995409334353482, "grad_norm": 2.4610462328848994, "learning_rate": 2.7636259281038637e-12, "loss": 0.3342, "step": 13067 }, { "epoch": 1.9996939556235653, "grad_norm": 2.4319473298190433, "learning_rate": 1.2282782213546284e-12, "loss": 0.2665, "step": 13068 }, { "epoch": 1.9998469778117827, "grad_norm": 1.9865301756122895, "learning_rate": 3.070695597795492e-13, "loss": 0.2713, "step": 13069 }, { "epoch": 2.0, "grad_norm": 2.437039631382003, "learning_rate": 0.0, "loss": 0.3371, "step": 13070 }, { "epoch": 2.0, "step": 13070, "total_flos": 2.021711277939753e+19, "train_loss": 0.5611064869115256, "train_runtime": 74118.7755, "train_samples_per_second": 39.019, "train_steps_per_second": 0.176 } ], "logging_steps": 1.0, "max_steps": 13070, "num_input_tokens_seen": 0, "num_train_epochs": 2, "save_steps": 2000, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 2.021711277939753e+19, "train_batch_size": 4, "trial_name": null, "trial_params": null }