{ "best_metric": null, "best_model_checkpoint": null, "epoch": 10.0, "eval_steps": 100, "global_step": 5260, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "grad_norm": 3.9179365634918213, "learning_rate": 1.2658227848101266e-07, "loss": 0.1337, "step": 1 }, { "epoch": 0.0, "grad_norm": 5.145163059234619, "learning_rate": 2.5316455696202533e-07, "loss": 0.1635, "step": 2 }, { "epoch": 0.01, "grad_norm": 5.640079021453857, "learning_rate": 3.79746835443038e-07, "loss": 0.1936, "step": 3 }, { "epoch": 0.01, "grad_norm": 5.515219211578369, "learning_rate": 5.063291139240507e-07, "loss": 0.1835, "step": 4 }, { "epoch": 0.01, "grad_norm": 5.030313014984131, "learning_rate": 6.329113924050634e-07, "loss": 0.1525, "step": 5 }, { "epoch": 0.01, "grad_norm": 4.31536865234375, "learning_rate": 7.59493670886076e-07, "loss": 0.1761, "step": 6 }, { "epoch": 0.01, "grad_norm": 5.193630218505859, "learning_rate": 8.860759493670887e-07, "loss": 0.1774, "step": 7 }, { "epoch": 0.02, "grad_norm": 5.203883171081543, "learning_rate": 1.0126582278481013e-06, "loss": 0.1199, "step": 8 }, { "epoch": 0.02, "grad_norm": 4.327480316162109, "learning_rate": 1.139240506329114e-06, "loss": 0.1621, "step": 9 }, { "epoch": 0.02, "grad_norm": 5.23154878616333, "learning_rate": 1.2658227848101267e-06, "loss": 0.1841, "step": 10 }, { "epoch": 0.02, "grad_norm": 4.512467384338379, "learning_rate": 1.3924050632911392e-06, "loss": 0.1329, "step": 11 }, { "epoch": 0.02, "grad_norm": 5.128291130065918, "learning_rate": 1.518987341772152e-06, "loss": 0.1503, "step": 12 }, { "epoch": 0.02, "grad_norm": 5.2294487953186035, "learning_rate": 1.6455696202531647e-06, "loss": 0.1402, "step": 13 }, { "epoch": 0.03, "grad_norm": 4.409157752990723, "learning_rate": 1.7721518987341774e-06, "loss": 0.1421, "step": 14 }, { "epoch": 0.03, "grad_norm": 5.557156562805176, "learning_rate": 1.8987341772151901e-06, "loss": 0.2255, "step": 15 }, { "epoch": 0.03, "grad_norm": 3.031378746032715, "learning_rate": 2.0253164556962026e-06, "loss": 0.0809, "step": 16 }, { "epoch": 0.03, "grad_norm": 4.350979328155518, "learning_rate": 2.1518987341772153e-06, "loss": 0.1456, "step": 17 }, { "epoch": 0.03, "grad_norm": 6.416922569274902, "learning_rate": 2.278481012658228e-06, "loss": 0.1914, "step": 18 }, { "epoch": 0.04, "grad_norm": 6.534358024597168, "learning_rate": 2.4050632911392408e-06, "loss": 0.1738, "step": 19 }, { "epoch": 0.04, "grad_norm": 4.483481407165527, "learning_rate": 2.5316455696202535e-06, "loss": 0.1256, "step": 20 }, { "epoch": 0.04, "grad_norm": 3.6892688274383545, "learning_rate": 2.6582278481012658e-06, "loss": 0.1122, "step": 21 }, { "epoch": 0.04, "grad_norm": 4.867861747741699, "learning_rate": 2.7848101265822785e-06, "loss": 0.1717, "step": 22 }, { "epoch": 0.04, "grad_norm": 4.9622955322265625, "learning_rate": 2.9113924050632912e-06, "loss": 0.1395, "step": 23 }, { "epoch": 0.05, "grad_norm": 4.213229179382324, "learning_rate": 3.037974683544304e-06, "loss": 0.0873, "step": 24 }, { "epoch": 0.05, "grad_norm": 5.974915981292725, "learning_rate": 3.164556962025317e-06, "loss": 0.1866, "step": 25 }, { "epoch": 0.05, "grad_norm": 5.407062530517578, "learning_rate": 3.2911392405063294e-06, "loss": 0.159, "step": 26 }, { "epoch": 0.05, "grad_norm": 5.643296718597412, "learning_rate": 3.417721518987342e-06, "loss": 0.1678, "step": 27 }, { "epoch": 0.05, "grad_norm": 4.408917427062988, "learning_rate": 3.544303797468355e-06, "loss": 0.1048, "step": 28 }, { "epoch": 0.06, "grad_norm": 5.902509689331055, "learning_rate": 3.6708860759493675e-06, "loss": 0.1866, "step": 29 }, { "epoch": 0.06, "grad_norm": 4.681512832641602, "learning_rate": 3.7974683544303802e-06, "loss": 0.1388, "step": 30 }, { "epoch": 0.06, "grad_norm": 6.25588846206665, "learning_rate": 3.924050632911393e-06, "loss": 0.1356, "step": 31 }, { "epoch": 0.06, "grad_norm": 6.080183029174805, "learning_rate": 4.050632911392405e-06, "loss": 0.1729, "step": 32 }, { "epoch": 0.06, "grad_norm": 6.282044410705566, "learning_rate": 4.177215189873418e-06, "loss": 0.1694, "step": 33 }, { "epoch": 0.06, "grad_norm": 6.562785625457764, "learning_rate": 4.303797468354431e-06, "loss": 0.1853, "step": 34 }, { "epoch": 0.07, "grad_norm": 7.224548816680908, "learning_rate": 4.430379746835443e-06, "loss": 0.1868, "step": 35 }, { "epoch": 0.07, "grad_norm": 5.909102916717529, "learning_rate": 4.556962025316456e-06, "loss": 0.1673, "step": 36 }, { "epoch": 0.07, "grad_norm": 6.294363498687744, "learning_rate": 4.683544303797468e-06, "loss": 0.1911, "step": 37 }, { "epoch": 0.07, "grad_norm": 6.2620673179626465, "learning_rate": 4.8101265822784815e-06, "loss": 0.1539, "step": 38 }, { "epoch": 0.07, "grad_norm": 5.994434833526611, "learning_rate": 4.936708860759495e-06, "loss": 0.1533, "step": 39 }, { "epoch": 0.08, "grad_norm": 3.8124520778656006, "learning_rate": 5.063291139240507e-06, "loss": 0.1304, "step": 40 }, { "epoch": 0.08, "grad_norm": 6.1676788330078125, "learning_rate": 5.189873417721519e-06, "loss": 0.1666, "step": 41 }, { "epoch": 0.08, "grad_norm": 5.9088053703308105, "learning_rate": 5.3164556962025316e-06, "loss": 0.1781, "step": 42 }, { "epoch": 0.08, "grad_norm": 6.09443998336792, "learning_rate": 5.443037974683545e-06, "loss": 0.1365, "step": 43 }, { "epoch": 0.08, "grad_norm": 8.398219108581543, "learning_rate": 5.569620253164557e-06, "loss": 0.1931, "step": 44 }, { "epoch": 0.09, "grad_norm": 7.978480339050293, "learning_rate": 5.69620253164557e-06, "loss": 0.1642, "step": 45 }, { "epoch": 0.09, "grad_norm": 5.482278823852539, "learning_rate": 5.8227848101265824e-06, "loss": 0.1027, "step": 46 }, { "epoch": 0.09, "grad_norm": 8.406912803649902, "learning_rate": 5.949367088607595e-06, "loss": 0.2059, "step": 47 }, { "epoch": 0.09, "grad_norm": 7.1611008644104, "learning_rate": 6.075949367088608e-06, "loss": 0.1893, "step": 48 }, { "epoch": 0.09, "grad_norm": 7.254922866821289, "learning_rate": 6.20253164556962e-06, "loss": 0.1945, "step": 49 }, { "epoch": 0.1, "grad_norm": 7.3462815284729, "learning_rate": 6.329113924050634e-06, "loss": 0.21, "step": 50 }, { "epoch": 0.1, "grad_norm": 7.054031848907471, "learning_rate": 6.4556962025316464e-06, "loss": 0.2421, "step": 51 }, { "epoch": 0.1, "grad_norm": 7.637986660003662, "learning_rate": 6.582278481012659e-06, "loss": 0.2536, "step": 52 }, { "epoch": 0.1, "grad_norm": 7.275852203369141, "learning_rate": 6.708860759493672e-06, "loss": 0.2508, "step": 53 }, { "epoch": 0.1, "grad_norm": 8.866172790527344, "learning_rate": 6.835443037974684e-06, "loss": 0.3086, "step": 54 }, { "epoch": 0.1, "grad_norm": 7.02511739730835, "learning_rate": 6.962025316455697e-06, "loss": 0.2206, "step": 55 }, { "epoch": 0.11, "grad_norm": 8.296602249145508, "learning_rate": 7.08860759493671e-06, "loss": 0.2487, "step": 56 }, { "epoch": 0.11, "grad_norm": 8.474047660827637, "learning_rate": 7.215189873417722e-06, "loss": 0.2285, "step": 57 }, { "epoch": 0.11, "grad_norm": 7.942759037017822, "learning_rate": 7.341772151898735e-06, "loss": 0.303, "step": 58 }, { "epoch": 0.11, "grad_norm": 7.095279693603516, "learning_rate": 7.468354430379747e-06, "loss": 0.2049, "step": 59 }, { "epoch": 0.11, "grad_norm": 8.033248901367188, "learning_rate": 7.5949367088607605e-06, "loss": 0.3687, "step": 60 }, { "epoch": 0.12, "grad_norm": 8.854182243347168, "learning_rate": 7.721518987341773e-06, "loss": 0.2485, "step": 61 }, { "epoch": 0.12, "grad_norm": 9.724520683288574, "learning_rate": 7.848101265822786e-06, "loss": 0.3384, "step": 62 }, { "epoch": 0.12, "grad_norm": 7.28740119934082, "learning_rate": 7.974683544303799e-06, "loss": 0.2338, "step": 63 }, { "epoch": 0.12, "grad_norm": 7.621365547180176, "learning_rate": 8.10126582278481e-06, "loss": 0.284, "step": 64 }, { "epoch": 0.12, "grad_norm": 9.283262252807617, "learning_rate": 8.227848101265824e-06, "loss": 0.2839, "step": 65 }, { "epoch": 0.13, "grad_norm": 8.825998306274414, "learning_rate": 8.354430379746837e-06, "loss": 0.2715, "step": 66 }, { "epoch": 0.13, "grad_norm": 9.356891632080078, "learning_rate": 8.481012658227848e-06, "loss": 0.3022, "step": 67 }, { "epoch": 0.13, "grad_norm": 7.697936534881592, "learning_rate": 8.607594936708861e-06, "loss": 0.2054, "step": 68 }, { "epoch": 0.13, "grad_norm": 8.605691909790039, "learning_rate": 8.734177215189874e-06, "loss": 0.3532, "step": 69 }, { "epoch": 0.13, "grad_norm": 7.4132304191589355, "learning_rate": 8.860759493670886e-06, "loss": 0.261, "step": 70 }, { "epoch": 0.13, "grad_norm": 9.916158676147461, "learning_rate": 8.987341772151899e-06, "loss": 0.3533, "step": 71 }, { "epoch": 0.14, "grad_norm": 9.933509826660156, "learning_rate": 9.113924050632912e-06, "loss": 0.3511, "step": 72 }, { "epoch": 0.14, "grad_norm": 8.82899284362793, "learning_rate": 9.240506329113925e-06, "loss": 0.3585, "step": 73 }, { "epoch": 0.14, "grad_norm": 8.161395072937012, "learning_rate": 9.367088607594937e-06, "loss": 0.343, "step": 74 }, { "epoch": 0.14, "grad_norm": 8.669229507446289, "learning_rate": 9.49367088607595e-06, "loss": 0.3558, "step": 75 }, { "epoch": 0.14, "grad_norm": 8.07867431640625, "learning_rate": 9.620253164556963e-06, "loss": 0.3399, "step": 76 }, { "epoch": 0.15, "grad_norm": 8.1488037109375, "learning_rate": 9.746835443037975e-06, "loss": 0.3527, "step": 77 }, { "epoch": 0.15, "grad_norm": 7.531495094299316, "learning_rate": 9.87341772151899e-06, "loss": 0.3461, "step": 78 }, { "epoch": 0.15, "grad_norm": 8.746057510375977, "learning_rate": 1e-05, "loss": 0.4467, "step": 79 }, { "epoch": 0.15, "grad_norm": 8.177553176879883, "learning_rate": 1.0126582278481014e-05, "loss": 0.3817, "step": 80 }, { "epoch": 0.15, "grad_norm": 8.582009315490723, "learning_rate": 1.0253164556962025e-05, "loss": 0.487, "step": 81 }, { "epoch": 0.16, "grad_norm": 8.806139945983887, "learning_rate": 1.0379746835443039e-05, "loss": 0.3465, "step": 82 }, { "epoch": 0.16, "grad_norm": 8.646577835083008, "learning_rate": 1.0506329113924052e-05, "loss": 0.4514, "step": 83 }, { "epoch": 0.16, "grad_norm": 7.7704339027404785, "learning_rate": 1.0632911392405063e-05, "loss": 0.3642, "step": 84 }, { "epoch": 0.16, "grad_norm": 9.139016151428223, "learning_rate": 1.0759493670886076e-05, "loss": 0.4105, "step": 85 }, { "epoch": 0.16, "grad_norm": 8.591004371643066, "learning_rate": 1.088607594936709e-05, "loss": 0.4216, "step": 86 }, { "epoch": 0.17, "grad_norm": 8.614941596984863, "learning_rate": 1.1012658227848103e-05, "loss": 0.4534, "step": 87 }, { "epoch": 0.17, "grad_norm": 7.824285984039307, "learning_rate": 1.1139240506329114e-05, "loss": 0.4152, "step": 88 }, { "epoch": 0.17, "grad_norm": 7.805992126464844, "learning_rate": 1.1265822784810127e-05, "loss": 0.3464, "step": 89 }, { "epoch": 0.17, "grad_norm": 8.54703140258789, "learning_rate": 1.139240506329114e-05, "loss": 0.5527, "step": 90 }, { "epoch": 0.17, "grad_norm": 7.4142842292785645, "learning_rate": 1.1518987341772152e-05, "loss": 0.408, "step": 91 }, { "epoch": 0.17, "grad_norm": 8.580718040466309, "learning_rate": 1.1645569620253165e-05, "loss": 0.3852, "step": 92 }, { "epoch": 0.18, "grad_norm": 7.746488571166992, "learning_rate": 1.1772151898734178e-05, "loss": 0.423, "step": 93 }, { "epoch": 0.18, "grad_norm": 8.523046493530273, "learning_rate": 1.189873417721519e-05, "loss": 0.4746, "step": 94 }, { "epoch": 0.18, "grad_norm": 8.797911643981934, "learning_rate": 1.2025316455696203e-05, "loss": 0.4246, "step": 95 }, { "epoch": 0.18, "grad_norm": 7.842344760894775, "learning_rate": 1.2151898734177216e-05, "loss": 0.4437, "step": 96 }, { "epoch": 0.18, "grad_norm": 8.411223411560059, "learning_rate": 1.227848101265823e-05, "loss": 0.4489, "step": 97 }, { "epoch": 0.19, "grad_norm": 8.294234275817871, "learning_rate": 1.240506329113924e-05, "loss": 0.4565, "step": 98 }, { "epoch": 0.19, "grad_norm": 8.5967435836792, "learning_rate": 1.2531645569620255e-05, "loss": 0.439, "step": 99 }, { "epoch": 0.19, "grad_norm": 8.397130966186523, "learning_rate": 1.2658227848101268e-05, "loss": 0.4572, "step": 100 }, { "epoch": 0.19, "eval_blimp_filtered_avg": 0.7285074626865672, "eval_blimp_filtered_std": 0.004910273855590704, "step": 100 }, { "epoch": 0.19, "eval_blimp_supplement_avg": 0.8038793103448276, "eval_blimp_supplement_std": 0.01723648076212719, "step": 100 }, { "epoch": 0.19, "eval_vqa_filtered_avg": 0.32, "eval_vqa_filtered_std": 0.04688261722621505, "step": 100 }, { "epoch": 0.19, "eval_winoground_filtered_avg": 0.51, "eval_winoground_filtered_std": 0.05024183937956912, "step": 100 }, { "epoch": 0.19, "grad_norm": 7.143470287322998, "learning_rate": 1.2784810126582278e-05, "loss": 0.3683, "step": 101 }, { "epoch": 0.19, "grad_norm": 8.535003662109375, "learning_rate": 1.2911392405063293e-05, "loss": 0.4021, "step": 102 }, { "epoch": 0.2, "grad_norm": 8.642658233642578, "learning_rate": 1.3037974683544306e-05, "loss": 0.4489, "step": 103 }, { "epoch": 0.2, "grad_norm": 8.19752025604248, "learning_rate": 1.3164556962025317e-05, "loss": 0.5054, "step": 104 }, { "epoch": 0.2, "grad_norm": 8.63571548461914, "learning_rate": 1.329113924050633e-05, "loss": 0.5524, "step": 105 }, { "epoch": 0.2, "grad_norm": 8.052386283874512, "learning_rate": 1.3417721518987344e-05, "loss": 0.5231, "step": 106 }, { "epoch": 0.2, "grad_norm": 6.74711799621582, "learning_rate": 1.3544303797468355e-05, "loss": 0.4193, "step": 107 }, { "epoch": 0.21, "grad_norm": 7.658453464508057, "learning_rate": 1.3670886075949368e-05, "loss": 0.5991, "step": 108 }, { "epoch": 0.21, "grad_norm": 7.337573051452637, "learning_rate": 1.3797468354430381e-05, "loss": 0.4844, "step": 109 }, { "epoch": 0.21, "grad_norm": 7.249210834503174, "learning_rate": 1.3924050632911395e-05, "loss": 0.4778, "step": 110 }, { "epoch": 0.21, "grad_norm": 7.00091552734375, "learning_rate": 1.4050632911392406e-05, "loss": 0.4629, "step": 111 }, { "epoch": 0.21, "grad_norm": 7.1031575202941895, "learning_rate": 1.417721518987342e-05, "loss": 0.411, "step": 112 }, { "epoch": 0.21, "grad_norm": 8.36849594116211, "learning_rate": 1.4303797468354432e-05, "loss": 0.4735, "step": 113 }, { "epoch": 0.22, "grad_norm": 7.817984580993652, "learning_rate": 1.4430379746835444e-05, "loss": 0.4219, "step": 114 }, { "epoch": 0.22, "grad_norm": 8.001487731933594, "learning_rate": 1.4556962025316457e-05, "loss": 0.4068, "step": 115 }, { "epoch": 0.22, "grad_norm": 8.72296142578125, "learning_rate": 1.468354430379747e-05, "loss": 0.5771, "step": 116 }, { "epoch": 0.22, "grad_norm": 7.332859039306641, "learning_rate": 1.4810126582278482e-05, "loss": 0.4084, "step": 117 }, { "epoch": 0.22, "grad_norm": 7.034232139587402, "learning_rate": 1.4936708860759495e-05, "loss": 0.4198, "step": 118 }, { "epoch": 0.23, "grad_norm": 7.014504432678223, "learning_rate": 1.5063291139240508e-05, "loss": 0.421, "step": 119 }, { "epoch": 0.23, "grad_norm": 6.548791885375977, "learning_rate": 1.5189873417721521e-05, "loss": 0.4012, "step": 120 }, { "epoch": 0.23, "grad_norm": 8.004155158996582, "learning_rate": 1.531645569620253e-05, "loss": 0.5149, "step": 121 }, { "epoch": 0.23, "grad_norm": 7.362954139709473, "learning_rate": 1.5443037974683546e-05, "loss": 0.46, "step": 122 }, { "epoch": 0.23, "grad_norm": 6.969780445098877, "learning_rate": 1.556962025316456e-05, "loss": 0.4782, "step": 123 }, { "epoch": 0.24, "grad_norm": 7.22606086730957, "learning_rate": 1.5696202531645572e-05, "loss": 0.4488, "step": 124 }, { "epoch": 0.24, "grad_norm": 7.959131240844727, "learning_rate": 1.5822784810126583e-05, "loss": 0.5301, "step": 125 }, { "epoch": 0.24, "grad_norm": 8.145837783813477, "learning_rate": 1.5949367088607598e-05, "loss": 0.585, "step": 126 }, { "epoch": 0.24, "grad_norm": 8.720370292663574, "learning_rate": 1.607594936708861e-05, "loss": 0.5409, "step": 127 }, { "epoch": 0.24, "grad_norm": 7.756037712097168, "learning_rate": 1.620253164556962e-05, "loss": 0.4977, "step": 128 }, { "epoch": 0.25, "grad_norm": 7.202569007873535, "learning_rate": 1.6329113924050636e-05, "loss": 0.4927, "step": 129 }, { "epoch": 0.25, "grad_norm": 7.173158645629883, "learning_rate": 1.6455696202531647e-05, "loss": 0.5295, "step": 130 }, { "epoch": 0.25, "grad_norm": 6.526828765869141, "learning_rate": 1.658227848101266e-05, "loss": 0.4473, "step": 131 }, { "epoch": 0.25, "grad_norm": 7.065744876861572, "learning_rate": 1.6708860759493674e-05, "loss": 0.538, "step": 132 }, { "epoch": 0.25, "grad_norm": 6.284942150115967, "learning_rate": 1.6835443037974685e-05, "loss": 0.4432, "step": 133 }, { "epoch": 0.25, "grad_norm": 6.785105228424072, "learning_rate": 1.6962025316455696e-05, "loss": 0.4613, "step": 134 }, { "epoch": 0.26, "grad_norm": 7.583116054534912, "learning_rate": 1.708860759493671e-05, "loss": 0.494, "step": 135 }, { "epoch": 0.26, "grad_norm": 6.013336658477783, "learning_rate": 1.7215189873417723e-05, "loss": 0.3808, "step": 136 }, { "epoch": 0.26, "grad_norm": 8.11418628692627, "learning_rate": 1.7341772151898734e-05, "loss": 0.5029, "step": 137 }, { "epoch": 0.26, "grad_norm": 6.614815711975098, "learning_rate": 1.746835443037975e-05, "loss": 0.4234, "step": 138 }, { "epoch": 0.26, "grad_norm": 6.577409744262695, "learning_rate": 1.759493670886076e-05, "loss": 0.4305, "step": 139 }, { "epoch": 0.27, "grad_norm": 6.5638346672058105, "learning_rate": 1.7721518987341772e-05, "loss": 0.4287, "step": 140 }, { "epoch": 0.27, "grad_norm": 7.177755832672119, "learning_rate": 1.7848101265822787e-05, "loss": 0.4893, "step": 141 }, { "epoch": 0.27, "grad_norm": 5.189929008483887, "learning_rate": 1.7974683544303798e-05, "loss": 0.3607, "step": 142 }, { "epoch": 0.27, "grad_norm": 6.592679500579834, "learning_rate": 1.8101265822784813e-05, "loss": 0.5059, "step": 143 }, { "epoch": 0.27, "grad_norm": 7.330121994018555, "learning_rate": 1.8227848101265824e-05, "loss": 0.4452, "step": 144 }, { "epoch": 0.28, "grad_norm": 6.300533294677734, "learning_rate": 1.8354430379746836e-05, "loss": 0.4297, "step": 145 }, { "epoch": 0.28, "grad_norm": 6.569092750549316, "learning_rate": 1.848101265822785e-05, "loss": 0.4895, "step": 146 }, { "epoch": 0.28, "grad_norm": 6.288923263549805, "learning_rate": 1.8607594936708862e-05, "loss": 0.3853, "step": 147 }, { "epoch": 0.28, "grad_norm": 6.512166500091553, "learning_rate": 1.8734177215189874e-05, "loss": 0.4028, "step": 148 }, { "epoch": 0.28, "grad_norm": 5.949954986572266, "learning_rate": 1.886075949367089e-05, "loss": 0.4528, "step": 149 }, { "epoch": 0.29, "grad_norm": 6.859171390533447, "learning_rate": 1.89873417721519e-05, "loss": 0.4944, "step": 150 }, { "epoch": 0.29, "grad_norm": 6.8777971267700195, "learning_rate": 1.911392405063291e-05, "loss": 0.4796, "step": 151 }, { "epoch": 0.29, "grad_norm": 6.574897766113281, "learning_rate": 1.9240506329113926e-05, "loss": 0.4843, "step": 152 }, { "epoch": 0.29, "grad_norm": 6.989874839782715, "learning_rate": 1.936708860759494e-05, "loss": 0.4309, "step": 153 }, { "epoch": 0.29, "grad_norm": 5.6489458084106445, "learning_rate": 1.949367088607595e-05, "loss": 0.3653, "step": 154 }, { "epoch": 0.29, "grad_norm": 5.906219482421875, "learning_rate": 1.9620253164556964e-05, "loss": 0.463, "step": 155 }, { "epoch": 0.3, "grad_norm": 6.560126304626465, "learning_rate": 1.974683544303798e-05, "loss": 0.447, "step": 156 }, { "epoch": 0.3, "grad_norm": 6.210864543914795, "learning_rate": 1.9873417721518987e-05, "loss": 0.3471, "step": 157 }, { "epoch": 0.3, "grad_norm": 6.949997425079346, "learning_rate": 2e-05, "loss": 0.4573, "step": 158 }, { "epoch": 0.3, "grad_norm": 6.795788764953613, "learning_rate": 1.9999998104216118e-05, "loss": 0.565, "step": 159 }, { "epoch": 0.3, "grad_norm": 5.916255474090576, "learning_rate": 1.9999992416865178e-05, "loss": 0.3889, "step": 160 }, { "epoch": 0.31, "grad_norm": 5.725336074829102, "learning_rate": 1.999998293794934e-05, "loss": 0.3326, "step": 161 }, { "epoch": 0.31, "grad_norm": 5.384416103363037, "learning_rate": 1.9999969667472205e-05, "loss": 0.4047, "step": 162 }, { "epoch": 0.31, "grad_norm": 5.758926868438721, "learning_rate": 1.9999952605438795e-05, "loss": 0.4176, "step": 163 }, { "epoch": 0.31, "grad_norm": 6.0324506759643555, "learning_rate": 1.9999931751855585e-05, "loss": 0.3851, "step": 164 }, { "epoch": 0.31, "grad_norm": 5.878495693206787, "learning_rate": 1.999990710673048e-05, "loss": 0.4522, "step": 165 }, { "epoch": 0.32, "grad_norm": 7.225064277648926, "learning_rate": 1.999987867007282e-05, "loss": 0.5251, "step": 166 }, { "epoch": 0.32, "grad_norm": 5.815621376037598, "learning_rate": 1.99998464418934e-05, "loss": 0.4508, "step": 167 }, { "epoch": 0.32, "grad_norm": 6.609476089477539, "learning_rate": 1.999981042220442e-05, "loss": 0.4953, "step": 168 }, { "epoch": 0.32, "grad_norm": 5.439825534820557, "learning_rate": 1.9999770611019555e-05, "loss": 0.4133, "step": 169 }, { "epoch": 0.32, "grad_norm": 5.363547325134277, "learning_rate": 1.999972700835389e-05, "loss": 0.4313, "step": 170 }, { "epoch": 0.33, "grad_norm": 5.519766807556152, "learning_rate": 1.9999679614223963e-05, "loss": 0.391, "step": 171 }, { "epoch": 0.33, "grad_norm": 5.298546314239502, "learning_rate": 1.9999628428647736e-05, "loss": 0.3421, "step": 172 }, { "epoch": 0.33, "grad_norm": 4.748227596282959, "learning_rate": 1.999957345164463e-05, "loss": 0.349, "step": 173 }, { "epoch": 0.33, "grad_norm": 6.7254252433776855, "learning_rate": 1.999951468323547e-05, "loss": 0.5158, "step": 174 }, { "epoch": 0.33, "grad_norm": 6.016841411590576, "learning_rate": 1.9999452123442556e-05, "loss": 0.4355, "step": 175 }, { "epoch": 0.33, "grad_norm": 6.009034633636475, "learning_rate": 1.99993857722896e-05, "loss": 0.5227, "step": 176 }, { "epoch": 0.34, "grad_norm": 5.370485782623291, "learning_rate": 1.9999315629801757e-05, "loss": 0.358, "step": 177 }, { "epoch": 0.34, "grad_norm": 5.226022243499756, "learning_rate": 1.999924169600563e-05, "loss": 0.4559, "step": 178 }, { "epoch": 0.34, "grad_norm": 5.5289411544799805, "learning_rate": 1.9999163970929248e-05, "loss": 0.4885, "step": 179 }, { "epoch": 0.34, "grad_norm": 4.926865577697754, "learning_rate": 1.999908245460208e-05, "loss": 0.4251, "step": 180 }, { "epoch": 0.34, "grad_norm": 5.9696431159973145, "learning_rate": 1.999899714705503e-05, "loss": 0.5481, "step": 181 }, { "epoch": 0.35, "grad_norm": 5.020255088806152, "learning_rate": 1.999890804832045e-05, "loss": 0.4829, "step": 182 }, { "epoch": 0.35, "grad_norm": 5.2074055671691895, "learning_rate": 1.999881515843212e-05, "loss": 0.4068, "step": 183 }, { "epoch": 0.35, "grad_norm": 4.671884059906006, "learning_rate": 1.9998718477425257e-05, "loss": 0.3535, "step": 184 }, { "epoch": 0.35, "grad_norm": 4.821014881134033, "learning_rate": 1.9998618005336522e-05, "loss": 0.3483, "step": 185 }, { "epoch": 0.35, "grad_norm": 5.817379474639893, "learning_rate": 1.9998513742204005e-05, "loss": 0.3669, "step": 186 }, { "epoch": 0.36, "grad_norm": 5.313624382019043, "learning_rate": 1.999840568806724e-05, "loss": 0.4527, "step": 187 }, { "epoch": 0.36, "grad_norm": 4.717206954956055, "learning_rate": 1.99982938429672e-05, "loss": 0.3479, "step": 188 }, { "epoch": 0.36, "grad_norm": 7.582536220550537, "learning_rate": 1.999817820694629e-05, "loss": 0.466, "step": 189 }, { "epoch": 0.36, "grad_norm": 5.469146251678467, "learning_rate": 1.999805878004835e-05, "loss": 0.4212, "step": 190 }, { "epoch": 0.36, "grad_norm": 5.7150139808654785, "learning_rate": 1.9997935562318668e-05, "loss": 0.366, "step": 191 }, { "epoch": 0.37, "grad_norm": 4.708168983459473, "learning_rate": 1.999780855380396e-05, "loss": 0.3328, "step": 192 }, { "epoch": 0.37, "grad_norm": 5.554585933685303, "learning_rate": 1.9997677754552383e-05, "loss": 0.4282, "step": 193 }, { "epoch": 0.37, "grad_norm": 4.4782562255859375, "learning_rate": 1.9997543164613525e-05, "loss": 0.3081, "step": 194 }, { "epoch": 0.37, "grad_norm": 4.7484130859375, "learning_rate": 1.9997404784038426e-05, "loss": 0.3135, "step": 195 }, { "epoch": 0.37, "grad_norm": 5.375766277313232, "learning_rate": 1.9997262612879544e-05, "loss": 0.3431, "step": 196 }, { "epoch": 0.37, "grad_norm": 5.190089225769043, "learning_rate": 1.9997116651190794e-05, "loss": 0.3516, "step": 197 }, { "epoch": 0.38, "grad_norm": 5.3348708152771, "learning_rate": 1.999696689902751e-05, "loss": 0.4358, "step": 198 }, { "epoch": 0.38, "grad_norm": 5.199974060058594, "learning_rate": 1.9996813356446477e-05, "loss": 0.3763, "step": 199 }, { "epoch": 0.38, "grad_norm": 5.626410961151123, "learning_rate": 1.9996656023505907e-05, "loss": 0.3324, "step": 200 }, { "epoch": 0.38, "eval_blimp_filtered_avg": 0.7344776119402985, "eval_blimp_filtered_std": 0.004834488986889231, "step": 200 }, { "epoch": 0.38, "eval_blimp_supplement_avg": 0.8038793103448276, "eval_blimp_supplement_std": 0.017265484895378016, "step": 200 }, { "epoch": 0.38, "eval_vqa_filtered_avg": 0.42, "eval_vqa_filtered_std": 0.049604496374885836, "step": 200 }, { "epoch": 0.38, "eval_winoground_filtered_avg": 0.55, "eval_winoground_filtered_std": 0.05, "step": 200 }, { "epoch": 0.38, "grad_norm": 5.524914741516113, "learning_rate": 1.999649490026546e-05, "loss": 0.383, "step": 201 }, { "epoch": 0.38, "grad_norm": 4.9166975021362305, "learning_rate": 1.999632998678622e-05, "loss": 0.2595, "step": 202 }, { "epoch": 0.39, "grad_norm": 5.417913436889648, "learning_rate": 1.999616128313072e-05, "loss": 0.3786, "step": 203 }, { "epoch": 0.39, "grad_norm": 5.277467727661133, "learning_rate": 1.9995988789362926e-05, "loss": 0.3962, "step": 204 }, { "epoch": 0.39, "grad_norm": 6.196109294891357, "learning_rate": 1.9995812505548235e-05, "loss": 0.4754, "step": 205 }, { "epoch": 0.39, "grad_norm": 4.8850860595703125, "learning_rate": 1.9995632431753493e-05, "loss": 0.3481, "step": 206 }, { "epoch": 0.39, "grad_norm": 4.816228866577148, "learning_rate": 1.999544856804697e-05, "loss": 0.3128, "step": 207 }, { "epoch": 0.4, "grad_norm": 5.116236686706543, "learning_rate": 1.999526091449838e-05, "loss": 0.3317, "step": 208 }, { "epoch": 0.4, "grad_norm": 4.8913655281066895, "learning_rate": 1.9995069471178875e-05, "loss": 0.3706, "step": 209 }, { "epoch": 0.4, "grad_norm": 4.422940731048584, "learning_rate": 1.9994874238161043e-05, "loss": 0.3679, "step": 210 }, { "epoch": 0.4, "grad_norm": 5.0612711906433105, "learning_rate": 1.9994675215518902e-05, "loss": 0.3863, "step": 211 }, { "epoch": 0.4, "grad_norm": 3.7714264392852783, "learning_rate": 1.9994472403327924e-05, "loss": 0.3063, "step": 212 }, { "epoch": 0.4, "grad_norm": 4.188554763793945, "learning_rate": 1.9994265801664995e-05, "loss": 0.2666, "step": 213 }, { "epoch": 0.41, "grad_norm": 4.179845333099365, "learning_rate": 1.999405541060846e-05, "loss": 0.3009, "step": 214 }, { "epoch": 0.41, "grad_norm": 4.937391757965088, "learning_rate": 1.999384123023808e-05, "loss": 0.3736, "step": 215 }, { "epoch": 0.41, "grad_norm": 5.369380474090576, "learning_rate": 1.9993623260635068e-05, "loss": 0.2944, "step": 216 }, { "epoch": 0.41, "grad_norm": 5.395383834838867, "learning_rate": 1.999340150188207e-05, "loss": 0.333, "step": 217 }, { "epoch": 0.41, "grad_norm": 4.934708118438721, "learning_rate": 1.9993175954063165e-05, "loss": 0.3449, "step": 218 }, { "epoch": 0.42, "grad_norm": 4.773044586181641, "learning_rate": 1.999294661726387e-05, "loss": 0.296, "step": 219 }, { "epoch": 0.42, "grad_norm": 5.016208171844482, "learning_rate": 1.9992713491571144e-05, "loss": 0.3251, "step": 220 }, { "epoch": 0.42, "grad_norm": 4.217487812042236, "learning_rate": 1.9992476577073372e-05, "loss": 0.2614, "step": 221 }, { "epoch": 0.42, "grad_norm": 5.025454044342041, "learning_rate": 1.999223587386039e-05, "loss": 0.3418, "step": 222 }, { "epoch": 0.42, "grad_norm": 5.24716329574585, "learning_rate": 1.999199138202345e-05, "loss": 0.4165, "step": 223 }, { "epoch": 0.43, "grad_norm": 4.2792582511901855, "learning_rate": 1.9991743101655265e-05, "loss": 0.3438, "step": 224 }, { "epoch": 0.43, "grad_norm": 3.797445774078369, "learning_rate": 1.9991491032849962e-05, "loss": 0.239, "step": 225 }, { "epoch": 0.43, "grad_norm": 4.5433735847473145, "learning_rate": 1.9991235175703126e-05, "loss": 0.3694, "step": 226 }, { "epoch": 0.43, "grad_norm": 4.721287727355957, "learning_rate": 1.999097553031176e-05, "loss": 0.2675, "step": 227 }, { "epoch": 0.43, "grad_norm": 4.8156561851501465, "learning_rate": 1.9990712096774308e-05, "loss": 0.308, "step": 228 }, { "epoch": 0.44, "grad_norm": 4.169462203979492, "learning_rate": 1.9990444875190658e-05, "loss": 0.3005, "step": 229 }, { "epoch": 0.44, "grad_norm": 4.897852420806885, "learning_rate": 1.9990173865662126e-05, "loss": 0.3491, "step": 230 }, { "epoch": 0.44, "grad_norm": 4.115499019622803, "learning_rate": 1.9989899068291466e-05, "loss": 0.2841, "step": 231 }, { "epoch": 0.44, "grad_norm": 4.5129594802856445, "learning_rate": 1.9989620483182874e-05, "loss": 0.2853, "step": 232 }, { "epoch": 0.44, "grad_norm": 4.081862449645996, "learning_rate": 1.9989338110441972e-05, "loss": 0.3264, "step": 233 }, { "epoch": 0.44, "grad_norm": 4.213527202606201, "learning_rate": 1.9989051950175828e-05, "loss": 0.2384, "step": 234 }, { "epoch": 0.45, "grad_norm": 5.283787250518799, "learning_rate": 1.998876200249294e-05, "loss": 0.2906, "step": 235 }, { "epoch": 0.45, "grad_norm": 4.278689861297607, "learning_rate": 1.998846826750324e-05, "loss": 0.2827, "step": 236 }, { "epoch": 0.45, "grad_norm": 4.934478282928467, "learning_rate": 1.9988170745318104e-05, "loss": 0.3404, "step": 237 }, { "epoch": 0.45, "grad_norm": 3.637840986251831, "learning_rate": 1.998786943605034e-05, "loss": 0.2881, "step": 238 }, { "epoch": 0.45, "grad_norm": 4.169079303741455, "learning_rate": 1.9987564339814185e-05, "loss": 0.267, "step": 239 }, { "epoch": 0.46, "grad_norm": 4.209916591644287, "learning_rate": 1.9987255456725326e-05, "loss": 0.2561, "step": 240 }, { "epoch": 0.46, "grad_norm": 5.267224311828613, "learning_rate": 1.9986942786900877e-05, "loss": 0.3752, "step": 241 }, { "epoch": 0.46, "grad_norm": 3.7737863063812256, "learning_rate": 1.9986626330459385e-05, "loss": 0.2942, "step": 242 }, { "epoch": 0.46, "grad_norm": 4.861094951629639, "learning_rate": 1.998630608752084e-05, "loss": 0.3573, "step": 243 }, { "epoch": 0.46, "grad_norm": 4.326540470123291, "learning_rate": 1.998598205820666e-05, "loss": 0.2728, "step": 244 }, { "epoch": 0.47, "grad_norm": 4.328089237213135, "learning_rate": 1.9985654242639708e-05, "loss": 0.2597, "step": 245 }, { "epoch": 0.47, "grad_norm": 4.542089939117432, "learning_rate": 1.9985322640944278e-05, "loss": 0.2922, "step": 246 }, { "epoch": 0.47, "grad_norm": 4.249187469482422, "learning_rate": 1.998498725324609e-05, "loss": 0.292, "step": 247 }, { "epoch": 0.47, "grad_norm": 4.852876663208008, "learning_rate": 1.9984648079672322e-05, "loss": 0.3847, "step": 248 }, { "epoch": 0.47, "grad_norm": 4.52667236328125, "learning_rate": 1.9984305120351562e-05, "loss": 0.3369, "step": 249 }, { "epoch": 0.48, "grad_norm": 4.062838554382324, "learning_rate": 1.998395837541385e-05, "loss": 0.2813, "step": 250 }, { "epoch": 0.48, "grad_norm": 4.058770656585693, "learning_rate": 1.998360784499066e-05, "loss": 0.2681, "step": 251 }, { "epoch": 0.48, "grad_norm": 4.274977207183838, "learning_rate": 1.9983253529214893e-05, "loss": 0.2924, "step": 252 }, { "epoch": 0.48, "grad_norm": 4.264193534851074, "learning_rate": 1.9982895428220896e-05, "loss": 0.2982, "step": 253 }, { "epoch": 0.48, "grad_norm": 4.792800426483154, "learning_rate": 1.9982533542144438e-05, "loss": 0.332, "step": 254 }, { "epoch": 0.48, "grad_norm": 3.7139153480529785, "learning_rate": 1.9982167871122733e-05, "loss": 0.2678, "step": 255 }, { "epoch": 0.49, "grad_norm": 4.893666744232178, "learning_rate": 1.998179841529443e-05, "loss": 0.3001, "step": 256 }, { "epoch": 0.49, "grad_norm": 5.056347370147705, "learning_rate": 1.9981425174799607e-05, "loss": 0.3154, "step": 257 }, { "epoch": 0.49, "grad_norm": 5.7921319007873535, "learning_rate": 1.9981048149779788e-05, "loss": 0.3674, "step": 258 }, { "epoch": 0.49, "grad_norm": 5.570481777191162, "learning_rate": 1.9980667340377916e-05, "loss": 0.2871, "step": 259 }, { "epoch": 0.49, "grad_norm": 4.4465107917785645, "learning_rate": 1.9980282746738385e-05, "loss": 0.3479, "step": 260 }, { "epoch": 0.5, "grad_norm": 4.204236030578613, "learning_rate": 1.997989436900701e-05, "loss": 0.2366, "step": 261 }, { "epoch": 0.5, "grad_norm": 5.011067867279053, "learning_rate": 1.9979502207331046e-05, "loss": 0.3267, "step": 262 }, { "epoch": 0.5, "grad_norm": 7.189114093780518, "learning_rate": 1.9979106261859195e-05, "loss": 0.2957, "step": 263 }, { "epoch": 0.5, "grad_norm": 3.916778326034546, "learning_rate": 1.997870653274157e-05, "loss": 0.284, "step": 264 }, { "epoch": 0.5, "grad_norm": 4.392454147338867, "learning_rate": 1.9978303020129734e-05, "loss": 0.2944, "step": 265 }, { "epoch": 0.51, "grad_norm": 3.857675790786743, "learning_rate": 1.9977895724176687e-05, "loss": 0.2427, "step": 266 }, { "epoch": 0.51, "grad_norm": 3.855292320251465, "learning_rate": 1.9977484645036853e-05, "loss": 0.3301, "step": 267 }, { "epoch": 0.51, "grad_norm": 4.313021659851074, "learning_rate": 1.9977069782866096e-05, "loss": 0.3851, "step": 268 }, { "epoch": 0.51, "grad_norm": 3.7359249591827393, "learning_rate": 1.997665113782171e-05, "loss": 0.2093, "step": 269 }, { "epoch": 0.51, "grad_norm": 4.3216471672058105, "learning_rate": 1.997622871006244e-05, "loss": 0.2848, "step": 270 }, { "epoch": 0.52, "grad_norm": 4.780280113220215, "learning_rate": 1.9975802499748438e-05, "loss": 0.2817, "step": 271 }, { "epoch": 0.52, "grad_norm": 4.455266952514648, "learning_rate": 1.9975372507041314e-05, "loss": 0.2971, "step": 272 }, { "epoch": 0.52, "grad_norm": 3.7722551822662354, "learning_rate": 1.9974938732104096e-05, "loss": 0.2787, "step": 273 }, { "epoch": 0.52, "grad_norm": 4.7231268882751465, "learning_rate": 1.997450117510126e-05, "loss": 0.2852, "step": 274 }, { "epoch": 0.52, "grad_norm": 3.78767991065979, "learning_rate": 1.9974059836198698e-05, "loss": 0.2491, "step": 275 }, { "epoch": 0.52, "grad_norm": 4.469750881195068, "learning_rate": 1.9973614715563757e-05, "loss": 0.3171, "step": 276 }, { "epoch": 0.53, "grad_norm": 4.442530632019043, "learning_rate": 1.9973165813365205e-05, "loss": 0.2843, "step": 277 }, { "epoch": 0.53, "grad_norm": 3.5770297050476074, "learning_rate": 1.9972713129773242e-05, "loss": 0.2482, "step": 278 }, { "epoch": 0.53, "grad_norm": 4.077334880828857, "learning_rate": 1.9972256664959514e-05, "loss": 0.2497, "step": 279 }, { "epoch": 0.53, "grad_norm": 4.1382856369018555, "learning_rate": 1.9971796419097082e-05, "loss": 0.3072, "step": 280 }, { "epoch": 0.53, "grad_norm": 4.029958248138428, "learning_rate": 1.997133239236046e-05, "loss": 0.2727, "step": 281 }, { "epoch": 0.54, "grad_norm": 3.7956156730651855, "learning_rate": 1.997086458492558e-05, "loss": 0.2423, "step": 282 }, { "epoch": 0.54, "grad_norm": 3.580672264099121, "learning_rate": 1.9970392996969826e-05, "loss": 0.2168, "step": 283 }, { "epoch": 0.54, "grad_norm": 4.7634077072143555, "learning_rate": 1.996991762867199e-05, "loss": 0.3595, "step": 284 }, { "epoch": 0.54, "grad_norm": 4.852893829345703, "learning_rate": 1.996943848021232e-05, "loss": 0.2856, "step": 285 }, { "epoch": 0.54, "grad_norm": 4.841672420501709, "learning_rate": 1.9968955551772483e-05, "loss": 0.2827, "step": 286 }, { "epoch": 0.55, "grad_norm": 3.739478588104248, "learning_rate": 1.9968468843535592e-05, "loss": 0.2404, "step": 287 }, { "epoch": 0.55, "grad_norm": 6.25899076461792, "learning_rate": 1.9967978355686176e-05, "loss": 0.2722, "step": 288 }, { "epoch": 0.55, "grad_norm": 3.480301856994629, "learning_rate": 1.9967484088410214e-05, "loss": 0.2543, "step": 289 }, { "epoch": 0.55, "grad_norm": 4.299169540405273, "learning_rate": 1.9966986041895106e-05, "loss": 0.332, "step": 290 }, { "epoch": 0.55, "grad_norm": 3.571326971054077, "learning_rate": 1.9966484216329696e-05, "loss": 0.1847, "step": 291 }, { "epoch": 0.56, "grad_norm": 4.099297523498535, "learning_rate": 1.996597861190425e-05, "loss": 0.2519, "step": 292 }, { "epoch": 0.56, "grad_norm": 4.418753623962402, "learning_rate": 1.996546922881047e-05, "loss": 0.3101, "step": 293 }, { "epoch": 0.56, "grad_norm": 4.899559020996094, "learning_rate": 1.9964956067241493e-05, "loss": 0.375, "step": 294 }, { "epoch": 0.56, "grad_norm": 3.362171173095703, "learning_rate": 1.996443912739189e-05, "loss": 0.2105, "step": 295 }, { "epoch": 0.56, "grad_norm": 3.4521994590759277, "learning_rate": 1.996391840945766e-05, "loss": 0.2447, "step": 296 }, { "epoch": 0.56, "grad_norm": 3.146463632583618, "learning_rate": 1.9963393913636242e-05, "loss": 0.2632, "step": 297 }, { "epoch": 0.57, "grad_norm": 3.9787914752960205, "learning_rate": 1.9962865640126497e-05, "loss": 0.2758, "step": 298 }, { "epoch": 0.57, "grad_norm": 2.852154493331909, "learning_rate": 1.9962333589128722e-05, "loss": 0.1546, "step": 299 }, { "epoch": 0.57, "grad_norm": 4.997208595275879, "learning_rate": 1.996179776084465e-05, "loss": 0.3144, "step": 300 }, { "epoch": 0.57, "eval_blimp_filtered_avg": 0.7370149253731343, "eval_blimp_filtered_std": 0.00485193481826363, "step": 300 }, { "epoch": 0.57, "eval_blimp_supplement_avg": 0.8060344827586207, "eval_blimp_supplement_std": 0.017224218015252448, "step": 300 }, { "epoch": 0.57, "eval_vqa_filtered_avg": 0.29, "eval_vqa_filtered_std": 0.045604802157206845, "step": 300 }, { "epoch": 0.57, "eval_winoground_filtered_avg": 0.45, "eval_winoground_filtered_std": 0.05, "step": 300 }, { "epoch": 0.57, "grad_norm": 3.778585910797119, "learning_rate": 1.9961258155477448e-05, "loss": 0.2561, "step": 301 }, { "epoch": 0.57, "grad_norm": 4.938632488250732, "learning_rate": 1.9960714773231704e-05, "loss": 0.3654, "step": 302 }, { "epoch": 0.58, "grad_norm": 4.031169891357422, "learning_rate": 1.996016761431345e-05, "loss": 0.3074, "step": 303 }, { "epoch": 0.58, "grad_norm": 4.3776326179504395, "learning_rate": 1.995961667893014e-05, "loss": 0.2471, "step": 304 }, { "epoch": 0.58, "grad_norm": 4.33348274230957, "learning_rate": 1.995906196729067e-05, "loss": 0.3063, "step": 305 }, { "epoch": 0.58, "grad_norm": 3.5247442722320557, "learning_rate": 1.995850347960536e-05, "loss": 0.255, "step": 306 }, { "epoch": 0.58, "grad_norm": 4.742520332336426, "learning_rate": 1.9957941216085968e-05, "loss": 0.2679, "step": 307 }, { "epoch": 0.59, "grad_norm": 4.119320392608643, "learning_rate": 1.9957375176945675e-05, "loss": 0.2295, "step": 308 }, { "epoch": 0.59, "grad_norm": 3.5538218021392822, "learning_rate": 1.99568053623991e-05, "loss": 0.2511, "step": 309 }, { "epoch": 0.59, "grad_norm": 4.626170635223389, "learning_rate": 1.9956231772662294e-05, "loss": 0.3929, "step": 310 }, { "epoch": 0.59, "grad_norm": 3.11904239654541, "learning_rate": 1.9955654407952735e-05, "loss": 0.2049, "step": 311 }, { "epoch": 0.59, "grad_norm": 3.680948495864868, "learning_rate": 1.9955073268489337e-05, "loss": 0.2436, "step": 312 }, { "epoch": 0.6, "grad_norm": 3.5279901027679443, "learning_rate": 1.9954488354492444e-05, "loss": 0.2492, "step": 313 }, { "epoch": 0.6, "grad_norm": 3.551846981048584, "learning_rate": 1.9953899666183823e-05, "loss": 0.2278, "step": 314 }, { "epoch": 0.6, "grad_norm": 3.0324833393096924, "learning_rate": 1.9953307203786688e-05, "loss": 0.1719, "step": 315 }, { "epoch": 0.6, "grad_norm": 3.5696136951446533, "learning_rate": 1.995271096752567e-05, "loss": 0.2257, "step": 316 }, { "epoch": 0.6, "grad_norm": 3.117539882659912, "learning_rate": 1.995211095762684e-05, "loss": 0.1694, "step": 317 }, { "epoch": 0.6, "grad_norm": 3.6812808513641357, "learning_rate": 1.995150717431769e-05, "loss": 0.1929, "step": 318 }, { "epoch": 0.61, "grad_norm": 4.785617351531982, "learning_rate": 1.9950899617827156e-05, "loss": 0.2391, "step": 319 }, { "epoch": 0.61, "grad_norm": 4.80804967880249, "learning_rate": 1.995028828838559e-05, "loss": 0.4002, "step": 320 }, { "epoch": 0.61, "grad_norm": 3.639134168624878, "learning_rate": 1.9949673186224784e-05, "loss": 0.2951, "step": 321 }, { "epoch": 0.61, "grad_norm": 3.3305680751800537, "learning_rate": 1.994905431157796e-05, "loss": 0.1789, "step": 322 }, { "epoch": 0.61, "grad_norm": 4.09285831451416, "learning_rate": 1.9948431664679763e-05, "loss": 0.2832, "step": 323 }, { "epoch": 0.62, "grad_norm": 3.0009891986846924, "learning_rate": 1.9947805245766284e-05, "loss": 0.1502, "step": 324 }, { "epoch": 0.62, "grad_norm": 3.702785015106201, "learning_rate": 1.9947175055075023e-05, "loss": 0.297, "step": 325 }, { "epoch": 0.62, "grad_norm": 3.0659260749816895, "learning_rate": 1.994654109284493e-05, "loss": 0.2056, "step": 326 }, { "epoch": 0.62, "grad_norm": 3.76467227935791, "learning_rate": 1.994590335931637e-05, "loss": 0.2801, "step": 327 }, { "epoch": 0.62, "grad_norm": 3.600243330001831, "learning_rate": 1.994526185473115e-05, "loss": 0.2344, "step": 328 }, { "epoch": 0.63, "grad_norm": 3.2525148391723633, "learning_rate": 1.9944616579332493e-05, "loss": 0.2228, "step": 329 }, { "epoch": 0.63, "grad_norm": 2.7308640480041504, "learning_rate": 1.9943967533365063e-05, "loss": 0.1545, "step": 330 }, { "epoch": 0.63, "grad_norm": 3.5000762939453125, "learning_rate": 1.9943314717074953e-05, "loss": 0.1695, "step": 331 }, { "epoch": 0.63, "grad_norm": 4.224757194519043, "learning_rate": 1.994265813070968e-05, "loss": 0.2561, "step": 332 }, { "epoch": 0.63, "grad_norm": 3.927216053009033, "learning_rate": 1.9941997774518194e-05, "loss": 0.2667, "step": 333 }, { "epoch": 0.63, "grad_norm": 3.6394364833831787, "learning_rate": 1.994133364875087e-05, "loss": 0.2042, "step": 334 }, { "epoch": 0.64, "grad_norm": 3.411041259765625, "learning_rate": 1.9940665753659523e-05, "loss": 0.1923, "step": 335 }, { "epoch": 0.64, "grad_norm": 3.71993088722229, "learning_rate": 1.9939994089497382e-05, "loss": 0.2379, "step": 336 }, { "epoch": 0.64, "grad_norm": 4.252372741699219, "learning_rate": 1.993931865651912e-05, "loss": 0.2927, "step": 337 }, { "epoch": 0.64, "grad_norm": 3.1608121395111084, "learning_rate": 1.993863945498083e-05, "loss": 0.1838, "step": 338 }, { "epoch": 0.64, "grad_norm": 3.5801095962524414, "learning_rate": 1.9937956485140032e-05, "loss": 0.2117, "step": 339 }, { "epoch": 0.65, "grad_norm": 3.596424102783203, "learning_rate": 1.993726974725568e-05, "loss": 0.2663, "step": 340 }, { "epoch": 0.65, "grad_norm": 3.5700104236602783, "learning_rate": 1.993657924158816e-05, "loss": 0.2228, "step": 341 }, { "epoch": 0.65, "grad_norm": 3.109243631362915, "learning_rate": 1.9935884968399277e-05, "loss": 0.1725, "step": 342 }, { "epoch": 0.65, "grad_norm": 3.5466387271881104, "learning_rate": 1.993518692795227e-05, "loss": 0.2229, "step": 343 }, { "epoch": 0.65, "grad_norm": 3.0342178344726562, "learning_rate": 1.9934485120511812e-05, "loss": 0.1884, "step": 344 }, { "epoch": 0.66, "grad_norm": 4.191493034362793, "learning_rate": 1.9933779546343984e-05, "loss": 0.2443, "step": 345 }, { "epoch": 0.66, "grad_norm": 3.2438552379608154, "learning_rate": 1.9933070205716326e-05, "loss": 0.1564, "step": 346 }, { "epoch": 0.66, "grad_norm": 3.7069149017333984, "learning_rate": 1.9932357098897776e-05, "loss": 0.2372, "step": 347 }, { "epoch": 0.66, "grad_norm": 3.504709243774414, "learning_rate": 1.9931640226158722e-05, "loss": 0.2245, "step": 348 }, { "epoch": 0.66, "grad_norm": 3.1954729557037354, "learning_rate": 1.9930919587770966e-05, "loss": 0.1707, "step": 349 }, { "epoch": 0.67, "grad_norm": 3.5508780479431152, "learning_rate": 1.9930195184007747e-05, "loss": 0.1955, "step": 350 }, { "epoch": 0.67, "grad_norm": 2.437272071838379, "learning_rate": 1.9929467015143725e-05, "loss": 0.1514, "step": 351 }, { "epoch": 0.67, "grad_norm": 3.4603111743927, "learning_rate": 1.9928735081454985e-05, "loss": 0.2081, "step": 352 }, { "epoch": 0.67, "grad_norm": 2.9902443885803223, "learning_rate": 1.9927999383219058e-05, "loss": 0.1721, "step": 353 }, { "epoch": 0.67, "grad_norm": 3.487504720687866, "learning_rate": 1.9927259920714876e-05, "loss": 0.2294, "step": 354 }, { "epoch": 0.67, "grad_norm": 3.66101336479187, "learning_rate": 1.9926516694222817e-05, "loss": 0.2235, "step": 355 }, { "epoch": 0.68, "grad_norm": 3.9375245571136475, "learning_rate": 1.992576970402468e-05, "loss": 0.1933, "step": 356 }, { "epoch": 0.68, "grad_norm": 3.1413791179656982, "learning_rate": 1.992501895040369e-05, "loss": 0.1564, "step": 357 }, { "epoch": 0.68, "grad_norm": 3.612968921661377, "learning_rate": 1.9924264433644504e-05, "loss": 0.2131, "step": 358 }, { "epoch": 0.68, "grad_norm": 3.599403142929077, "learning_rate": 1.9923506154033195e-05, "loss": 0.2208, "step": 359 }, { "epoch": 0.68, "grad_norm": 3.2121212482452393, "learning_rate": 1.992274411185728e-05, "loss": 0.1832, "step": 360 }, { "epoch": 0.69, "grad_norm": 3.548311948776245, "learning_rate": 1.9921978307405685e-05, "loss": 0.1961, "step": 361 }, { "epoch": 0.69, "grad_norm": 3.765929937362671, "learning_rate": 1.992120874096877e-05, "loss": 0.3691, "step": 362 }, { "epoch": 0.69, "grad_norm": 3.356154441833496, "learning_rate": 1.9920435412838325e-05, "loss": 0.1438, "step": 363 }, { "epoch": 0.69, "grad_norm": 3.3493995666503906, "learning_rate": 1.9919658323307557e-05, "loss": 0.2134, "step": 364 }, { "epoch": 0.69, "grad_norm": 3.226386785507202, "learning_rate": 1.991887747267111e-05, "loss": 0.1492, "step": 365 }, { "epoch": 0.7, "grad_norm": 3.1778604984283447, "learning_rate": 1.991809286122505e-05, "loss": 0.2149, "step": 366 }, { "epoch": 0.7, "grad_norm": 3.6704227924346924, "learning_rate": 1.991730448926686e-05, "loss": 0.2708, "step": 367 }, { "epoch": 0.7, "grad_norm": 3.9407691955566406, "learning_rate": 1.9916512357095468e-05, "loss": 0.2228, "step": 368 }, { "epoch": 0.7, "grad_norm": 3.062875270843506, "learning_rate": 1.991571646501121e-05, "loss": 0.1748, "step": 369 }, { "epoch": 0.7, "grad_norm": 3.3860437870025635, "learning_rate": 1.9914916813315847e-05, "loss": 0.2482, "step": 370 }, { "epoch": 0.71, "grad_norm": 3.843346357345581, "learning_rate": 1.991411340231258e-05, "loss": 0.2589, "step": 371 }, { "epoch": 0.71, "grad_norm": 3.336725950241089, "learning_rate": 1.991330623230603e-05, "loss": 0.2033, "step": 372 }, { "epoch": 0.71, "grad_norm": 3.0072455406188965, "learning_rate": 1.991249530360224e-05, "loss": 0.1783, "step": 373 }, { "epoch": 0.71, "grad_norm": 3.728128671646118, "learning_rate": 1.991168061650867e-05, "loss": 0.2918, "step": 374 }, { "epoch": 0.71, "grad_norm": 3.5659749507904053, "learning_rate": 1.9910862171334225e-05, "loss": 0.2243, "step": 375 }, { "epoch": 0.71, "grad_norm": 3.4301748275756836, "learning_rate": 1.9910039968389222e-05, "loss": 0.1785, "step": 376 }, { "epoch": 0.72, "grad_norm": 3.156949043273926, "learning_rate": 1.9909214007985404e-05, "loss": 0.1687, "step": 377 }, { "epoch": 0.72, "grad_norm": 3.1461236476898193, "learning_rate": 1.9908384290435933e-05, "loss": 0.1781, "step": 378 }, { "epoch": 0.72, "grad_norm": 3.504927635192871, "learning_rate": 1.990755081605541e-05, "loss": 0.2313, "step": 379 }, { "epoch": 0.72, "grad_norm": 2.7732787132263184, "learning_rate": 1.990671358515985e-05, "loss": 0.1455, "step": 380 }, { "epoch": 0.72, "grad_norm": 3.803184986114502, "learning_rate": 1.9905872598066693e-05, "loss": 0.189, "step": 381 }, { "epoch": 0.73, "grad_norm": 4.091057777404785, "learning_rate": 1.990502785509481e-05, "loss": 0.26, "step": 382 }, { "epoch": 0.73, "grad_norm": 3.980290651321411, "learning_rate": 1.9904179356564483e-05, "loss": 0.2114, "step": 383 }, { "epoch": 0.73, "grad_norm": 3.1558403968811035, "learning_rate": 1.9903327102797433e-05, "loss": 0.1842, "step": 384 }, { "epoch": 0.73, "grad_norm": 3.0750272274017334, "learning_rate": 1.9902471094116794e-05, "loss": 0.174, "step": 385 }, { "epoch": 0.73, "grad_norm": 3.7213973999023438, "learning_rate": 1.990161133084713e-05, "loss": 0.1559, "step": 386 }, { "epoch": 0.74, "grad_norm": 3.0193381309509277, "learning_rate": 1.990074781331443e-05, "loss": 0.168, "step": 387 }, { "epoch": 0.74, "grad_norm": 3.6271984577178955, "learning_rate": 1.9899880541846093e-05, "loss": 0.2884, "step": 388 }, { "epoch": 0.74, "grad_norm": 3.707174301147461, "learning_rate": 1.9899009516770954e-05, "loss": 0.2188, "step": 389 }, { "epoch": 0.74, "grad_norm": 4.660815238952637, "learning_rate": 1.989813473841927e-05, "loss": 0.3676, "step": 390 }, { "epoch": 0.74, "grad_norm": 3.0515992641448975, "learning_rate": 1.989725620712272e-05, "loss": 0.218, "step": 391 }, { "epoch": 0.75, "grad_norm": 3.037468194961548, "learning_rate": 1.98963739232144e-05, "loss": 0.2251, "step": 392 }, { "epoch": 0.75, "grad_norm": 3.248065233230591, "learning_rate": 1.989548788702884e-05, "loss": 0.188, "step": 393 }, { "epoch": 0.75, "grad_norm": 3.2065625190734863, "learning_rate": 1.9894598098901988e-05, "loss": 0.1929, "step": 394 }, { "epoch": 0.75, "grad_norm": 3.0597288608551025, "learning_rate": 1.9893704559171202e-05, "loss": 0.214, "step": 395 }, { "epoch": 0.75, "grad_norm": 3.277120590209961, "learning_rate": 1.9892807268175286e-05, "loss": 0.1807, "step": 396 }, { "epoch": 0.75, "grad_norm": 3.3772106170654297, "learning_rate": 1.9891906226254448e-05, "loss": 0.2476, "step": 397 }, { "epoch": 0.76, "grad_norm": 3.5787243843078613, "learning_rate": 1.9891001433750325e-05, "loss": 0.244, "step": 398 }, { "epoch": 0.76, "grad_norm": 3.4568214416503906, "learning_rate": 1.9890092891005974e-05, "loss": 0.2383, "step": 399 }, { "epoch": 0.76, "grad_norm": 3.2533681392669678, "learning_rate": 1.9889180598365878e-05, "loss": 0.2074, "step": 400 }, { "epoch": 0.76, "eval_blimp_filtered_avg": 0.7349253731343284, "eval_blimp_filtered_std": 0.004842325316479048, "step": 400 }, { "epoch": 0.76, "eval_blimp_supplement_avg": 0.790948275862069, "eval_blimp_supplement_std": 0.01755038252224676, "step": 400 }, { "epoch": 0.76, "eval_vqa_filtered_avg": 0.29, "eval_vqa_filtered_std": 0.045604802157206845, "step": 400 }, { "epoch": 0.76, "eval_winoground_filtered_avg": 0.52, "eval_winoground_filtered_std": 0.05021167315686779, "step": 400 }, { "epoch": 0.76, "grad_norm": 3.2349050045013428, "learning_rate": 1.9888264556175937e-05, "loss": 0.1772, "step": 401 }, { "epoch": 0.76, "grad_norm": 4.183887958526611, "learning_rate": 1.9887344764783475e-05, "loss": 0.2575, "step": 402 }, { "epoch": 0.77, "grad_norm": 3.4962480068206787, "learning_rate": 1.9886421224537237e-05, "loss": 0.1746, "step": 403 }, { "epoch": 0.77, "grad_norm": 3.747609853744507, "learning_rate": 1.988549393578739e-05, "loss": 0.2467, "step": 404 }, { "epoch": 0.77, "grad_norm": 3.232499361038208, "learning_rate": 1.988456289888552e-05, "loss": 0.1956, "step": 405 }, { "epoch": 0.77, "grad_norm": 4.260407447814941, "learning_rate": 1.988362811418464e-05, "loss": 0.2278, "step": 406 }, { "epoch": 0.77, "grad_norm": 3.7527835369110107, "learning_rate": 1.9882689582039176e-05, "loss": 0.2429, "step": 407 }, { "epoch": 0.78, "grad_norm": 3.1005353927612305, "learning_rate": 1.988174730280498e-05, "loss": 0.1717, "step": 408 }, { "epoch": 0.78, "grad_norm": 3.1222078800201416, "learning_rate": 1.9880801276839326e-05, "loss": 0.2355, "step": 409 }, { "epoch": 0.78, "grad_norm": 3.6404175758361816, "learning_rate": 1.98798515045009e-05, "loss": 0.2312, "step": 410 }, { "epoch": 0.78, "grad_norm": 3.6167383193969727, "learning_rate": 1.9878897986149824e-05, "loss": 0.2398, "step": 411 }, { "epoch": 0.78, "grad_norm": 3.7478132247924805, "learning_rate": 1.987794072214762e-05, "loss": 0.1744, "step": 412 }, { "epoch": 0.79, "grad_norm": 2.95676326751709, "learning_rate": 1.987697971285725e-05, "loss": 0.1644, "step": 413 }, { "epoch": 0.79, "grad_norm": 3.0400943756103516, "learning_rate": 1.9876014958643083e-05, "loss": 0.1689, "step": 414 }, { "epoch": 0.79, "grad_norm": 2.9553258419036865, "learning_rate": 1.987504645987091e-05, "loss": 0.1719, "step": 415 }, { "epoch": 0.79, "grad_norm": 3.0530238151550293, "learning_rate": 1.987407421690795e-05, "loss": 0.1967, "step": 416 }, { "epoch": 0.79, "grad_norm": 3.62567400932312, "learning_rate": 1.9873098230122832e-05, "loss": 0.2085, "step": 417 }, { "epoch": 0.79, "grad_norm": 3.106133222579956, "learning_rate": 1.987211849988561e-05, "loss": 0.1541, "step": 418 }, { "epoch": 0.8, "grad_norm": 3.636415719985962, "learning_rate": 1.987113502656775e-05, "loss": 0.2846, "step": 419 }, { "epoch": 0.8, "grad_norm": 3.1852667331695557, "learning_rate": 1.9870147810542148e-05, "loss": 0.1737, "step": 420 }, { "epoch": 0.8, "grad_norm": 3.308737277984619, "learning_rate": 1.9869156852183113e-05, "loss": 0.1451, "step": 421 }, { "epoch": 0.8, "grad_norm": 2.1335840225219727, "learning_rate": 1.9868162151866373e-05, "loss": 0.105, "step": 422 }, { "epoch": 0.8, "grad_norm": 3.8537020683288574, "learning_rate": 1.986716370996907e-05, "loss": 0.2126, "step": 423 }, { "epoch": 0.81, "grad_norm": 2.9467790126800537, "learning_rate": 1.986616152686978e-05, "loss": 0.1679, "step": 424 }, { "epoch": 0.81, "grad_norm": 4.200207233428955, "learning_rate": 1.9865155602948482e-05, "loss": 0.2948, "step": 425 }, { "epoch": 0.81, "grad_norm": 4.308751583099365, "learning_rate": 1.9864145938586575e-05, "loss": 0.2248, "step": 426 }, { "epoch": 0.81, "grad_norm": 3.9924919605255127, "learning_rate": 1.986313253416689e-05, "loss": 0.2531, "step": 427 }, { "epoch": 0.81, "grad_norm": 3.162315845489502, "learning_rate": 1.9862115390073656e-05, "loss": 0.1777, "step": 428 }, { "epoch": 0.82, "grad_norm": 3.4535696506500244, "learning_rate": 1.9861094506692536e-05, "loss": 0.214, "step": 429 }, { "epoch": 0.82, "grad_norm": 3.136573076248169, "learning_rate": 1.9860069884410605e-05, "loss": 0.2023, "step": 430 }, { "epoch": 0.82, "grad_norm": 3.3898823261260986, "learning_rate": 1.985904152361635e-05, "loss": 0.2008, "step": 431 }, { "epoch": 0.82, "grad_norm": 3.5036988258361816, "learning_rate": 1.9858009424699687e-05, "loss": 0.2036, "step": 432 }, { "epoch": 0.82, "grad_norm": 3.060732364654541, "learning_rate": 1.9856973588051942e-05, "loss": 0.2026, "step": 433 }, { "epoch": 0.83, "grad_norm": 3.991023302078247, "learning_rate": 1.985593401406586e-05, "loss": 0.2665, "step": 434 }, { "epoch": 0.83, "grad_norm": 2.8621795177459717, "learning_rate": 1.98548907031356e-05, "loss": 0.1686, "step": 435 }, { "epoch": 0.83, "grad_norm": 2.971616744995117, "learning_rate": 1.9853843655656736e-05, "loss": 0.1582, "step": 436 }, { "epoch": 0.83, "grad_norm": 2.277273654937744, "learning_rate": 1.9852792872026273e-05, "loss": 0.1353, "step": 437 }, { "epoch": 0.83, "grad_norm": 3.6978368759155273, "learning_rate": 1.985173835264262e-05, "loss": 0.234, "step": 438 }, { "epoch": 0.83, "grad_norm": 3.2035295963287354, "learning_rate": 1.9850680097905602e-05, "loss": 0.1798, "step": 439 }, { "epoch": 0.84, "grad_norm": 4.090407848358154, "learning_rate": 1.9849618108216465e-05, "loss": 0.2267, "step": 440 }, { "epoch": 0.84, "grad_norm": 2.893639326095581, "learning_rate": 1.9848552383977874e-05, "loss": 0.1444, "step": 441 }, { "epoch": 0.84, "grad_norm": 3.3095431327819824, "learning_rate": 1.9847482925593895e-05, "loss": 0.1288, "step": 442 }, { "epoch": 0.84, "grad_norm": 2.813152551651001, "learning_rate": 1.9846409733470032e-05, "loss": 0.1791, "step": 443 }, { "epoch": 0.84, "grad_norm": 2.9977710247039795, "learning_rate": 1.9845332808013184e-05, "loss": 0.146, "step": 444 }, { "epoch": 0.85, "grad_norm": 2.512772560119629, "learning_rate": 1.9844252149631678e-05, "loss": 0.1591, "step": 445 }, { "epoch": 0.85, "grad_norm": 3.810116767883301, "learning_rate": 1.9843167758735254e-05, "loss": 0.2931, "step": 446 }, { "epoch": 0.85, "grad_norm": 4.201424598693848, "learning_rate": 1.9842079635735066e-05, "loss": 0.2572, "step": 447 }, { "epoch": 0.85, "grad_norm": 2.920233726501465, "learning_rate": 1.9840987781043683e-05, "loss": 0.1681, "step": 448 }, { "epoch": 0.85, "grad_norm": 3.718073844909668, "learning_rate": 1.983989219507509e-05, "loss": 0.2468, "step": 449 }, { "epoch": 0.86, "grad_norm": 2.903028964996338, "learning_rate": 1.983879287824468e-05, "loss": 0.186, "step": 450 }, { "epoch": 0.86, "grad_norm": 2.8795056343078613, "learning_rate": 1.9837689830969277e-05, "loss": 0.1381, "step": 451 }, { "epoch": 0.86, "grad_norm": 3.913471221923828, "learning_rate": 1.9836583053667095e-05, "loss": 0.223, "step": 452 }, { "epoch": 0.86, "grad_norm": 2.923570394515991, "learning_rate": 1.983547254675779e-05, "loss": 0.1744, "step": 453 }, { "epoch": 0.86, "grad_norm": 3.3962337970733643, "learning_rate": 1.9834358310662408e-05, "loss": 0.197, "step": 454 }, { "epoch": 0.87, "grad_norm": 2.9443840980529785, "learning_rate": 1.9833240345803428e-05, "loss": 0.1753, "step": 455 }, { "epoch": 0.87, "grad_norm": 3.1862893104553223, "learning_rate": 1.983211865260473e-05, "loss": 0.1478, "step": 456 }, { "epoch": 0.87, "grad_norm": 3.0336663722991943, "learning_rate": 1.9830993231491605e-05, "loss": 0.1926, "step": 457 }, { "epoch": 0.87, "grad_norm": 3.0647475719451904, "learning_rate": 1.9829864082890772e-05, "loss": 0.1806, "step": 458 }, { "epoch": 0.87, "grad_norm": 3.560698986053467, "learning_rate": 1.9828731207230355e-05, "loss": 0.2019, "step": 459 }, { "epoch": 0.87, "grad_norm": 3.9414572715759277, "learning_rate": 1.982759460493989e-05, "loss": 0.2286, "step": 460 }, { "epoch": 0.88, "grad_norm": 3.853551149368286, "learning_rate": 1.9826454276450325e-05, "loss": 0.2212, "step": 461 }, { "epoch": 0.88, "grad_norm": 3.5356855392456055, "learning_rate": 1.9825310222194027e-05, "loss": 0.1774, "step": 462 }, { "epoch": 0.88, "grad_norm": 3.3152265548706055, "learning_rate": 1.982416244260477e-05, "loss": 0.2108, "step": 463 }, { "epoch": 0.88, "grad_norm": 2.7571210861206055, "learning_rate": 1.9823010938117745e-05, "loss": 0.1607, "step": 464 }, { "epoch": 0.88, "grad_norm": 2.5318071842193604, "learning_rate": 1.9821855709169547e-05, "loss": 0.1153, "step": 465 }, { "epoch": 0.89, "grad_norm": 2.771381139755249, "learning_rate": 1.9820696756198195e-05, "loss": 0.1885, "step": 466 }, { "epoch": 0.89, "grad_norm": 3.566437244415283, "learning_rate": 1.9819534079643108e-05, "loss": 0.1975, "step": 467 }, { "epoch": 0.89, "grad_norm": 2.9306039810180664, "learning_rate": 1.981836767994513e-05, "loss": 0.2214, "step": 468 }, { "epoch": 0.89, "grad_norm": 3.5378854274749756, "learning_rate": 1.98171975575465e-05, "loss": 0.2144, "step": 469 }, { "epoch": 0.89, "grad_norm": 3.829563617706299, "learning_rate": 1.9816023712890888e-05, "loss": 0.3002, "step": 470 }, { "epoch": 0.9, "grad_norm": 2.9882543087005615, "learning_rate": 1.9814846146423358e-05, "loss": 0.1464, "step": 471 }, { "epoch": 0.9, "grad_norm": 2.611069440841675, "learning_rate": 1.9813664858590398e-05, "loss": 0.133, "step": 472 }, { "epoch": 0.9, "grad_norm": 2.5388762950897217, "learning_rate": 1.9812479849839896e-05, "loss": 0.1581, "step": 473 }, { "epoch": 0.9, "grad_norm": 2.8439977169036865, "learning_rate": 1.9811291120621154e-05, "loss": 0.1972, "step": 474 }, { "epoch": 0.9, "grad_norm": 3.169632911682129, "learning_rate": 1.9810098671384894e-05, "loss": 0.1413, "step": 475 }, { "epoch": 0.9, "grad_norm": 2.8925063610076904, "learning_rate": 1.980890250258324e-05, "loss": 0.1499, "step": 476 }, { "epoch": 0.91, "grad_norm": 3.5568509101867676, "learning_rate": 1.9807702614669725e-05, "loss": 0.2183, "step": 477 }, { "epoch": 0.91, "grad_norm": 3.1945390701293945, "learning_rate": 1.980649900809929e-05, "loss": 0.1997, "step": 478 }, { "epoch": 0.91, "grad_norm": 3.3306751251220703, "learning_rate": 1.9805291683328303e-05, "loss": 0.1594, "step": 479 }, { "epoch": 0.91, "grad_norm": 3.2921245098114014, "learning_rate": 1.9804080640814517e-05, "loss": 0.1585, "step": 480 }, { "epoch": 0.91, "grad_norm": 3.97298526763916, "learning_rate": 1.9802865881017116e-05, "loss": 0.1907, "step": 481 }, { "epoch": 0.92, "grad_norm": 3.971144437789917, "learning_rate": 1.9801647404396676e-05, "loss": 0.2202, "step": 482 }, { "epoch": 0.92, "grad_norm": 3.272005558013916, "learning_rate": 1.9800425211415195e-05, "loss": 0.1552, "step": 483 }, { "epoch": 0.92, "grad_norm": 2.531613826751709, "learning_rate": 1.9799199302536078e-05, "loss": 0.1149, "step": 484 }, { "epoch": 0.92, "grad_norm": 3.3945765495300293, "learning_rate": 1.9797969678224137e-05, "loss": 0.1752, "step": 485 }, { "epoch": 0.92, "grad_norm": 3.505368709564209, "learning_rate": 1.979673633894559e-05, "loss": 0.1718, "step": 486 }, { "epoch": 0.93, "grad_norm": 2.8453898429870605, "learning_rate": 1.979549928516806e-05, "loss": 0.1697, "step": 487 }, { "epoch": 0.93, "grad_norm": 3.2004079818725586, "learning_rate": 1.9794258517360595e-05, "loss": 0.1521, "step": 488 }, { "epoch": 0.93, "grad_norm": 3.0890493392944336, "learning_rate": 1.9793014035993635e-05, "loss": 0.1935, "step": 489 }, { "epoch": 0.93, "grad_norm": 2.916118621826172, "learning_rate": 1.979176584153903e-05, "loss": 0.1498, "step": 490 }, { "epoch": 0.93, "grad_norm": 2.7905192375183105, "learning_rate": 1.979051393447005e-05, "loss": 0.1659, "step": 491 }, { "epoch": 0.94, "grad_norm": 3.089891195297241, "learning_rate": 1.978925831526136e-05, "loss": 0.1586, "step": 492 }, { "epoch": 0.94, "grad_norm": 3.576446533203125, "learning_rate": 1.9787998984389035e-05, "loss": 0.2186, "step": 493 }, { "epoch": 0.94, "grad_norm": 3.2937843799591064, "learning_rate": 1.9786735942330558e-05, "loss": 0.1569, "step": 494 }, { "epoch": 0.94, "grad_norm": 2.4948270320892334, "learning_rate": 1.978546918956483e-05, "loss": 0.1335, "step": 495 }, { "epoch": 0.94, "grad_norm": 3.0883712768554688, "learning_rate": 1.978419872657213e-05, "loss": 0.1575, "step": 496 }, { "epoch": 0.94, "grad_norm": 3.062037467956543, "learning_rate": 1.978292455383418e-05, "loss": 0.1364, "step": 497 }, { "epoch": 0.95, "grad_norm": 2.9698963165283203, "learning_rate": 1.9781646671834082e-05, "loss": 0.1482, "step": 498 }, { "epoch": 0.95, "grad_norm": 3.314328193664551, "learning_rate": 1.978036508105636e-05, "loss": 0.2209, "step": 499 }, { "epoch": 0.95, "grad_norm": 2.680018424987793, "learning_rate": 1.9779079781986933e-05, "loss": 0.132, "step": 500 }, { "epoch": 0.95, "eval_blimp_filtered_avg": 0.7353731343283582, "eval_blimp_filtered_std": 0.0048600208219852165, "step": 500 }, { "epoch": 0.95, "eval_blimp_supplement_avg": 0.8038793103448276, "eval_blimp_supplement_std": 0.01710179332111949, "step": 500 }, { "epoch": 0.95, "eval_vqa_filtered_avg": 0.32, "eval_vqa_filtered_std": 0.046882617226215034, "step": 500 }, { "epoch": 0.95, "eval_winoground_filtered_avg": 0.48, "eval_winoground_filtered_std": 0.05021167315686779, "step": 500 }, { "epoch": 0.95, "grad_norm": 2.581071376800537, "learning_rate": 1.977779077511313e-05, "loss": 0.1351, "step": 501 }, { "epoch": 0.95, "grad_norm": 3.139989137649536, "learning_rate": 1.977649806092369e-05, "loss": 0.184, "step": 502 }, { "epoch": 0.96, "grad_norm": 2.543422222137451, "learning_rate": 1.9775201639908756e-05, "loss": 0.1364, "step": 503 }, { "epoch": 0.96, "grad_norm": 2.847684860229492, "learning_rate": 1.9773901512559867e-05, "loss": 0.1298, "step": 504 }, { "epoch": 0.96, "grad_norm": 3.5308902263641357, "learning_rate": 1.977259767936998e-05, "loss": 0.2058, "step": 505 }, { "epoch": 0.96, "grad_norm": 4.0499162673950195, "learning_rate": 1.9771290140833455e-05, "loss": 0.2367, "step": 506 }, { "epoch": 0.96, "grad_norm": 2.7411231994628906, "learning_rate": 1.9769978897446053e-05, "loss": 0.1486, "step": 507 }, { "epoch": 0.97, "grad_norm": 3.935802936553955, "learning_rate": 1.9768663949704936e-05, "loss": 0.201, "step": 508 }, { "epoch": 0.97, "grad_norm": 2.6786391735076904, "learning_rate": 1.976734529810868e-05, "loss": 0.1857, "step": 509 }, { "epoch": 0.97, "grad_norm": 2.6916232109069824, "learning_rate": 1.976602294315726e-05, "loss": 0.1462, "step": 510 }, { "epoch": 0.97, "grad_norm": 1.9801383018493652, "learning_rate": 1.9764696885352053e-05, "loss": 0.0962, "step": 511 }, { "epoch": 0.97, "grad_norm": 3.1538186073303223, "learning_rate": 1.9763367125195848e-05, "loss": 0.1558, "step": 512 }, { "epoch": 0.98, "grad_norm": 3.3178906440734863, "learning_rate": 1.9762033663192827e-05, "loss": 0.2649, "step": 513 }, { "epoch": 0.98, "grad_norm": 2.9105613231658936, "learning_rate": 1.976069649984858e-05, "loss": 0.2239, "step": 514 }, { "epoch": 0.98, "grad_norm": 2.48042368888855, "learning_rate": 1.9759355635670108e-05, "loss": 0.1576, "step": 515 }, { "epoch": 0.98, "grad_norm": 2.3748509883880615, "learning_rate": 1.9758011071165807e-05, "loss": 0.1269, "step": 516 }, { "epoch": 0.98, "grad_norm": 2.419879913330078, "learning_rate": 1.9756662806845475e-05, "loss": 0.1292, "step": 517 }, { "epoch": 0.98, "grad_norm": 2.961528778076172, "learning_rate": 1.975531084322032e-05, "loss": 0.2402, "step": 518 }, { "epoch": 0.99, "grad_norm": 2.942119598388672, "learning_rate": 1.9753955180802945e-05, "loss": 0.1475, "step": 519 }, { "epoch": 0.99, "grad_norm": 2.921499252319336, "learning_rate": 1.9752595820107356e-05, "loss": 0.1857, "step": 520 }, { "epoch": 0.99, "grad_norm": 2.000281810760498, "learning_rate": 1.975123276164897e-05, "loss": 0.1107, "step": 521 }, { "epoch": 0.99, "grad_norm": 2.4972643852233887, "learning_rate": 1.9749866005944595e-05, "loss": 0.1548, "step": 522 }, { "epoch": 0.99, "grad_norm": 3.5422935485839844, "learning_rate": 1.974849555351245e-05, "loss": 0.156, "step": 523 }, { "epoch": 1.0, "grad_norm": 2.8197715282440186, "learning_rate": 1.9747121404872144e-05, "loss": 0.164, "step": 524 }, { "epoch": 1.0, "grad_norm": 3.1847317218780518, "learning_rate": 1.97457435605447e-05, "loss": 0.1719, "step": 525 }, { "epoch": 1.0, "grad_norm": 2.7228848934173584, "learning_rate": 1.974436202105254e-05, "loss": 0.1327, "step": 526 }, { "epoch": 1.0, "grad_norm": 2.0410289764404297, "learning_rate": 1.9742976786919478e-05, "loss": 0.0742, "step": 527 }, { "epoch": 1.0, "grad_norm": 2.553361177444458, "learning_rate": 1.9741587858670735e-05, "loss": 0.1073, "step": 528 }, { "epoch": 1.01, "grad_norm": 2.4504735469818115, "learning_rate": 1.974019523683294e-05, "loss": 0.0991, "step": 529 }, { "epoch": 1.01, "grad_norm": 2.5014171600341797, "learning_rate": 1.9738798921934105e-05, "loss": 0.0965, "step": 530 }, { "epoch": 1.01, "grad_norm": 2.9235010147094727, "learning_rate": 1.9737398914503658e-05, "loss": 0.1068, "step": 531 }, { "epoch": 1.01, "grad_norm": 2.3822357654571533, "learning_rate": 1.9735995215072424e-05, "loss": 0.0752, "step": 532 }, { "epoch": 1.01, "grad_norm": 2.2438721656799316, "learning_rate": 1.973458782417262e-05, "loss": 0.0876, "step": 533 }, { "epoch": 1.02, "grad_norm": 2.759418249130249, "learning_rate": 1.973317674233787e-05, "loss": 0.1014, "step": 534 }, { "epoch": 1.02, "grad_norm": 2.5352272987365723, "learning_rate": 1.9731761970103195e-05, "loss": 0.0953, "step": 535 }, { "epoch": 1.02, "grad_norm": 3.7814486026763916, "learning_rate": 1.973034350800501e-05, "loss": 0.1252, "step": 536 }, { "epoch": 1.02, "grad_norm": 1.898605465888977, "learning_rate": 1.9728921356581146e-05, "loss": 0.0518, "step": 537 }, { "epoch": 1.02, "grad_norm": 3.1824612617492676, "learning_rate": 1.972749551637081e-05, "loss": 0.0717, "step": 538 }, { "epoch": 1.02, "grad_norm": 2.295339584350586, "learning_rate": 1.9726065987914626e-05, "loss": 0.0699, "step": 539 }, { "epoch": 1.03, "grad_norm": 2.1143815517425537, "learning_rate": 1.972463277175461e-05, "loss": 0.0845, "step": 540 }, { "epoch": 1.03, "grad_norm": 2.4848153591156006, "learning_rate": 1.9723195868434174e-05, "loss": 0.1006, "step": 541 }, { "epoch": 1.03, "grad_norm": 3.169520616531372, "learning_rate": 1.9721755278498126e-05, "loss": 0.1071, "step": 542 }, { "epoch": 1.03, "grad_norm": 2.988433599472046, "learning_rate": 1.9720311002492677e-05, "loss": 0.0869, "step": 543 }, { "epoch": 1.03, "grad_norm": 2.579894542694092, "learning_rate": 1.971886304096543e-05, "loss": 0.1156, "step": 544 }, { "epoch": 1.04, "grad_norm": 2.12581467628479, "learning_rate": 1.9717411394465404e-05, "loss": 0.0825, "step": 545 }, { "epoch": 1.04, "grad_norm": 2.313169479370117, "learning_rate": 1.9715956063542987e-05, "loss": 0.1023, "step": 546 }, { "epoch": 1.04, "grad_norm": 3.411513328552246, "learning_rate": 1.9714497048749983e-05, "loss": 0.1363, "step": 547 }, { "epoch": 1.04, "grad_norm": 2.458254337310791, "learning_rate": 1.9713034350639587e-05, "loss": 0.0868, "step": 548 }, { "epoch": 1.04, "grad_norm": 2.4696383476257324, "learning_rate": 1.9711567969766387e-05, "loss": 0.0882, "step": 549 }, { "epoch": 1.05, "grad_norm": 3.0647501945495605, "learning_rate": 1.971009790668638e-05, "loss": 0.1177, "step": 550 }, { "epoch": 1.05, "grad_norm": 2.4523589611053467, "learning_rate": 1.9708624161956937e-05, "loss": 0.1071, "step": 551 }, { "epoch": 1.05, "grad_norm": 2.2256698608398438, "learning_rate": 1.970714673613685e-05, "loss": 0.0773, "step": 552 }, { "epoch": 1.05, "grad_norm": 2.238497734069824, "learning_rate": 1.970566562978629e-05, "loss": 0.0864, "step": 553 }, { "epoch": 1.05, "grad_norm": 2.3855180740356445, "learning_rate": 1.9704180843466832e-05, "loss": 0.0769, "step": 554 }, { "epoch": 1.06, "grad_norm": 1.8529298305511475, "learning_rate": 1.9702692377741436e-05, "loss": 0.0726, "step": 555 }, { "epoch": 1.06, "grad_norm": 3.163813829421997, "learning_rate": 1.9701200233174473e-05, "loss": 0.1354, "step": 556 }, { "epoch": 1.06, "grad_norm": 2.618946075439453, "learning_rate": 1.969970441033169e-05, "loss": 0.0933, "step": 557 }, { "epoch": 1.06, "grad_norm": 2.4150633811950684, "learning_rate": 1.9698204909780245e-05, "loss": 0.0914, "step": 558 }, { "epoch": 1.06, "grad_norm": 2.3075432777404785, "learning_rate": 1.9696701732088682e-05, "loss": 0.0865, "step": 559 }, { "epoch": 1.06, "grad_norm": 2.3446035385131836, "learning_rate": 1.9695194877826943e-05, "loss": 0.0877, "step": 560 }, { "epoch": 1.07, "grad_norm": 2.488875389099121, "learning_rate": 1.9693684347566357e-05, "loss": 0.0858, "step": 561 }, { "epoch": 1.07, "grad_norm": 1.7620638608932495, "learning_rate": 1.9692170141879657e-05, "loss": 0.0593, "step": 562 }, { "epoch": 1.07, "grad_norm": 2.5567786693573, "learning_rate": 1.969065226134096e-05, "loss": 0.0886, "step": 563 }, { "epoch": 1.07, "grad_norm": 2.6546542644500732, "learning_rate": 1.9689130706525783e-05, "loss": 0.0909, "step": 564 }, { "epoch": 1.07, "grad_norm": 1.863936185836792, "learning_rate": 1.9687605478011038e-05, "loss": 0.0581, "step": 565 }, { "epoch": 1.08, "grad_norm": 2.917421340942383, "learning_rate": 1.9686076576375016e-05, "loss": 0.1148, "step": 566 }, { "epoch": 1.08, "grad_norm": 2.977630853652954, "learning_rate": 1.9684544002197418e-05, "loss": 0.1098, "step": 567 }, { "epoch": 1.08, "grad_norm": 1.799079179763794, "learning_rate": 1.9683007756059325e-05, "loss": 0.0475, "step": 568 }, { "epoch": 1.08, "grad_norm": 3.1164944171905518, "learning_rate": 1.9681467838543225e-05, "loss": 0.0993, "step": 569 }, { "epoch": 1.08, "grad_norm": 2.2246084213256836, "learning_rate": 1.9679924250232977e-05, "loss": 0.0656, "step": 570 }, { "epoch": 1.09, "grad_norm": 2.4692776203155518, "learning_rate": 1.9678376991713844e-05, "loss": 0.0946, "step": 571 }, { "epoch": 1.09, "grad_norm": 3.208848237991333, "learning_rate": 1.9676826063572485e-05, "loss": 0.1074, "step": 572 }, { "epoch": 1.09, "grad_norm": 3.2965247631073, "learning_rate": 1.9675271466396942e-05, "loss": 0.0916, "step": 573 }, { "epoch": 1.09, "grad_norm": 2.2634117603302, "learning_rate": 1.9673713200776654e-05, "loss": 0.0832, "step": 574 }, { "epoch": 1.09, "grad_norm": 3.555225133895874, "learning_rate": 1.9672151267302443e-05, "loss": 0.1363, "step": 575 }, { "epoch": 1.1, "grad_norm": 2.29260516166687, "learning_rate": 1.967058566656653e-05, "loss": 0.0641, "step": 576 }, { "epoch": 1.1, "grad_norm": 2.849818229675293, "learning_rate": 1.9669016399162523e-05, "loss": 0.116, "step": 577 }, { "epoch": 1.1, "grad_norm": 2.650031328201294, "learning_rate": 1.9667443465685418e-05, "loss": 0.1193, "step": 578 }, { "epoch": 1.1, "grad_norm": 2.1810801029205322, "learning_rate": 1.9665866866731604e-05, "loss": 0.0732, "step": 579 }, { "epoch": 1.1, "grad_norm": 2.3637495040893555, "learning_rate": 1.966428660289886e-05, "loss": 0.0736, "step": 580 }, { "epoch": 1.1, "grad_norm": 2.454347610473633, "learning_rate": 1.9662702674786357e-05, "loss": 0.0826, "step": 581 }, { "epoch": 1.11, "grad_norm": 2.824099540710449, "learning_rate": 1.9661115082994648e-05, "loss": 0.0913, "step": 582 }, { "epoch": 1.11, "grad_norm": 2.358539342880249, "learning_rate": 1.965952382812568e-05, "loss": 0.1004, "step": 583 }, { "epoch": 1.11, "grad_norm": 2.648507833480835, "learning_rate": 1.965792891078279e-05, "loss": 0.1068, "step": 584 }, { "epoch": 1.11, "grad_norm": 2.257201910018921, "learning_rate": 1.9656330331570697e-05, "loss": 0.1002, "step": 585 }, { "epoch": 1.11, "grad_norm": 2.778611183166504, "learning_rate": 1.9654728091095517e-05, "loss": 0.0932, "step": 586 }, { "epoch": 1.12, "grad_norm": 2.4621307849884033, "learning_rate": 1.965312218996475e-05, "loss": 0.0989, "step": 587 }, { "epoch": 1.12, "grad_norm": 2.1632823944091797, "learning_rate": 1.9651512628787284e-05, "loss": 0.0667, "step": 588 }, { "epoch": 1.12, "grad_norm": 2.8375799655914307, "learning_rate": 1.9649899408173394e-05, "loss": 0.0992, "step": 589 }, { "epoch": 1.12, "grad_norm": 2.928056240081787, "learning_rate": 1.9648282528734744e-05, "loss": 0.0836, "step": 590 }, { "epoch": 1.12, "grad_norm": 2.624230146408081, "learning_rate": 1.9646661991084388e-05, "loss": 0.1033, "step": 591 }, { "epoch": 1.13, "grad_norm": 1.979159951210022, "learning_rate": 1.9645037795836755e-05, "loss": 0.0638, "step": 592 }, { "epoch": 1.13, "grad_norm": 2.745840311050415, "learning_rate": 1.964340994360768e-05, "loss": 0.1158, "step": 593 }, { "epoch": 1.13, "grad_norm": 3.027583122253418, "learning_rate": 1.964177843501437e-05, "loss": 0.1346, "step": 594 }, { "epoch": 1.13, "grad_norm": 2.399686574935913, "learning_rate": 1.964014327067542e-05, "loss": 0.0732, "step": 595 }, { "epoch": 1.13, "grad_norm": 2.995936393737793, "learning_rate": 1.9638504451210818e-05, "loss": 0.064, "step": 596 }, { "epoch": 1.13, "grad_norm": 2.7167351245880127, "learning_rate": 1.963686197724193e-05, "loss": 0.0996, "step": 597 }, { "epoch": 1.14, "grad_norm": 3.1226511001586914, "learning_rate": 1.9635215849391513e-05, "loss": 0.1243, "step": 598 }, { "epoch": 1.14, "grad_norm": 2.419196844100952, "learning_rate": 1.9633566068283708e-05, "loss": 0.0726, "step": 599 }, { "epoch": 1.14, "grad_norm": 1.7638850212097168, "learning_rate": 1.963191263454404e-05, "loss": 0.0592, "step": 600 }, { "epoch": 1.14, "eval_blimp_filtered_avg": 0.7373134328358208, "eval_blimp_filtered_std": 0.004863700245944564, "step": 600 }, { "epoch": 1.14, "eval_blimp_supplement_avg": 0.7974137931034483, "eval_blimp_supplement_std": 0.01754101625179308, "step": 600 }, { "epoch": 1.14, "eval_vqa_filtered_avg": 0.36, "eval_vqa_filtered_std": 0.04824181513244218, "step": 600 }, { "epoch": 1.14, "eval_winoground_filtered_avg": 0.54, "eval_winoground_filtered_std": 0.05009082659620333, "step": 600 }, { "epoch": 1.14, "grad_norm": 2.467848539352417, "learning_rate": 1.9630255548799417e-05, "loss": 0.086, "step": 601 }, { "epoch": 1.14, "grad_norm": 2.5395681858062744, "learning_rate": 1.962859481167814e-05, "loss": 0.0742, "step": 602 }, { "epoch": 1.15, "grad_norm": 2.090263605117798, "learning_rate": 1.9626930423809885e-05, "loss": 0.0752, "step": 603 }, { "epoch": 1.15, "grad_norm": 2.260776996612549, "learning_rate": 1.9625262385825712e-05, "loss": 0.0897, "step": 604 }, { "epoch": 1.15, "grad_norm": 3.5364627838134766, "learning_rate": 1.9623590698358078e-05, "loss": 0.1452, "step": 605 }, { "epoch": 1.15, "grad_norm": 2.4582839012145996, "learning_rate": 1.9621915362040808e-05, "loss": 0.0774, "step": 606 }, { "epoch": 1.15, "grad_norm": 2.3200948238372803, "learning_rate": 1.962023637750912e-05, "loss": 0.0861, "step": 607 }, { "epoch": 1.16, "grad_norm": 2.121504545211792, "learning_rate": 1.9618553745399612e-05, "loss": 0.0687, "step": 608 }, { "epoch": 1.16, "grad_norm": 2.560075283050537, "learning_rate": 1.9616867466350264e-05, "loss": 0.0858, "step": 609 }, { "epoch": 1.16, "grad_norm": 2.6458985805511475, "learning_rate": 1.961517754100044e-05, "loss": 0.0996, "step": 610 }, { "epoch": 1.16, "grad_norm": 2.1184961795806885, "learning_rate": 1.961348396999089e-05, "loss": 0.0588, "step": 611 }, { "epoch": 1.16, "grad_norm": 2.373845338821411, "learning_rate": 1.961178675396374e-05, "loss": 0.0791, "step": 612 }, { "epoch": 1.17, "grad_norm": 2.3906753063201904, "learning_rate": 1.96100858935625e-05, "loss": 0.0853, "step": 613 }, { "epoch": 1.17, "grad_norm": 3.57039475440979, "learning_rate": 1.9608381389432063e-05, "loss": 0.1272, "step": 614 }, { "epoch": 1.17, "grad_norm": 2.490534543991089, "learning_rate": 1.9606673242218706e-05, "loss": 0.083, "step": 615 }, { "epoch": 1.17, "grad_norm": 3.1958765983581543, "learning_rate": 1.9604961452570083e-05, "loss": 0.102, "step": 616 }, { "epoch": 1.17, "grad_norm": 3.1603119373321533, "learning_rate": 1.9603246021135232e-05, "loss": 0.093, "step": 617 }, { "epoch": 1.17, "grad_norm": 1.8974733352661133, "learning_rate": 1.9601526948564567e-05, "loss": 0.0621, "step": 618 }, { "epoch": 1.18, "grad_norm": 3.6179957389831543, "learning_rate": 1.959980423550989e-05, "loss": 0.1039, "step": 619 }, { "epoch": 1.18, "grad_norm": 2.1595287322998047, "learning_rate": 1.959807788262438e-05, "loss": 0.064, "step": 620 }, { "epoch": 1.18, "grad_norm": 2.727121353149414, "learning_rate": 1.9596347890562586e-05, "loss": 0.0856, "step": 621 }, { "epoch": 1.18, "grad_norm": 2.4271838665008545, "learning_rate": 1.9594614259980455e-05, "loss": 0.0933, "step": 622 }, { "epoch": 1.18, "grad_norm": 2.4891998767852783, "learning_rate": 1.9592876991535305e-05, "loss": 0.1103, "step": 623 }, { "epoch": 1.19, "grad_norm": 2.7741658687591553, "learning_rate": 1.9591136085885832e-05, "loss": 0.0774, "step": 624 }, { "epoch": 1.19, "grad_norm": 2.441477060317993, "learning_rate": 1.958939154369211e-05, "loss": 0.0882, "step": 625 }, { "epoch": 1.19, "grad_norm": 2.3069701194763184, "learning_rate": 1.9587643365615595e-05, "loss": 0.0747, "step": 626 }, { "epoch": 1.19, "grad_norm": 1.6924126148223877, "learning_rate": 1.9585891552319123e-05, "loss": 0.0561, "step": 627 }, { "epoch": 1.19, "grad_norm": 2.4660251140594482, "learning_rate": 1.95841361044669e-05, "loss": 0.0654, "step": 628 }, { "epoch": 1.2, "grad_norm": 2.287003993988037, "learning_rate": 1.9582377022724523e-05, "loss": 0.0815, "step": 629 }, { "epoch": 1.2, "grad_norm": 2.1821272373199463, "learning_rate": 1.9580614307758954e-05, "loss": 0.0746, "step": 630 }, { "epoch": 1.2, "grad_norm": 2.788135290145874, "learning_rate": 1.957884796023854e-05, "loss": 0.1048, "step": 631 }, { "epoch": 1.2, "grad_norm": 2.5871200561523438, "learning_rate": 1.9577077980833006e-05, "loss": 0.1044, "step": 632 }, { "epoch": 1.2, "grad_norm": 2.8930718898773193, "learning_rate": 1.957530437021345e-05, "loss": 0.0828, "step": 633 }, { "epoch": 1.21, "grad_norm": 2.2883105278015137, "learning_rate": 1.9573527129052346e-05, "loss": 0.0864, "step": 634 }, { "epoch": 1.21, "grad_norm": 2.9197452068328857, "learning_rate": 1.9571746258023556e-05, "loss": 0.0928, "step": 635 }, { "epoch": 1.21, "grad_norm": 1.7592474222183228, "learning_rate": 1.95699617578023e-05, "loss": 0.0494, "step": 636 }, { "epoch": 1.21, "grad_norm": 2.264357328414917, "learning_rate": 1.9568173629065183e-05, "loss": 0.0732, "step": 637 }, { "epoch": 1.21, "grad_norm": 3.4252021312713623, "learning_rate": 1.956638187249019e-05, "loss": 0.1158, "step": 638 }, { "epoch": 1.21, "grad_norm": 2.355846881866455, "learning_rate": 1.956458648875668e-05, "loss": 0.1093, "step": 639 }, { "epoch": 1.22, "grad_norm": 2.9100420475006104, "learning_rate": 1.9562787478545378e-05, "loss": 0.0899, "step": 640 }, { "epoch": 1.22, "grad_norm": 3.1080851554870605, "learning_rate": 1.9560984842538398e-05, "loss": 0.1443, "step": 641 }, { "epoch": 1.22, "grad_norm": 3.0809216499328613, "learning_rate": 1.9559178581419214e-05, "loss": 0.124, "step": 642 }, { "epoch": 1.22, "grad_norm": 2.850717306137085, "learning_rate": 1.9557368695872694e-05, "loss": 0.0968, "step": 643 }, { "epoch": 1.22, "grad_norm": 1.9187813997268677, "learning_rate": 1.9555555186585054e-05, "loss": 0.0778, "step": 644 }, { "epoch": 1.23, "grad_norm": 2.4152493476867676, "learning_rate": 1.955373805424391e-05, "loss": 0.094, "step": 645 }, { "epoch": 1.23, "grad_norm": 2.1748087406158447, "learning_rate": 1.9551917299538235e-05, "loss": 0.0604, "step": 646 }, { "epoch": 1.23, "grad_norm": 2.204104423522949, "learning_rate": 1.955009292315838e-05, "loss": 0.0986, "step": 647 }, { "epoch": 1.23, "grad_norm": 2.47126841545105, "learning_rate": 1.9548264925796066e-05, "loss": 0.119, "step": 648 }, { "epoch": 1.23, "grad_norm": 3.034508466720581, "learning_rate": 1.9546433308144403e-05, "loss": 0.1427, "step": 649 }, { "epoch": 1.24, "grad_norm": 2.1595828533172607, "learning_rate": 1.9544598070897847e-05, "loss": 0.0778, "step": 650 }, { "epoch": 1.24, "grad_norm": 2.906886577606201, "learning_rate": 1.954275921475225e-05, "loss": 0.1152, "step": 651 }, { "epoch": 1.24, "grad_norm": 1.921394944190979, "learning_rate": 1.9540916740404823e-05, "loss": 0.0824, "step": 652 }, { "epoch": 1.24, "grad_norm": 2.374051332473755, "learning_rate": 1.9539070648554156e-05, "loss": 0.0713, "step": 653 }, { "epoch": 1.24, "grad_norm": 1.9409077167510986, "learning_rate": 1.95372209399002e-05, "loss": 0.0729, "step": 654 }, { "epoch": 1.25, "grad_norm": 2.309786558151245, "learning_rate": 1.953536761514429e-05, "loss": 0.0808, "step": 655 }, { "epoch": 1.25, "grad_norm": 2.4581828117370605, "learning_rate": 1.953351067498913e-05, "loss": 0.1038, "step": 656 }, { "epoch": 1.25, "grad_norm": 3.7096030712127686, "learning_rate": 1.9531650120138784e-05, "loss": 0.1514, "step": 657 }, { "epoch": 1.25, "grad_norm": 2.632063627243042, "learning_rate": 1.9529785951298698e-05, "loss": 0.0943, "step": 658 }, { "epoch": 1.25, "grad_norm": 2.52223801612854, "learning_rate": 1.9527918169175684e-05, "loss": 0.0716, "step": 659 }, { "epoch": 1.25, "grad_norm": 2.1479392051696777, "learning_rate": 1.9526046774477923e-05, "loss": 0.0638, "step": 660 }, { "epoch": 1.26, "grad_norm": 2.4667627811431885, "learning_rate": 1.9524171767914967e-05, "loss": 0.064, "step": 661 }, { "epoch": 1.26, "grad_norm": 2.051370859146118, "learning_rate": 1.952229315019774e-05, "loss": 0.0873, "step": 662 }, { "epoch": 1.26, "grad_norm": 3.2622275352478027, "learning_rate": 1.9520410922038532e-05, "loss": 0.1025, "step": 663 }, { "epoch": 1.26, "grad_norm": 2.5033862590789795, "learning_rate": 1.9518525084150997e-05, "loss": 0.0818, "step": 664 }, { "epoch": 1.26, "grad_norm": 2.83266282081604, "learning_rate": 1.951663563725017e-05, "loss": 0.0819, "step": 665 }, { "epoch": 1.27, "grad_norm": 2.3601884841918945, "learning_rate": 1.9514742582052445e-05, "loss": 0.1048, "step": 666 }, { "epoch": 1.27, "grad_norm": 1.8065261840820312, "learning_rate": 1.9512845919275588e-05, "loss": 0.0592, "step": 667 }, { "epoch": 1.27, "grad_norm": 2.524171829223633, "learning_rate": 1.9510945649638732e-05, "loss": 0.0882, "step": 668 }, { "epoch": 1.27, "grad_norm": 2.0903100967407227, "learning_rate": 1.950904177386237e-05, "loss": 0.0788, "step": 669 }, { "epoch": 1.27, "grad_norm": 2.2345569133758545, "learning_rate": 1.9507134292668377e-05, "loss": 0.0708, "step": 670 }, { "epoch": 1.28, "grad_norm": 2.00701904296875, "learning_rate": 1.9505223206779987e-05, "loss": 0.06, "step": 671 }, { "epoch": 1.28, "grad_norm": 2.792975902557373, "learning_rate": 1.95033085169218e-05, "loss": 0.0871, "step": 672 }, { "epoch": 1.28, "grad_norm": 2.663280487060547, "learning_rate": 1.950139022381978e-05, "loss": 0.1198, "step": 673 }, { "epoch": 1.28, "grad_norm": 1.7861485481262207, "learning_rate": 1.949946832820127e-05, "loss": 0.0689, "step": 674 }, { "epoch": 1.28, "grad_norm": 2.0840001106262207, "learning_rate": 1.949754283079496e-05, "loss": 0.0802, "step": 675 }, { "epoch": 1.29, "grad_norm": 2.4872374534606934, "learning_rate": 1.949561373233092e-05, "loss": 0.0523, "step": 676 }, { "epoch": 1.29, "grad_norm": 2.6125199794769287, "learning_rate": 1.9493681033540578e-05, "loss": 0.0878, "step": 677 }, { "epoch": 1.29, "grad_norm": 3.130873203277588, "learning_rate": 1.9491744735156733e-05, "loss": 0.0694, "step": 678 }, { "epoch": 1.29, "grad_norm": 2.5600178241729736, "learning_rate": 1.9489804837913545e-05, "loss": 0.0755, "step": 679 }, { "epoch": 1.29, "grad_norm": 2.0018534660339355, "learning_rate": 1.9487861342546538e-05, "loss": 0.0667, "step": 680 }, { "epoch": 1.29, "grad_norm": 2.6040499210357666, "learning_rate": 1.9485914249792602e-05, "loss": 0.0923, "step": 681 }, { "epoch": 1.3, "grad_norm": 2.4093971252441406, "learning_rate": 1.9483963560389992e-05, "loss": 0.0912, "step": 682 }, { "epoch": 1.3, "grad_norm": 2.26521635055542, "learning_rate": 1.948200927507832e-05, "loss": 0.0664, "step": 683 }, { "epoch": 1.3, "grad_norm": 1.7541139125823975, "learning_rate": 1.9480051394598575e-05, "loss": 0.0556, "step": 684 }, { "epoch": 1.3, "grad_norm": 2.990689516067505, "learning_rate": 1.9478089919693093e-05, "loss": 0.0918, "step": 685 }, { "epoch": 1.3, "grad_norm": 1.972128987312317, "learning_rate": 1.947612485110558e-05, "loss": 0.0858, "step": 686 }, { "epoch": 1.31, "grad_norm": 1.7815457582473755, "learning_rate": 1.9474156189581112e-05, "loss": 0.0693, "step": 687 }, { "epoch": 1.31, "grad_norm": 2.2382655143737793, "learning_rate": 1.9472183935866117e-05, "loss": 0.0714, "step": 688 }, { "epoch": 1.31, "grad_norm": 2.3676931858062744, "learning_rate": 1.9470208090708386e-05, "loss": 0.0782, "step": 689 }, { "epoch": 1.31, "grad_norm": 2.0858001708984375, "learning_rate": 1.946822865485708e-05, "loss": 0.0757, "step": 690 }, { "epoch": 1.31, "grad_norm": 2.2965869903564453, "learning_rate": 1.946624562906271e-05, "loss": 0.0953, "step": 691 }, { "epoch": 1.32, "grad_norm": 2.7906394004821777, "learning_rate": 1.9464259014077153e-05, "loss": 0.097, "step": 692 }, { "epoch": 1.32, "grad_norm": 2.6373369693756104, "learning_rate": 1.9462268810653652e-05, "loss": 0.0751, "step": 693 }, { "epoch": 1.32, "grad_norm": 2.094658613204956, "learning_rate": 1.9460275019546802e-05, "loss": 0.06, "step": 694 }, { "epoch": 1.32, "grad_norm": 2.451282024383545, "learning_rate": 1.9458277641512566e-05, "loss": 0.0835, "step": 695 }, { "epoch": 1.32, "grad_norm": 2.320418119430542, "learning_rate": 1.9456276677308263e-05, "loss": 0.0846, "step": 696 }, { "epoch": 1.33, "grad_norm": 1.7940441370010376, "learning_rate": 1.945427212769257e-05, "loss": 0.0553, "step": 697 }, { "epoch": 1.33, "grad_norm": 2.1462337970733643, "learning_rate": 1.9452263993425525e-05, "loss": 0.088, "step": 698 }, { "epoch": 1.33, "grad_norm": 2.0590434074401855, "learning_rate": 1.9450252275268528e-05, "loss": 0.065, "step": 699 }, { "epoch": 1.33, "grad_norm": 2.705392599105835, "learning_rate": 1.9448236973984334e-05, "loss": 0.1206, "step": 700 }, { "epoch": 1.33, "eval_blimp_filtered_avg": 0.7379104477611941, "eval_blimp_filtered_std": 0.004860182867820668, "step": 700 }, { "epoch": 1.33, "eval_blimp_supplement_avg": 0.7952586206896551, "eval_blimp_supplement_std": 0.017860575174285596, "step": 700 }, { "epoch": 1.33, "eval_vqa_filtered_avg": 0.3, "eval_vqa_filtered_std": 0.046056618647183814, "step": 700 }, { "epoch": 1.33, "eval_winoground_filtered_avg": 0.53, "eval_winoground_filtered_std": 0.05016135580465919, "step": 700 }, { "epoch": 1.33, "grad_norm": 2.251582622528076, "learning_rate": 1.9446218090337063e-05, "loss": 0.0747, "step": 701 }, { "epoch": 1.33, "grad_norm": 1.9641352891921997, "learning_rate": 1.944419562509218e-05, "loss": 0.0529, "step": 702 }, { "epoch": 1.34, "grad_norm": 2.84973406791687, "learning_rate": 1.9442169579016524e-05, "loss": 0.1094, "step": 703 }, { "epoch": 1.34, "grad_norm": 2.6607868671417236, "learning_rate": 1.9440139952878276e-05, "loss": 0.1057, "step": 704 }, { "epoch": 1.34, "grad_norm": 1.9801902770996094, "learning_rate": 1.943810674744699e-05, "loss": 0.076, "step": 705 }, { "epoch": 1.34, "grad_norm": 2.271721839904785, "learning_rate": 1.9436069963493567e-05, "loss": 0.1032, "step": 706 }, { "epoch": 1.34, "grad_norm": 1.4693667888641357, "learning_rate": 1.9434029601790268e-05, "loss": 0.0576, "step": 707 }, { "epoch": 1.35, "grad_norm": 1.848992109298706, "learning_rate": 1.9431985663110712e-05, "loss": 0.0798, "step": 708 }, { "epoch": 1.35, "grad_norm": 2.276714563369751, "learning_rate": 1.9429938148229867e-05, "loss": 0.091, "step": 709 }, { "epoch": 1.35, "grad_norm": 1.8082990646362305, "learning_rate": 1.9427887057924062e-05, "loss": 0.0674, "step": 710 }, { "epoch": 1.35, "grad_norm": 2.8296356201171875, "learning_rate": 1.9425832392970985e-05, "loss": 0.1032, "step": 711 }, { "epoch": 1.35, "grad_norm": 2.1154062747955322, "learning_rate": 1.942377415414968e-05, "loss": 0.0815, "step": 712 }, { "epoch": 1.36, "grad_norm": 2.561992883682251, "learning_rate": 1.942171234224054e-05, "loss": 0.078, "step": 713 }, { "epoch": 1.36, "grad_norm": 2.1136510372161865, "learning_rate": 1.9419646958025304e-05, "loss": 0.0875, "step": 714 }, { "epoch": 1.36, "grad_norm": 2.9700143337249756, "learning_rate": 1.9417578002287092e-05, "loss": 0.1054, "step": 715 }, { "epoch": 1.36, "grad_norm": 2.7547826766967773, "learning_rate": 1.941550547581035e-05, "loss": 0.0958, "step": 716 }, { "epoch": 1.36, "grad_norm": 2.0940723419189453, "learning_rate": 1.9413429379380903e-05, "loss": 0.0712, "step": 717 }, { "epoch": 1.37, "grad_norm": 2.3606419563293457, "learning_rate": 1.9411349713785905e-05, "loss": 0.0727, "step": 718 }, { "epoch": 1.37, "grad_norm": 2.744149923324585, "learning_rate": 1.9409266479813882e-05, "loss": 0.1259, "step": 719 }, { "epoch": 1.37, "grad_norm": 1.9855893850326538, "learning_rate": 1.940717967825471e-05, "loss": 0.0508, "step": 720 }, { "epoch": 1.37, "grad_norm": 2.371931552886963, "learning_rate": 1.9405089309899602e-05, "loss": 0.0722, "step": 721 }, { "epoch": 1.37, "grad_norm": 2.1700737476348877, "learning_rate": 1.9402995375541144e-05, "loss": 0.0612, "step": 722 }, { "epoch": 1.37, "grad_norm": 2.1332762241363525, "learning_rate": 1.9400897875973265e-05, "loss": 0.0826, "step": 723 }, { "epoch": 1.38, "grad_norm": 2.2127697467803955, "learning_rate": 1.9398796811991242e-05, "loss": 0.0909, "step": 724 }, { "epoch": 1.38, "grad_norm": 2.9636991024017334, "learning_rate": 1.9396692184391715e-05, "loss": 0.0732, "step": 725 }, { "epoch": 1.38, "grad_norm": 3.143033027648926, "learning_rate": 1.939458399397266e-05, "loss": 0.0888, "step": 726 }, { "epoch": 1.38, "grad_norm": 2.1526615619659424, "learning_rate": 1.9392472241533418e-05, "loss": 0.0734, "step": 727 }, { "epoch": 1.38, "grad_norm": 1.928637981414795, "learning_rate": 1.9390356927874666e-05, "loss": 0.0732, "step": 728 }, { "epoch": 1.39, "grad_norm": 2.3318088054656982, "learning_rate": 1.938823805379845e-05, "loss": 0.0699, "step": 729 }, { "epoch": 1.39, "grad_norm": 2.2766647338867188, "learning_rate": 1.9386115620108147e-05, "loss": 0.0825, "step": 730 }, { "epoch": 1.39, "grad_norm": 4.741824626922607, "learning_rate": 1.9383989627608496e-05, "loss": 0.1083, "step": 731 }, { "epoch": 1.39, "grad_norm": 2.113506555557251, "learning_rate": 1.938186007710558e-05, "loss": 0.0755, "step": 732 }, { "epoch": 1.39, "grad_norm": 2.3114418983459473, "learning_rate": 1.9379726969406834e-05, "loss": 0.0943, "step": 733 }, { "epoch": 1.4, "grad_norm": 3.109471559524536, "learning_rate": 1.937759030532104e-05, "loss": 0.1084, "step": 734 }, { "epoch": 1.4, "grad_norm": 2.4331843852996826, "learning_rate": 1.937545008565833e-05, "loss": 0.077, "step": 735 }, { "epoch": 1.4, "grad_norm": 2.0720815658569336, "learning_rate": 1.9373306311230177e-05, "loss": 0.0709, "step": 736 }, { "epoch": 1.4, "grad_norm": 3.169248580932617, "learning_rate": 1.9371158982849412e-05, "loss": 0.1156, "step": 737 }, { "epoch": 1.4, "grad_norm": 3.020043134689331, "learning_rate": 1.936900810133021e-05, "loss": 0.1054, "step": 738 }, { "epoch": 1.4, "grad_norm": 2.205514430999756, "learning_rate": 1.936685366748809e-05, "loss": 0.0936, "step": 739 }, { "epoch": 1.41, "grad_norm": 2.9094367027282715, "learning_rate": 1.9364695682139923e-05, "loss": 0.1136, "step": 740 }, { "epoch": 1.41, "grad_norm": 2.4873595237731934, "learning_rate": 1.9362534146103923e-05, "loss": 0.114, "step": 741 }, { "epoch": 1.41, "grad_norm": 3.2683944702148438, "learning_rate": 1.9360369060199645e-05, "loss": 0.1147, "step": 742 }, { "epoch": 1.41, "grad_norm": 2.4601738452911377, "learning_rate": 1.9358200425248003e-05, "loss": 0.0729, "step": 743 }, { "epoch": 1.41, "grad_norm": 2.4751133918762207, "learning_rate": 1.935602824207125e-05, "loss": 0.0867, "step": 744 }, { "epoch": 1.42, "grad_norm": 2.494518995285034, "learning_rate": 1.935385251149298e-05, "loss": 0.1097, "step": 745 }, { "epoch": 1.42, "grad_norm": 1.9208500385284424, "learning_rate": 1.935167323433814e-05, "loss": 0.0788, "step": 746 }, { "epoch": 1.42, "grad_norm": 1.945523977279663, "learning_rate": 1.934949041143301e-05, "loss": 0.0707, "step": 747 }, { "epoch": 1.42, "grad_norm": 2.0394389629364014, "learning_rate": 1.934730404360523e-05, "loss": 0.0723, "step": 748 }, { "epoch": 1.42, "grad_norm": 3.118859052658081, "learning_rate": 1.9345114131683774e-05, "loss": 0.1456, "step": 749 }, { "epoch": 1.43, "grad_norm": 2.309175729751587, "learning_rate": 1.9342920676498962e-05, "loss": 0.1084, "step": 750 }, { "epoch": 1.43, "grad_norm": 2.027498245239258, "learning_rate": 1.9340723678882452e-05, "loss": 0.1009, "step": 751 }, { "epoch": 1.43, "grad_norm": 2.198833703994751, "learning_rate": 1.9338523139667264e-05, "loss": 0.0616, "step": 752 }, { "epoch": 1.43, "grad_norm": 2.2878949642181396, "learning_rate": 1.933631905968773e-05, "loss": 0.0748, "step": 753 }, { "epoch": 1.43, "grad_norm": 2.5560646057128906, "learning_rate": 1.9334111439779556e-05, "loss": 0.1043, "step": 754 }, { "epoch": 1.44, "grad_norm": 3.4941911697387695, "learning_rate": 1.933190028077977e-05, "loss": 0.0914, "step": 755 }, { "epoch": 1.44, "grad_norm": 2.1908040046691895, "learning_rate": 1.9329685583526748e-05, "loss": 0.0871, "step": 756 }, { "epoch": 1.44, "grad_norm": 2.955183982849121, "learning_rate": 1.932746734886021e-05, "loss": 0.0897, "step": 757 }, { "epoch": 1.44, "grad_norm": 2.2072198390960693, "learning_rate": 1.932524557762121e-05, "loss": 0.0805, "step": 758 }, { "epoch": 1.44, "grad_norm": 2.4493863582611084, "learning_rate": 1.9323020270652152e-05, "loss": 0.0952, "step": 759 }, { "epoch": 1.44, "grad_norm": 2.913856267929077, "learning_rate": 1.9320791428796774e-05, "loss": 0.1244, "step": 760 }, { "epoch": 1.45, "grad_norm": 1.8853522539138794, "learning_rate": 1.9318559052900158e-05, "loss": 0.0835, "step": 761 }, { "epoch": 1.45, "grad_norm": 1.964979648590088, "learning_rate": 1.931632314380872e-05, "loss": 0.0799, "step": 762 }, { "epoch": 1.45, "grad_norm": 2.5223193168640137, "learning_rate": 1.931408370237023e-05, "loss": 0.0943, "step": 763 }, { "epoch": 1.45, "grad_norm": 1.9679696559906006, "learning_rate": 1.9311840729433776e-05, "loss": 0.0608, "step": 764 }, { "epoch": 1.45, "grad_norm": 2.617488145828247, "learning_rate": 1.93095942258498e-05, "loss": 0.0945, "step": 765 }, { "epoch": 1.46, "grad_norm": 2.612457752227783, "learning_rate": 1.9307344192470085e-05, "loss": 0.0762, "step": 766 }, { "epoch": 1.46, "grad_norm": 2.093069076538086, "learning_rate": 1.9305090630147744e-05, "loss": 0.0932, "step": 767 }, { "epoch": 1.46, "grad_norm": 2.3248441219329834, "learning_rate": 1.9302833539737222e-05, "loss": 0.1017, "step": 768 }, { "epoch": 1.46, "grad_norm": 2.120042324066162, "learning_rate": 1.930057292209432e-05, "loss": 0.067, "step": 769 }, { "epoch": 1.46, "grad_norm": 3.4351179599761963, "learning_rate": 1.929830877807616e-05, "loss": 0.1063, "step": 770 }, { "epoch": 1.47, "grad_norm": 2.0463767051696777, "learning_rate": 1.9296041108541216e-05, "loss": 0.0661, "step": 771 }, { "epoch": 1.47, "grad_norm": 2.75675630569458, "learning_rate": 1.9293769914349278e-05, "loss": 0.1199, "step": 772 }, { "epoch": 1.47, "grad_norm": 2.4220542907714844, "learning_rate": 1.9291495196361495e-05, "loss": 0.0686, "step": 773 }, { "epoch": 1.47, "grad_norm": 1.9499846696853638, "learning_rate": 1.928921695544034e-05, "loss": 0.0591, "step": 774 }, { "epoch": 1.47, "grad_norm": 3.693667411804199, "learning_rate": 1.928693519244962e-05, "loss": 0.0988, "step": 775 }, { "epoch": 1.48, "grad_norm": 2.645510196685791, "learning_rate": 1.9284649908254482e-05, "loss": 0.0913, "step": 776 }, { "epoch": 1.48, "grad_norm": 2.0048766136169434, "learning_rate": 1.9282361103721404e-05, "loss": 0.0734, "step": 777 }, { "epoch": 1.48, "grad_norm": 2.723994016647339, "learning_rate": 1.928006877971821e-05, "loss": 0.1142, "step": 778 }, { "epoch": 1.48, "grad_norm": 1.8056988716125488, "learning_rate": 1.9277772937114044e-05, "loss": 0.0575, "step": 779 }, { "epoch": 1.48, "grad_norm": 2.392977237701416, "learning_rate": 1.9275473576779396e-05, "loss": 0.1101, "step": 780 }, { "epoch": 1.48, "grad_norm": 2.3248465061187744, "learning_rate": 1.9273170699586075e-05, "loss": 0.0541, "step": 781 }, { "epoch": 1.49, "grad_norm": 2.0545172691345215, "learning_rate": 1.9270864306407237e-05, "loss": 0.0622, "step": 782 }, { "epoch": 1.49, "grad_norm": 2.1796298027038574, "learning_rate": 1.926855439811737e-05, "loss": 0.094, "step": 783 }, { "epoch": 1.49, "grad_norm": 2.2871692180633545, "learning_rate": 1.926624097559229e-05, "loss": 0.0645, "step": 784 }, { "epoch": 1.49, "grad_norm": 3.003572463989258, "learning_rate": 1.9263924039709138e-05, "loss": 0.1174, "step": 785 }, { "epoch": 1.49, "grad_norm": 3.5080957412719727, "learning_rate": 1.9261603591346406e-05, "loss": 0.1193, "step": 786 }, { "epoch": 1.5, "grad_norm": 3.472388982772827, "learning_rate": 1.9259279631383908e-05, "loss": 0.1078, "step": 787 }, { "epoch": 1.5, "grad_norm": 2.866504430770874, "learning_rate": 1.9256952160702785e-05, "loss": 0.0753, "step": 788 }, { "epoch": 1.5, "grad_norm": 2.0387120246887207, "learning_rate": 1.925462118018551e-05, "loss": 0.0704, "step": 789 }, { "epoch": 1.5, "grad_norm": 2.309417247772217, "learning_rate": 1.92522866907159e-05, "loss": 0.0655, "step": 790 }, { "epoch": 1.5, "grad_norm": 2.3540709018707275, "learning_rate": 1.9249948693179084e-05, "loss": 0.1048, "step": 791 }, { "epoch": 1.51, "grad_norm": 1.9896612167358398, "learning_rate": 1.9247607188461535e-05, "loss": 0.0815, "step": 792 }, { "epoch": 1.51, "grad_norm": 2.376601457595825, "learning_rate": 1.9245262177451044e-05, "loss": 0.1155, "step": 793 }, { "epoch": 1.51, "grad_norm": 2.372421979904175, "learning_rate": 1.9242913661036744e-05, "loss": 0.0987, "step": 794 }, { "epoch": 1.51, "grad_norm": 2.76837158203125, "learning_rate": 1.9240561640109087e-05, "loss": 0.1251, "step": 795 }, { "epoch": 1.51, "grad_norm": 3.243274450302124, "learning_rate": 1.923820611555986e-05, "loss": 0.1279, "step": 796 }, { "epoch": 1.52, "grad_norm": 2.187790870666504, "learning_rate": 1.9235847088282177e-05, "loss": 0.0945, "step": 797 }, { "epoch": 1.52, "grad_norm": 1.8867666721343994, "learning_rate": 1.9233484559170475e-05, "loss": 0.0737, "step": 798 }, { "epoch": 1.52, "grad_norm": 2.139051914215088, "learning_rate": 1.923111852912053e-05, "loss": 0.0866, "step": 799 }, { "epoch": 1.52, "grad_norm": 2.554103374481201, "learning_rate": 1.9228748999029424e-05, "loss": 0.0794, "step": 800 }, { "epoch": 1.52, "eval_blimp_filtered_avg": 0.7370149253731343, "eval_blimp_filtered_std": 0.0048643016479908855, "step": 800 }, { "epoch": 1.52, "eval_blimp_supplement_avg": 0.790948275862069, "eval_blimp_supplement_std": 0.017716472396762863, "step": 800 }, { "epoch": 1.52, "eval_vqa_filtered_avg": 0.29, "eval_vqa_filtered_std": 0.045604802157206845, "step": 800 }, { "epoch": 1.52, "eval_winoground_filtered_avg": 0.54, "eval_winoground_filtered_std": 0.05009082659620333, "step": 800 }, { "epoch": 1.52, "grad_norm": 3.0619189739227295, "learning_rate": 1.9226375969795595e-05, "loss": 0.0971, "step": 801 }, { "epoch": 1.52, "grad_norm": 2.4612746238708496, "learning_rate": 1.9223999442318794e-05, "loss": 0.1016, "step": 802 }, { "epoch": 1.53, "grad_norm": 1.8746435642242432, "learning_rate": 1.9221619417500084e-05, "loss": 0.0653, "step": 803 }, { "epoch": 1.53, "grad_norm": 2.9355764389038086, "learning_rate": 1.9219235896241878e-05, "loss": 0.118, "step": 804 }, { "epoch": 1.53, "grad_norm": 2.8274567127227783, "learning_rate": 1.9216848879447905e-05, "loss": 0.0969, "step": 805 }, { "epoch": 1.53, "grad_norm": 2.2501721382141113, "learning_rate": 1.921445836802321e-05, "loss": 0.0878, "step": 806 }, { "epoch": 1.53, "grad_norm": 2.487299919128418, "learning_rate": 1.921206436287418e-05, "loss": 0.1005, "step": 807 }, { "epoch": 1.54, "grad_norm": 2.408630847930908, "learning_rate": 1.9209666864908514e-05, "loss": 0.1026, "step": 808 }, { "epoch": 1.54, "grad_norm": 2.665334701538086, "learning_rate": 1.9207265875035242e-05, "loss": 0.0733, "step": 809 }, { "epoch": 1.54, "grad_norm": 2.687852382659912, "learning_rate": 1.9204861394164715e-05, "loss": 0.088, "step": 810 }, { "epoch": 1.54, "grad_norm": 2.7219526767730713, "learning_rate": 1.9202453423208604e-05, "loss": 0.1001, "step": 811 }, { "epoch": 1.54, "grad_norm": 1.8596088886260986, "learning_rate": 1.920004196307991e-05, "loss": 0.0828, "step": 812 }, { "epoch": 1.55, "grad_norm": 2.3644325733184814, "learning_rate": 1.919762701469296e-05, "loss": 0.0814, "step": 813 }, { "epoch": 1.55, "grad_norm": 1.9829598665237427, "learning_rate": 1.9195208578963388e-05, "loss": 0.0669, "step": 814 }, { "epoch": 1.55, "grad_norm": 2.35880708694458, "learning_rate": 1.919278665680817e-05, "loss": 0.0802, "step": 815 }, { "epoch": 1.55, "grad_norm": 2.240539789199829, "learning_rate": 1.9190361249145592e-05, "loss": 0.0727, "step": 816 }, { "epoch": 1.55, "grad_norm": 2.540884256362915, "learning_rate": 1.9187932356895256e-05, "loss": 0.0687, "step": 817 }, { "epoch": 1.56, "grad_norm": 1.9566580057144165, "learning_rate": 1.91854999809781e-05, "loss": 0.0563, "step": 818 }, { "epoch": 1.56, "grad_norm": 2.242234945297241, "learning_rate": 1.9183064122316376e-05, "loss": 0.1012, "step": 819 }, { "epoch": 1.56, "grad_norm": 2.5079007148742676, "learning_rate": 1.9180624781833653e-05, "loss": 0.1192, "step": 820 }, { "epoch": 1.56, "grad_norm": 2.199638843536377, "learning_rate": 1.917818196045483e-05, "loss": 0.0642, "step": 821 }, { "epoch": 1.56, "grad_norm": 2.263848304748535, "learning_rate": 1.917573565910611e-05, "loss": 0.0861, "step": 822 }, { "epoch": 1.56, "grad_norm": 2.802103281021118, "learning_rate": 1.9173285878715033e-05, "loss": 0.1179, "step": 823 }, { "epoch": 1.57, "grad_norm": 3.1066813468933105, "learning_rate": 1.917083262021044e-05, "loss": 0.113, "step": 824 }, { "epoch": 1.57, "grad_norm": 1.9475431442260742, "learning_rate": 1.9168375884522513e-05, "loss": 0.0748, "step": 825 }, { "epoch": 1.57, "grad_norm": 2.269454002380371, "learning_rate": 1.916591567258273e-05, "loss": 0.1066, "step": 826 }, { "epoch": 1.57, "grad_norm": 2.3758788108825684, "learning_rate": 1.91634519853239e-05, "loss": 0.0596, "step": 827 }, { "epoch": 1.57, "grad_norm": 1.9440513849258423, "learning_rate": 1.916098482368015e-05, "loss": 0.0617, "step": 828 }, { "epoch": 1.58, "grad_norm": 2.3173036575317383, "learning_rate": 1.915851418858692e-05, "loss": 0.0939, "step": 829 }, { "epoch": 1.58, "grad_norm": 2.3507986068725586, "learning_rate": 1.915604008098096e-05, "loss": 0.0805, "step": 830 }, { "epoch": 1.58, "grad_norm": 2.6995105743408203, "learning_rate": 1.9153562501800355e-05, "loss": 0.0613, "step": 831 }, { "epoch": 1.58, "grad_norm": 2.2569594383239746, "learning_rate": 1.9151081451984494e-05, "loss": 0.0877, "step": 832 }, { "epoch": 1.58, "grad_norm": 2.232556104660034, "learning_rate": 1.914859693247408e-05, "loss": 0.0881, "step": 833 }, { "epoch": 1.59, "grad_norm": 3.3468306064605713, "learning_rate": 1.9146108944211138e-05, "loss": 0.1109, "step": 834 }, { "epoch": 1.59, "grad_norm": 2.180777072906494, "learning_rate": 1.9143617488139003e-05, "loss": 0.0822, "step": 835 }, { "epoch": 1.59, "grad_norm": 1.8496824502944946, "learning_rate": 1.9141122565202333e-05, "loss": 0.0615, "step": 836 }, { "epoch": 1.59, "grad_norm": 1.7573002576828003, "learning_rate": 1.9138624176347092e-05, "loss": 0.0691, "step": 837 }, { "epoch": 1.59, "grad_norm": 2.215951919555664, "learning_rate": 1.9136122322520556e-05, "loss": 0.0777, "step": 838 }, { "epoch": 1.6, "grad_norm": 2.9230620861053467, "learning_rate": 1.9133617004671325e-05, "loss": 0.1223, "step": 839 }, { "epoch": 1.6, "grad_norm": 2.5987846851348877, "learning_rate": 1.9131108223749312e-05, "loss": 0.084, "step": 840 }, { "epoch": 1.6, "grad_norm": 3.113614559173584, "learning_rate": 1.9128595980705725e-05, "loss": 0.1129, "step": 841 }, { "epoch": 1.6, "grad_norm": 2.5220417976379395, "learning_rate": 1.912608027649311e-05, "loss": 0.0942, "step": 842 }, { "epoch": 1.6, "grad_norm": 2.2174901962280273, "learning_rate": 1.912356111206531e-05, "loss": 0.0665, "step": 843 }, { "epoch": 1.6, "grad_norm": 2.286224126815796, "learning_rate": 1.9121038488377482e-05, "loss": 0.1016, "step": 844 }, { "epoch": 1.61, "grad_norm": 1.889460563659668, "learning_rate": 1.9118512406386092e-05, "loss": 0.0622, "step": 845 }, { "epoch": 1.61, "grad_norm": 1.8117440938949585, "learning_rate": 1.911598286704893e-05, "loss": 0.07, "step": 846 }, { "epoch": 1.61, "grad_norm": 2.2873005867004395, "learning_rate": 1.9113449871325083e-05, "loss": 0.0861, "step": 847 }, { "epoch": 1.61, "grad_norm": 3.1308677196502686, "learning_rate": 1.911091342017495e-05, "loss": 0.1054, "step": 848 }, { "epoch": 1.61, "grad_norm": 1.861289620399475, "learning_rate": 1.910837351456025e-05, "loss": 0.0576, "step": 849 }, { "epoch": 1.62, "grad_norm": 1.8464075326919556, "learning_rate": 1.9105830155444005e-05, "loss": 0.0715, "step": 850 }, { "epoch": 1.62, "grad_norm": 2.3886423110961914, "learning_rate": 1.9103283343790545e-05, "loss": 0.0917, "step": 851 }, { "epoch": 1.62, "grad_norm": 2.30192494392395, "learning_rate": 1.9100733080565507e-05, "loss": 0.0614, "step": 852 }, { "epoch": 1.62, "grad_norm": 2.1195313930511475, "learning_rate": 1.9098179366735846e-05, "loss": 0.087, "step": 853 }, { "epoch": 1.62, "grad_norm": 2.0535223484039307, "learning_rate": 1.909562220326982e-05, "loss": 0.0652, "step": 854 }, { "epoch": 1.63, "grad_norm": 3.2269039154052734, "learning_rate": 1.909306159113699e-05, "loss": 0.0673, "step": 855 }, { "epoch": 1.63, "grad_norm": 2.1840617656707764, "learning_rate": 1.909049753130823e-05, "loss": 0.0838, "step": 856 }, { "epoch": 1.63, "grad_norm": 2.3641319274902344, "learning_rate": 1.908793002475573e-05, "loss": 0.0815, "step": 857 }, { "epoch": 1.63, "grad_norm": 2.470411777496338, "learning_rate": 1.9085359072452965e-05, "loss": 0.0898, "step": 858 }, { "epoch": 1.63, "grad_norm": 2.4886116981506348, "learning_rate": 1.908278467537474e-05, "loss": 0.0798, "step": 859 }, { "epoch": 1.63, "grad_norm": 2.1166532039642334, "learning_rate": 1.9080206834497145e-05, "loss": 0.086, "step": 860 }, { "epoch": 1.64, "grad_norm": 2.5657804012298584, "learning_rate": 1.907762555079759e-05, "loss": 0.0853, "step": 861 }, { "epoch": 1.64, "grad_norm": 2.244382381439209, "learning_rate": 1.907504082525479e-05, "loss": 0.0863, "step": 862 }, { "epoch": 1.64, "grad_norm": 2.7423882484436035, "learning_rate": 1.9072452658848758e-05, "loss": 0.0961, "step": 863 }, { "epoch": 1.64, "grad_norm": 2.6940784454345703, "learning_rate": 1.9069861052560812e-05, "loss": 0.116, "step": 864 }, { "epoch": 1.64, "grad_norm": 2.101179361343384, "learning_rate": 1.906726600737358e-05, "loss": 0.0611, "step": 865 }, { "epoch": 1.65, "grad_norm": 2.029892683029175, "learning_rate": 1.9064667524270996e-05, "loss": 0.0728, "step": 866 }, { "epoch": 1.65, "grad_norm": 2.4457576274871826, "learning_rate": 1.906206560423828e-05, "loss": 0.0868, "step": 867 }, { "epoch": 1.65, "grad_norm": 1.8028603792190552, "learning_rate": 1.905946024826198e-05, "loss": 0.0646, "step": 868 }, { "epoch": 1.65, "grad_norm": 1.7506927251815796, "learning_rate": 1.9056851457329927e-05, "loss": 0.083, "step": 869 }, { "epoch": 1.65, "grad_norm": 2.170807361602783, "learning_rate": 1.9054239232431263e-05, "loss": 0.0613, "step": 870 }, { "epoch": 1.66, "grad_norm": 1.9736117124557495, "learning_rate": 1.9051623574556436e-05, "loss": 0.0669, "step": 871 }, { "epoch": 1.66, "grad_norm": 2.2347800731658936, "learning_rate": 1.9049004484697183e-05, "loss": 0.0772, "step": 872 }, { "epoch": 1.66, "grad_norm": 2.573329448699951, "learning_rate": 1.9046381963846555e-05, "loss": 0.0842, "step": 873 }, { "epoch": 1.66, "grad_norm": 3.180800437927246, "learning_rate": 1.9043756012998895e-05, "loss": 0.079, "step": 874 }, { "epoch": 1.66, "grad_norm": 3.067786693572998, "learning_rate": 1.904112663314985e-05, "loss": 0.1264, "step": 875 }, { "epoch": 1.67, "grad_norm": 2.6712124347686768, "learning_rate": 1.9038493825296372e-05, "loss": 0.0702, "step": 876 }, { "epoch": 1.67, "grad_norm": 2.3819327354431152, "learning_rate": 1.9035857590436704e-05, "loss": 0.083, "step": 877 }, { "epoch": 1.67, "grad_norm": 3.0472631454467773, "learning_rate": 1.903321792957039e-05, "loss": 0.1156, "step": 878 }, { "epoch": 1.67, "grad_norm": 2.5019190311431885, "learning_rate": 1.9030574843698283e-05, "loss": 0.0639, "step": 879 }, { "epoch": 1.67, "grad_norm": 2.3293728828430176, "learning_rate": 1.9027928333822516e-05, "loss": 0.0778, "step": 880 }, { "epoch": 1.67, "grad_norm": 2.966494560241699, "learning_rate": 1.9025278400946544e-05, "loss": 0.1075, "step": 881 }, { "epoch": 1.68, "grad_norm": 2.2674405574798584, "learning_rate": 1.9022625046075097e-05, "loss": 0.065, "step": 882 }, { "epoch": 1.68, "grad_norm": 3.0500733852386475, "learning_rate": 1.9019968270214217e-05, "loss": 0.0929, "step": 883 }, { "epoch": 1.68, "grad_norm": 1.6741544008255005, "learning_rate": 1.9017308074371235e-05, "loss": 0.0563, "step": 884 }, { "epoch": 1.68, "grad_norm": 2.4929358959198, "learning_rate": 1.9014644459554788e-05, "loss": 0.0811, "step": 885 }, { "epoch": 1.68, "grad_norm": 2.6133105754852295, "learning_rate": 1.9011977426774797e-05, "loss": 0.0859, "step": 886 }, { "epoch": 1.69, "grad_norm": 2.447399616241455, "learning_rate": 1.900930697704249e-05, "loss": 0.0871, "step": 887 }, { "epoch": 1.69, "grad_norm": 2.6230318546295166, "learning_rate": 1.9006633111370388e-05, "loss": 0.0913, "step": 888 }, { "epoch": 1.69, "grad_norm": 2.8458218574523926, "learning_rate": 1.90039558307723e-05, "loss": 0.0929, "step": 889 }, { "epoch": 1.69, "grad_norm": 2.4990596771240234, "learning_rate": 1.9001275136263334e-05, "loss": 0.0934, "step": 890 }, { "epoch": 1.69, "grad_norm": 1.8630250692367554, "learning_rate": 1.8998591028859903e-05, "loss": 0.0657, "step": 891 }, { "epoch": 1.7, "grad_norm": 2.7558746337890625, "learning_rate": 1.8995903509579694e-05, "loss": 0.077, "step": 892 }, { "epoch": 1.7, "grad_norm": 2.1536619663238525, "learning_rate": 1.8993212579441703e-05, "loss": 0.0915, "step": 893 }, { "epoch": 1.7, "grad_norm": 3.676095485687256, "learning_rate": 1.8990518239466216e-05, "loss": 0.1497, "step": 894 }, { "epoch": 1.7, "grad_norm": 2.694395065307617, "learning_rate": 1.8987820490674805e-05, "loss": 0.0912, "step": 895 }, { "epoch": 1.7, "grad_norm": 2.196674346923828, "learning_rate": 1.8985119334090348e-05, "loss": 0.0735, "step": 896 }, { "epoch": 1.71, "grad_norm": 2.2136459350585938, "learning_rate": 1.8982414770736998e-05, "loss": 0.054, "step": 897 }, { "epoch": 1.71, "grad_norm": 2.1547272205352783, "learning_rate": 1.897970680164021e-05, "loss": 0.0937, "step": 898 }, { "epoch": 1.71, "grad_norm": 1.782524824142456, "learning_rate": 1.8976995427826737e-05, "loss": 0.0835, "step": 899 }, { "epoch": 1.71, "grad_norm": 2.128607988357544, "learning_rate": 1.8974280650324606e-05, "loss": 0.0768, "step": 900 }, { "epoch": 1.71, "eval_blimp_filtered_avg": 0.7329850746268657, "eval_blimp_filtered_std": 0.004892369173470942, "step": 900 }, { "epoch": 1.71, "eval_blimp_supplement_avg": 0.7887931034482759, "eval_blimp_supplement_std": 0.01798827746544513, "step": 900 }, { "epoch": 1.71, "eval_vqa_filtered_avg": 0.39, "eval_vqa_filtered_std": 0.04902071300001974, "step": 900 }, { "epoch": 1.71, "eval_winoground_filtered_avg": 0.51, "eval_winoground_filtered_std": 0.05024183937956912, "step": 900 }, { "epoch": 1.71, "grad_norm": 1.9412286281585693, "learning_rate": 1.8971562470163146e-05, "loss": 0.075, "step": 901 }, { "epoch": 1.71, "grad_norm": 2.4000446796417236, "learning_rate": 1.8968840888372973e-05, "loss": 0.0828, "step": 902 }, { "epoch": 1.72, "grad_norm": 1.90057373046875, "learning_rate": 1.8966115905985995e-05, "loss": 0.0681, "step": 903 }, { "epoch": 1.72, "grad_norm": 2.2316670417785645, "learning_rate": 1.8963387524035405e-05, "loss": 0.0742, "step": 904 }, { "epoch": 1.72, "grad_norm": 2.6256821155548096, "learning_rate": 1.8960655743555692e-05, "loss": 0.0998, "step": 905 }, { "epoch": 1.72, "grad_norm": 2.282691478729248, "learning_rate": 1.895792056558262e-05, "loss": 0.0793, "step": 906 }, { "epoch": 1.72, "grad_norm": 2.2881240844726562, "learning_rate": 1.895518199115326e-05, "loss": 0.1023, "step": 907 }, { "epoch": 1.73, "grad_norm": 2.023365020751953, "learning_rate": 1.8952440021305956e-05, "loss": 0.081, "step": 908 }, { "epoch": 1.73, "grad_norm": 2.378483772277832, "learning_rate": 1.894969465708035e-05, "loss": 0.0639, "step": 909 }, { "epoch": 1.73, "grad_norm": 1.8454512357711792, "learning_rate": 1.8946945899517355e-05, "loss": 0.0746, "step": 910 }, { "epoch": 1.73, "grad_norm": 2.3330085277557373, "learning_rate": 1.894419374965919e-05, "loss": 0.0941, "step": 911 }, { "epoch": 1.73, "grad_norm": 2.877992868423462, "learning_rate": 1.894143820854935e-05, "loss": 0.0899, "step": 912 }, { "epoch": 1.74, "grad_norm": 2.19382905960083, "learning_rate": 1.893867927723261e-05, "loss": 0.0698, "step": 913 }, { "epoch": 1.74, "grad_norm": 2.7925713062286377, "learning_rate": 1.893591695675505e-05, "loss": 0.0804, "step": 914 }, { "epoch": 1.74, "grad_norm": 1.7684322595596313, "learning_rate": 1.8933151248164014e-05, "loss": 0.0597, "step": 915 }, { "epoch": 1.74, "grad_norm": 2.382556915283203, "learning_rate": 1.8930382152508135e-05, "loss": 0.0689, "step": 916 }, { "epoch": 1.74, "grad_norm": 3.060804605484009, "learning_rate": 1.8927609670837344e-05, "loss": 0.1061, "step": 917 }, { "epoch": 1.75, "grad_norm": 2.870842456817627, "learning_rate": 1.8924833804202844e-05, "loss": 0.1363, "step": 918 }, { "epoch": 1.75, "grad_norm": 1.7192883491516113, "learning_rate": 1.892205455365712e-05, "loss": 0.0431, "step": 919 }, { "epoch": 1.75, "grad_norm": 2.406428575515747, "learning_rate": 1.8919271920253946e-05, "loss": 0.0698, "step": 920 }, { "epoch": 1.75, "grad_norm": 2.085662364959717, "learning_rate": 1.8916485905048375e-05, "loss": 0.0614, "step": 921 }, { "epoch": 1.75, "grad_norm": 2.3504300117492676, "learning_rate": 1.8913696509096746e-05, "loss": 0.0556, "step": 922 }, { "epoch": 1.75, "grad_norm": 2.627202033996582, "learning_rate": 1.8910903733456675e-05, "loss": 0.1053, "step": 923 }, { "epoch": 1.76, "grad_norm": 2.956897497177124, "learning_rate": 1.8908107579187064e-05, "loss": 0.0881, "step": 924 }, { "epoch": 1.76, "grad_norm": 3.0335328578948975, "learning_rate": 1.8905308047348093e-05, "loss": 0.104, "step": 925 }, { "epoch": 1.76, "grad_norm": 2.7031667232513428, "learning_rate": 1.8902505139001218e-05, "loss": 0.0855, "step": 926 }, { "epoch": 1.76, "grad_norm": 2.289470911026001, "learning_rate": 1.8899698855209193e-05, "loss": 0.1014, "step": 927 }, { "epoch": 1.76, "grad_norm": 2.8086438179016113, "learning_rate": 1.8896889197036027e-05, "loss": 0.1087, "step": 928 }, { "epoch": 1.77, "grad_norm": 3.1485776901245117, "learning_rate": 1.889407616554703e-05, "loss": 0.0748, "step": 929 }, { "epoch": 1.77, "grad_norm": 2.8415887355804443, "learning_rate": 1.8891259761808772e-05, "loss": 0.1079, "step": 930 }, { "epoch": 1.77, "grad_norm": 2.836724281311035, "learning_rate": 1.8888439986889122e-05, "loss": 0.0719, "step": 931 }, { "epoch": 1.77, "grad_norm": 2.687558174133301, "learning_rate": 1.888561684185721e-05, "loss": 0.0828, "step": 932 }, { "epoch": 1.77, "grad_norm": 1.8658231496810913, "learning_rate": 1.8882790327783456e-05, "loss": 0.0789, "step": 933 }, { "epoch": 1.78, "grad_norm": 1.7682075500488281, "learning_rate": 1.8879960445739548e-05, "loss": 0.0591, "step": 934 }, { "epoch": 1.78, "grad_norm": 2.322110891342163, "learning_rate": 1.8877127196798456e-05, "loss": 0.0821, "step": 935 }, { "epoch": 1.78, "grad_norm": 2.0193557739257812, "learning_rate": 1.8874290582034427e-05, "loss": 0.0833, "step": 936 }, { "epoch": 1.78, "grad_norm": 2.0426313877105713, "learning_rate": 1.887145060252298e-05, "loss": 0.0648, "step": 937 }, { "epoch": 1.78, "grad_norm": 2.1219727993011475, "learning_rate": 1.8868607259340912e-05, "loss": 0.0823, "step": 938 }, { "epoch": 1.79, "grad_norm": 2.1303117275238037, "learning_rate": 1.8865760553566296e-05, "loss": 0.0561, "step": 939 }, { "epoch": 1.79, "grad_norm": 2.9563798904418945, "learning_rate": 1.8862910486278484e-05, "loss": 0.1212, "step": 940 }, { "epoch": 1.79, "grad_norm": 2.583066463470459, "learning_rate": 1.8860057058558096e-05, "loss": 0.0789, "step": 941 }, { "epoch": 1.79, "grad_norm": 2.5063252449035645, "learning_rate": 1.885720027148703e-05, "loss": 0.0718, "step": 942 }, { "epoch": 1.79, "grad_norm": 2.405871629714966, "learning_rate": 1.885434012614845e-05, "loss": 0.0754, "step": 943 }, { "epoch": 1.79, "grad_norm": 2.0459015369415283, "learning_rate": 1.8851476623626806e-05, "loss": 0.0812, "step": 944 }, { "epoch": 1.8, "grad_norm": 1.8213294744491577, "learning_rate": 1.884860976500781e-05, "loss": 0.0623, "step": 945 }, { "epoch": 1.8, "grad_norm": 2.532500982284546, "learning_rate": 1.8845739551378453e-05, "loss": 0.0656, "step": 946 }, { "epoch": 1.8, "grad_norm": 2.4361085891723633, "learning_rate": 1.8842865983827003e-05, "loss": 0.0976, "step": 947 }, { "epoch": 1.8, "grad_norm": 1.9808062314987183, "learning_rate": 1.8839989063442977e-05, "loss": 0.0675, "step": 948 }, { "epoch": 1.8, "grad_norm": 2.2927308082580566, "learning_rate": 1.883710879131719e-05, "loss": 0.0919, "step": 949 }, { "epoch": 1.81, "grad_norm": 1.962715983390808, "learning_rate": 1.8834225168541716e-05, "loss": 0.0584, "step": 950 }, { "epoch": 1.81, "grad_norm": 3.1217799186706543, "learning_rate": 1.8831338196209898e-05, "loss": 0.1324, "step": 951 }, { "epoch": 1.81, "grad_norm": 2.902688980102539, "learning_rate": 1.882844787541635e-05, "loss": 0.105, "step": 952 }, { "epoch": 1.81, "grad_norm": 2.4672770500183105, "learning_rate": 1.8825554207256957e-05, "loss": 0.0893, "step": 953 }, { "epoch": 1.81, "grad_norm": 1.6950104236602783, "learning_rate": 1.8822657192828878e-05, "loss": 0.0553, "step": 954 }, { "epoch": 1.82, "grad_norm": 1.9746572971343994, "learning_rate": 1.8819756833230527e-05, "loss": 0.0918, "step": 955 }, { "epoch": 1.82, "grad_norm": 2.4441652297973633, "learning_rate": 1.88168531295616e-05, "loss": 0.1045, "step": 956 }, { "epoch": 1.82, "grad_norm": 1.9801048040390015, "learning_rate": 1.8813946082923057e-05, "loss": 0.0671, "step": 957 }, { "epoch": 1.82, "grad_norm": 2.6172757148742676, "learning_rate": 1.8811035694417125e-05, "loss": 0.0774, "step": 958 }, { "epoch": 1.82, "grad_norm": 1.782094955444336, "learning_rate": 1.8808121965147295e-05, "loss": 0.0527, "step": 959 }, { "epoch": 1.83, "grad_norm": 2.3888332843780518, "learning_rate": 1.8805204896218326e-05, "loss": 0.1183, "step": 960 }, { "epoch": 1.83, "grad_norm": 2.4042036533355713, "learning_rate": 1.8802284488736246e-05, "loss": 0.0892, "step": 961 }, { "epoch": 1.83, "grad_norm": 2.0361814498901367, "learning_rate": 1.8799360743808348e-05, "loss": 0.0842, "step": 962 }, { "epoch": 1.83, "grad_norm": 2.397382974624634, "learning_rate": 1.879643366254319e-05, "loss": 0.0831, "step": 963 }, { "epoch": 1.83, "grad_norm": 1.8954051733016968, "learning_rate": 1.8793503246050593e-05, "loss": 0.0607, "step": 964 }, { "epoch": 1.83, "grad_norm": 1.7987644672393799, "learning_rate": 1.8790569495441647e-05, "loss": 0.0557, "step": 965 }, { "epoch": 1.84, "grad_norm": 2.097402572631836, "learning_rate": 1.8787632411828697e-05, "loss": 0.0743, "step": 966 }, { "epoch": 1.84, "grad_norm": 2.8834872245788574, "learning_rate": 1.878469199632537e-05, "loss": 0.0873, "step": 967 }, { "epoch": 1.84, "grad_norm": 2.664329767227173, "learning_rate": 1.878174825004653e-05, "loss": 0.0809, "step": 968 }, { "epoch": 1.84, "grad_norm": 2.5048258304595947, "learning_rate": 1.8778801174108327e-05, "loss": 0.071, "step": 969 }, { "epoch": 1.84, "grad_norm": 2.1221189498901367, "learning_rate": 1.8775850769628167e-05, "loss": 0.0754, "step": 970 }, { "epoch": 1.85, "grad_norm": 1.9124075174331665, "learning_rate": 1.877289703772471e-05, "loss": 0.0591, "step": 971 }, { "epoch": 1.85, "grad_norm": 2.453085422515869, "learning_rate": 1.8769939979517883e-05, "loss": 0.1091, "step": 972 }, { "epoch": 1.85, "grad_norm": 2.947727680206299, "learning_rate": 1.876697959612888e-05, "loss": 0.0866, "step": 973 }, { "epoch": 1.85, "grad_norm": 2.35176682472229, "learning_rate": 1.876401588868015e-05, "loss": 0.0678, "step": 974 }, { "epoch": 1.85, "grad_norm": 2.309992790222168, "learning_rate": 1.8761048858295396e-05, "loss": 0.0671, "step": 975 }, { "epoch": 1.86, "grad_norm": 1.9476330280303955, "learning_rate": 1.8758078506099596e-05, "loss": 0.0829, "step": 976 }, { "epoch": 1.86, "grad_norm": 2.3074254989624023, "learning_rate": 1.8755104833218972e-05, "loss": 0.0602, "step": 977 }, { "epoch": 1.86, "grad_norm": 1.9904569387435913, "learning_rate": 1.8752127840781018e-05, "loss": 0.0529, "step": 978 }, { "epoch": 1.86, "grad_norm": 1.962105393409729, "learning_rate": 1.8749147529914476e-05, "loss": 0.0937, "step": 979 }, { "epoch": 1.86, "grad_norm": 2.5717499256134033, "learning_rate": 1.8746163901749357e-05, "loss": 0.0865, "step": 980 }, { "epoch": 1.87, "grad_norm": 2.245312213897705, "learning_rate": 1.874317695741692e-05, "loss": 0.0853, "step": 981 }, { "epoch": 1.87, "grad_norm": 2.433260917663574, "learning_rate": 1.8740186698049683e-05, "loss": 0.0878, "step": 982 }, { "epoch": 1.87, "grad_norm": 2.031048059463501, "learning_rate": 1.8737193124781426e-05, "loss": 0.0732, "step": 983 }, { "epoch": 1.87, "grad_norm": 1.9851237535476685, "learning_rate": 1.8734196238747183e-05, "loss": 0.0779, "step": 984 }, { "epoch": 1.87, "grad_norm": 2.3031280040740967, "learning_rate": 1.8731196041083243e-05, "loss": 0.1075, "step": 985 }, { "epoch": 1.87, "grad_norm": 2.1409337520599365, "learning_rate": 1.8728192532927153e-05, "loss": 0.0572, "step": 986 }, { "epoch": 1.88, "grad_norm": 1.9340320825576782, "learning_rate": 1.872518571541771e-05, "loss": 0.0713, "step": 987 }, { "epoch": 1.88, "grad_norm": 2.13228178024292, "learning_rate": 1.872217558969497e-05, "loss": 0.0894, "step": 988 }, { "epoch": 1.88, "grad_norm": 2.16324782371521, "learning_rate": 1.8719162156900244e-05, "loss": 0.0731, "step": 989 }, { "epoch": 1.88, "grad_norm": 2.4928464889526367, "learning_rate": 1.8716145418176092e-05, "loss": 0.1293, "step": 990 }, { "epoch": 1.88, "grad_norm": 2.111593246459961, "learning_rate": 1.871312537466634e-05, "loss": 0.1078, "step": 991 }, { "epoch": 1.89, "grad_norm": 2.0513651371002197, "learning_rate": 1.8710102027516046e-05, "loss": 0.0866, "step": 992 }, { "epoch": 1.89, "grad_norm": 2.1780552864074707, "learning_rate": 1.8707075377871543e-05, "loss": 0.0758, "step": 993 }, { "epoch": 1.89, "grad_norm": 2.6857268810272217, "learning_rate": 1.8704045426880396e-05, "loss": 0.0904, "step": 994 }, { "epoch": 1.89, "grad_norm": 1.9247783422470093, "learning_rate": 1.870101217569144e-05, "loss": 0.0608, "step": 995 }, { "epoch": 1.89, "grad_norm": 1.5526390075683594, "learning_rate": 1.869797562545475e-05, "loss": 0.0612, "step": 996 }, { "epoch": 1.9, "grad_norm": 2.9125380516052246, "learning_rate": 1.8694935777321653e-05, "loss": 0.1013, "step": 997 }, { "epoch": 1.9, "grad_norm": 2.3809564113616943, "learning_rate": 1.869189263244473e-05, "loss": 0.0611, "step": 998 }, { "epoch": 1.9, "grad_norm": 2.7366461753845215, "learning_rate": 1.8688846191977807e-05, "loss": 0.0938, "step": 999 }, { "epoch": 1.9, "grad_norm": 2.4825265407562256, "learning_rate": 1.8685796457075965e-05, "loss": 0.0934, "step": 1000 }, { "epoch": 1.9, "eval_blimp_filtered_avg": 0.7343283582089553, "eval_blimp_filtered_std": 0.004860321759950948, "step": 1000 }, { "epoch": 1.9, "eval_blimp_supplement_avg": 0.7974137931034483, "eval_blimp_supplement_std": 0.017627048158934195, "step": 1000 }, { "epoch": 1.9, "eval_vqa_filtered_avg": 0.34, "eval_vqa_filtered_std": 0.04760952285695235, "step": 1000 }, { "epoch": 1.9, "eval_winoground_filtered_avg": 0.55, "eval_winoground_filtered_std": 0.05, "step": 1000 }, { "epoch": 1.9, "grad_norm": 2.914161443710327, "learning_rate": 1.868274342889553e-05, "loss": 0.0708, "step": 1001 }, { "epoch": 1.9, "grad_norm": 2.70162034034729, "learning_rate": 1.8679687108594084e-05, "loss": 0.1205, "step": 1002 }, { "epoch": 1.91, "grad_norm": 2.4711403846740723, "learning_rate": 1.8676627497330442e-05, "loss": 0.093, "step": 1003 }, { "epoch": 1.91, "grad_norm": 2.468491792678833, "learning_rate": 1.867356459626468e-05, "loss": 0.1073, "step": 1004 }, { "epoch": 1.91, "grad_norm": 1.7171294689178467, "learning_rate": 1.8670498406558123e-05, "loss": 0.0611, "step": 1005 }, { "epoch": 1.91, "grad_norm": 2.3509175777435303, "learning_rate": 1.8667428929373335e-05, "loss": 0.0779, "step": 1006 }, { "epoch": 1.91, "grad_norm": 2.21705961227417, "learning_rate": 1.8664356165874123e-05, "loss": 0.0703, "step": 1007 }, { "epoch": 1.92, "grad_norm": 2.5045785903930664, "learning_rate": 1.8661280117225554e-05, "loss": 0.0809, "step": 1008 }, { "epoch": 1.92, "grad_norm": 2.4906423091888428, "learning_rate": 1.865820078459393e-05, "loss": 0.0831, "step": 1009 }, { "epoch": 1.92, "grad_norm": 2.212308645248413, "learning_rate": 1.8655118169146797e-05, "loss": 0.0608, "step": 1010 }, { "epoch": 1.92, "grad_norm": 2.2575128078460693, "learning_rate": 1.8652032272052957e-05, "loss": 0.0844, "step": 1011 }, { "epoch": 1.92, "grad_norm": 2.157594919204712, "learning_rate": 1.864894309448244e-05, "loss": 0.0677, "step": 1012 }, { "epoch": 1.93, "grad_norm": 2.781226873397827, "learning_rate": 1.864585063760654e-05, "loss": 0.0789, "step": 1013 }, { "epoch": 1.93, "grad_norm": 2.611783742904663, "learning_rate": 1.864275490259777e-05, "loss": 0.0706, "step": 1014 }, { "epoch": 1.93, "grad_norm": 2.707029104232788, "learning_rate": 1.863965589062991e-05, "loss": 0.0986, "step": 1015 }, { "epoch": 1.93, "grad_norm": 2.118633270263672, "learning_rate": 1.8636553602877962e-05, "loss": 0.0776, "step": 1016 }, { "epoch": 1.93, "grad_norm": 2.139946699142456, "learning_rate": 1.863344804051819e-05, "loss": 0.088, "step": 1017 }, { "epoch": 1.94, "grad_norm": 2.570171594619751, "learning_rate": 1.8630339204728077e-05, "loss": 0.1091, "step": 1018 }, { "epoch": 1.94, "grad_norm": 2.7221834659576416, "learning_rate": 1.8627227096686365e-05, "loss": 0.1147, "step": 1019 }, { "epoch": 1.94, "grad_norm": 2.286804676055908, "learning_rate": 1.8624111717573035e-05, "loss": 0.0664, "step": 1020 }, { "epoch": 1.94, "grad_norm": 2.4048259258270264, "learning_rate": 1.86209930685693e-05, "loss": 0.0894, "step": 1021 }, { "epoch": 1.94, "grad_norm": 2.2822072505950928, "learning_rate": 1.8617871150857616e-05, "loss": 0.0769, "step": 1022 }, { "epoch": 1.94, "grad_norm": 1.8440276384353638, "learning_rate": 1.8614745965621676e-05, "loss": 0.0758, "step": 1023 }, { "epoch": 1.95, "grad_norm": 1.6894372701644897, "learning_rate": 1.8611617514046426e-05, "loss": 0.0569, "step": 1024 }, { "epoch": 1.95, "grad_norm": 2.0050442218780518, "learning_rate": 1.860848579731803e-05, "loss": 0.0678, "step": 1025 }, { "epoch": 1.95, "grad_norm": 2.3473896980285645, "learning_rate": 1.8605350816623905e-05, "loss": 0.0894, "step": 1026 }, { "epoch": 1.95, "grad_norm": 1.567386269569397, "learning_rate": 1.8602212573152693e-05, "loss": 0.0626, "step": 1027 }, { "epoch": 1.95, "grad_norm": 2.0074219703674316, "learning_rate": 1.859907106809429e-05, "loss": 0.0826, "step": 1028 }, { "epoch": 1.96, "grad_norm": 1.922435998916626, "learning_rate": 1.8595926302639814e-05, "loss": 0.0693, "step": 1029 }, { "epoch": 1.96, "grad_norm": 2.2840707302093506, "learning_rate": 1.8592778277981623e-05, "loss": 0.0923, "step": 1030 }, { "epoch": 1.96, "grad_norm": 2.0053789615631104, "learning_rate": 1.8589626995313312e-05, "loss": 0.0492, "step": 1031 }, { "epoch": 1.96, "grad_norm": 1.9126862287521362, "learning_rate": 1.8586472455829712e-05, "loss": 0.0963, "step": 1032 }, { "epoch": 1.96, "grad_norm": 2.340989589691162, "learning_rate": 1.858331466072689e-05, "loss": 0.0587, "step": 1033 }, { "epoch": 1.97, "grad_norm": 2.676450252532959, "learning_rate": 1.8580153611202143e-05, "loss": 0.0534, "step": 1034 }, { "epoch": 1.97, "grad_norm": 2.0042495727539062, "learning_rate": 1.8576989308454005e-05, "loss": 0.0525, "step": 1035 }, { "epoch": 1.97, "grad_norm": 2.3334503173828125, "learning_rate": 1.8573821753682245e-05, "loss": 0.0705, "step": 1036 }, { "epoch": 1.97, "grad_norm": 2.2797703742980957, "learning_rate": 1.857065094808786e-05, "loss": 0.0914, "step": 1037 }, { "epoch": 1.97, "grad_norm": 2.392958879470825, "learning_rate": 1.8567476892873082e-05, "loss": 0.1007, "step": 1038 }, { "epoch": 1.98, "grad_norm": 3.3312342166900635, "learning_rate": 1.8564299589241375e-05, "loss": 0.0929, "step": 1039 }, { "epoch": 1.98, "grad_norm": 1.6529061794281006, "learning_rate": 1.856111903839744e-05, "loss": 0.0702, "step": 1040 }, { "epoch": 1.98, "grad_norm": 2.246068239212036, "learning_rate": 1.85579352415472e-05, "loss": 0.0914, "step": 1041 }, { "epoch": 1.98, "grad_norm": 1.9330739974975586, "learning_rate": 1.8554748199897816e-05, "loss": 0.1011, "step": 1042 }, { "epoch": 1.98, "grad_norm": 2.89658522605896, "learning_rate": 1.8551557914657672e-05, "loss": 0.071, "step": 1043 }, { "epoch": 1.98, "grad_norm": 1.7785552740097046, "learning_rate": 1.854836438703639e-05, "loss": 0.0671, "step": 1044 }, { "epoch": 1.99, "grad_norm": 2.1652719974517822, "learning_rate": 1.8545167618244818e-05, "loss": 0.0821, "step": 1045 }, { "epoch": 1.99, "grad_norm": 2.6355738639831543, "learning_rate": 1.854196760949503e-05, "loss": 0.1093, "step": 1046 }, { "epoch": 1.99, "grad_norm": 2.0613670349121094, "learning_rate": 1.8538764362000338e-05, "loss": 0.0691, "step": 1047 }, { "epoch": 1.99, "grad_norm": 1.8577030897140503, "learning_rate": 1.8535557876975265e-05, "loss": 0.0718, "step": 1048 }, { "epoch": 1.99, "grad_norm": 2.398498773574829, "learning_rate": 1.8532348155635575e-05, "loss": 0.08, "step": 1049 }, { "epoch": 2.0, "grad_norm": 2.850620985031128, "learning_rate": 1.8529135199198257e-05, "loss": 0.1221, "step": 1050 }, { "epoch": 2.0, "grad_norm": 2.2389535903930664, "learning_rate": 1.8525919008881527e-05, "loss": 0.0929, "step": 1051 }, { "epoch": 2.0, "grad_norm": 2.418889045715332, "learning_rate": 1.852269958590482e-05, "loss": 0.067, "step": 1052 }, { "epoch": 2.0, "grad_norm": 1.1587530374526978, "learning_rate": 1.8519476931488806e-05, "loss": 0.0356, "step": 1053 }, { "epoch": 2.0, "grad_norm": 2.318089008331299, "learning_rate": 1.851625104685538e-05, "loss": 0.0437, "step": 1054 }, { "epoch": 2.01, "grad_norm": 2.007568597793579, "learning_rate": 1.8513021933227648e-05, "loss": 0.051, "step": 1055 }, { "epoch": 2.01, "grad_norm": 1.7513086795806885, "learning_rate": 1.8509789591829957e-05, "loss": 0.0505, "step": 1056 }, { "epoch": 2.01, "grad_norm": 2.1185507774353027, "learning_rate": 1.850655402388787e-05, "loss": 0.049, "step": 1057 }, { "epoch": 2.01, "grad_norm": 1.9943206310272217, "learning_rate": 1.8503315230628176e-05, "loss": 0.0672, "step": 1058 }, { "epoch": 2.01, "grad_norm": 2.2401931285858154, "learning_rate": 1.8500073213278883e-05, "loss": 0.0492, "step": 1059 }, { "epoch": 2.02, "grad_norm": 1.886414647102356, "learning_rate": 1.8496827973069223e-05, "loss": 0.0414, "step": 1060 }, { "epoch": 2.02, "grad_norm": 2.27547025680542, "learning_rate": 1.8493579511229657e-05, "loss": 0.0609, "step": 1061 }, { "epoch": 2.02, "grad_norm": 1.9332878589630127, "learning_rate": 1.8490327828991852e-05, "loss": 0.045, "step": 1062 }, { "epoch": 2.02, "grad_norm": 1.931746006011963, "learning_rate": 1.8487072927588713e-05, "loss": 0.0557, "step": 1063 }, { "epoch": 2.02, "grad_norm": 2.2391581535339355, "learning_rate": 1.848381480825435e-05, "loss": 0.0532, "step": 1064 }, { "epoch": 2.02, "grad_norm": 1.9779335260391235, "learning_rate": 1.8480553472224113e-05, "loss": 0.0447, "step": 1065 }, { "epoch": 2.03, "grad_norm": 1.9138058423995972, "learning_rate": 1.847728892073455e-05, "loss": 0.0507, "step": 1066 }, { "epoch": 2.03, "grad_norm": 2.317338466644287, "learning_rate": 1.8474021155023442e-05, "loss": 0.048, "step": 1067 }, { "epoch": 2.03, "grad_norm": 1.5921686887741089, "learning_rate": 1.8470750176329782e-05, "loss": 0.0376, "step": 1068 }, { "epoch": 2.03, "grad_norm": 1.9282227754592896, "learning_rate": 1.8467475985893784e-05, "loss": 0.0442, "step": 1069 }, { "epoch": 2.03, "grad_norm": 1.993407130241394, "learning_rate": 1.846419858495688e-05, "loss": 0.0586, "step": 1070 }, { "epoch": 2.04, "grad_norm": 2.278686285018921, "learning_rate": 1.846091797476172e-05, "loss": 0.0526, "step": 1071 }, { "epoch": 2.04, "grad_norm": 2.6277616024017334, "learning_rate": 1.8457634156552167e-05, "loss": 0.0383, "step": 1072 }, { "epoch": 2.04, "grad_norm": 2.2279744148254395, "learning_rate": 1.8454347131573306e-05, "loss": 0.05, "step": 1073 }, { "epoch": 2.04, "grad_norm": 2.5356991291046143, "learning_rate": 1.8451056901071437e-05, "loss": 0.047, "step": 1074 }, { "epoch": 2.04, "grad_norm": 1.9142693281173706, "learning_rate": 1.8447763466294065e-05, "loss": 0.0503, "step": 1075 }, { "epoch": 2.05, "grad_norm": 2.0985963344573975, "learning_rate": 1.8444466828489925e-05, "loss": 0.075, "step": 1076 }, { "epoch": 2.05, "grad_norm": 1.4713847637176514, "learning_rate": 1.8441166988908956e-05, "loss": 0.0349, "step": 1077 }, { "epoch": 2.05, "grad_norm": 1.7398107051849365, "learning_rate": 1.8437863948802317e-05, "loss": 0.0409, "step": 1078 }, { "epoch": 2.05, "grad_norm": 1.501332402229309, "learning_rate": 1.843455770942238e-05, "loss": 0.0454, "step": 1079 }, { "epoch": 2.05, "grad_norm": 1.905275583267212, "learning_rate": 1.843124827202272e-05, "loss": 0.0508, "step": 1080 }, { "epoch": 2.06, "grad_norm": 2.047034978866577, "learning_rate": 1.8427935637858136e-05, "loss": 0.0612, "step": 1081 }, { "epoch": 2.06, "grad_norm": 2.239392042160034, "learning_rate": 1.842461980818464e-05, "loss": 0.0428, "step": 1082 }, { "epoch": 2.06, "grad_norm": 1.9377552270889282, "learning_rate": 1.842130078425945e-05, "loss": 0.0353, "step": 1083 }, { "epoch": 2.06, "grad_norm": 2.4704997539520264, "learning_rate": 1.8417978567340996e-05, "loss": 0.0768, "step": 1084 }, { "epoch": 2.06, "grad_norm": 2.523285150527954, "learning_rate": 1.8414653158688915e-05, "loss": 0.072, "step": 1085 }, { "epoch": 2.06, "grad_norm": 1.787269115447998, "learning_rate": 1.841132455956406e-05, "loss": 0.0417, "step": 1086 }, { "epoch": 2.07, "grad_norm": 1.4717763662338257, "learning_rate": 1.8407992771228498e-05, "loss": 0.0347, "step": 1087 }, { "epoch": 2.07, "grad_norm": 1.5180251598358154, "learning_rate": 1.8404657794945492e-05, "loss": 0.0327, "step": 1088 }, { "epoch": 2.07, "grad_norm": 1.8626291751861572, "learning_rate": 1.8401319631979523e-05, "loss": 0.036, "step": 1089 }, { "epoch": 2.07, "grad_norm": 2.126142740249634, "learning_rate": 1.8397978283596276e-05, "loss": 0.0467, "step": 1090 }, { "epoch": 2.07, "grad_norm": 1.1293895244598389, "learning_rate": 1.8394633751062653e-05, "loss": 0.0227, "step": 1091 }, { "epoch": 2.08, "grad_norm": 1.7428995370864868, "learning_rate": 1.8391286035646748e-05, "loss": 0.0513, "step": 1092 }, { "epoch": 2.08, "grad_norm": 1.7292149066925049, "learning_rate": 1.8387935138617873e-05, "loss": 0.0412, "step": 1093 }, { "epoch": 2.08, "grad_norm": 2.2472784519195557, "learning_rate": 1.838458106124655e-05, "loss": 0.0463, "step": 1094 }, { "epoch": 2.08, "grad_norm": 1.6136966943740845, "learning_rate": 1.8381223804804485e-05, "loss": 0.0448, "step": 1095 }, { "epoch": 2.08, "grad_norm": 1.5768202543258667, "learning_rate": 1.837786337056462e-05, "loss": 0.0432, "step": 1096 }, { "epoch": 2.09, "grad_norm": 1.7894152402877808, "learning_rate": 1.8374499759801075e-05, "loss": 0.0572, "step": 1097 }, { "epoch": 2.09, "grad_norm": 1.8659389019012451, "learning_rate": 1.8371132973789194e-05, "loss": 0.0566, "step": 1098 }, { "epoch": 2.09, "grad_norm": 2.244833469390869, "learning_rate": 1.8367763013805508e-05, "loss": 0.0617, "step": 1099 }, { "epoch": 2.09, "grad_norm": 1.9449526071548462, "learning_rate": 1.836438988112777e-05, "loss": 0.0586, "step": 1100 }, { "epoch": 2.09, "eval_blimp_filtered_avg": 0.7326865671641791, "eval_blimp_filtered_std": 0.0048596504112136995, "step": 1100 }, { "epoch": 2.09, "eval_blimp_supplement_avg": 0.7952586206896551, "eval_blimp_supplement_std": 0.017650090986091094, "step": 1100 }, { "epoch": 2.09, "eval_vqa_filtered_avg": 0.37, "eval_vqa_filtered_std": 0.04852365870939099, "step": 1100 }, { "epoch": 2.09, "eval_winoground_filtered_avg": 0.51, "eval_winoground_filtered_std": 0.05024183937956912, "step": 1100 }, { "epoch": 2.09, "grad_norm": 1.7739733457565308, "learning_rate": 1.8361013577034922e-05, "loss": 0.0494, "step": 1101 }, { "epoch": 2.1, "grad_norm": 2.269840955734253, "learning_rate": 1.835763410280711e-05, "loss": 0.0531, "step": 1102 }, { "epoch": 2.1, "grad_norm": 2.002411127090454, "learning_rate": 1.835425145972569e-05, "loss": 0.0377, "step": 1103 }, { "epoch": 2.1, "grad_norm": 1.5225259065628052, "learning_rate": 1.8350865649073208e-05, "loss": 0.0373, "step": 1104 }, { "epoch": 2.1, "grad_norm": 1.7489503622055054, "learning_rate": 1.834747667213342e-05, "loss": 0.0348, "step": 1105 }, { "epoch": 2.1, "grad_norm": 1.543177843093872, "learning_rate": 1.834408453019128e-05, "loss": 0.031, "step": 1106 }, { "epoch": 2.1, "grad_norm": 1.7618612051010132, "learning_rate": 1.8340689224532942e-05, "loss": 0.0392, "step": 1107 }, { "epoch": 2.11, "grad_norm": 1.2924379110336304, "learning_rate": 1.833729075644576e-05, "loss": 0.0236, "step": 1108 }, { "epoch": 2.11, "grad_norm": 1.1198445558547974, "learning_rate": 1.8333889127218278e-05, "loss": 0.0333, "step": 1109 }, { "epoch": 2.11, "grad_norm": 1.4607149362564087, "learning_rate": 1.8330484338140258e-05, "loss": 0.0289, "step": 1110 }, { "epoch": 2.11, "grad_norm": 1.833954095840454, "learning_rate": 1.8327076390502647e-05, "loss": 0.0329, "step": 1111 }, { "epoch": 2.11, "grad_norm": 1.691361427307129, "learning_rate": 1.832366528559758e-05, "loss": 0.0321, "step": 1112 }, { "epoch": 2.12, "grad_norm": 1.7744460105895996, "learning_rate": 1.8320251024718414e-05, "loss": 0.0377, "step": 1113 }, { "epoch": 2.12, "grad_norm": 1.7173166275024414, "learning_rate": 1.8316833609159683e-05, "loss": 0.0294, "step": 1114 }, { "epoch": 2.12, "grad_norm": 2.054166316986084, "learning_rate": 1.8313413040217126e-05, "loss": 0.0546, "step": 1115 }, { "epoch": 2.12, "grad_norm": 2.004732608795166, "learning_rate": 1.8309989319187672e-05, "loss": 0.0399, "step": 1116 }, { "epoch": 2.12, "grad_norm": 1.865647554397583, "learning_rate": 1.8306562447369448e-05, "loss": 0.0307, "step": 1117 }, { "epoch": 2.13, "grad_norm": 1.8298345804214478, "learning_rate": 1.8303132426061782e-05, "loss": 0.0427, "step": 1118 }, { "epoch": 2.13, "grad_norm": 2.0782103538513184, "learning_rate": 1.829969925656518e-05, "loss": 0.0613, "step": 1119 }, { "epoch": 2.13, "grad_norm": 2.6170341968536377, "learning_rate": 1.829626294018136e-05, "loss": 0.0713, "step": 1120 }, { "epoch": 2.13, "grad_norm": 1.7805688381195068, "learning_rate": 1.8292823478213218e-05, "loss": 0.0457, "step": 1121 }, { "epoch": 2.13, "grad_norm": 1.8772631883621216, "learning_rate": 1.8289380871964854e-05, "loss": 0.052, "step": 1122 }, { "epoch": 2.13, "grad_norm": 2.4757118225097656, "learning_rate": 1.8285935122741555e-05, "loss": 0.0623, "step": 1123 }, { "epoch": 2.14, "grad_norm": 2.242863416671753, "learning_rate": 1.8282486231849796e-05, "loss": 0.0415, "step": 1124 }, { "epoch": 2.14, "grad_norm": 2.0758557319641113, "learning_rate": 1.8279034200597248e-05, "loss": 0.028, "step": 1125 }, { "epoch": 2.14, "grad_norm": 1.9741967916488647, "learning_rate": 1.827557903029278e-05, "loss": 0.0572, "step": 1126 }, { "epoch": 2.14, "grad_norm": 1.550026774406433, "learning_rate": 1.8272120722246436e-05, "loss": 0.0279, "step": 1127 }, { "epoch": 2.14, "grad_norm": 1.300110936164856, "learning_rate": 1.8268659277769457e-05, "loss": 0.025, "step": 1128 }, { "epoch": 2.15, "grad_norm": 2.382019281387329, "learning_rate": 1.8265194698174275e-05, "loss": 0.0633, "step": 1129 }, { "epoch": 2.15, "grad_norm": 1.9719476699829102, "learning_rate": 1.8261726984774507e-05, "loss": 0.0467, "step": 1130 }, { "epoch": 2.15, "grad_norm": 1.5416756868362427, "learning_rate": 1.8258256138884967e-05, "loss": 0.0315, "step": 1131 }, { "epoch": 2.15, "grad_norm": 1.5409598350524902, "learning_rate": 1.825478216182164e-05, "loss": 0.0535, "step": 1132 }, { "epoch": 2.15, "grad_norm": 1.1938165426254272, "learning_rate": 1.8251305054901714e-05, "loss": 0.0228, "step": 1133 }, { "epoch": 2.16, "grad_norm": 1.635364055633545, "learning_rate": 1.8247824819443553e-05, "loss": 0.0329, "step": 1134 }, { "epoch": 2.16, "grad_norm": 2.2010622024536133, "learning_rate": 1.8244341456766718e-05, "loss": 0.0587, "step": 1135 }, { "epoch": 2.16, "grad_norm": 1.995345115661621, "learning_rate": 1.8240854968191945e-05, "loss": 0.0465, "step": 1136 }, { "epoch": 2.16, "grad_norm": 2.0071136951446533, "learning_rate": 1.823736535504116e-05, "loss": 0.0421, "step": 1137 }, { "epoch": 2.16, "grad_norm": 1.4143203496932983, "learning_rate": 1.8233872618637477e-05, "loss": 0.0317, "step": 1138 }, { "epoch": 2.17, "grad_norm": 1.5105375051498413, "learning_rate": 1.8230376760305185e-05, "loss": 0.0332, "step": 1139 }, { "epoch": 2.17, "grad_norm": 1.111763596534729, "learning_rate": 1.822687778136977e-05, "loss": 0.0227, "step": 1140 }, { "epoch": 2.17, "grad_norm": 2.6184029579162598, "learning_rate": 1.8223375683157883e-05, "loss": 0.0516, "step": 1141 }, { "epoch": 2.17, "grad_norm": 2.438631057739258, "learning_rate": 1.821987046699738e-05, "loss": 0.051, "step": 1142 }, { "epoch": 2.17, "grad_norm": 2.5494542121887207, "learning_rate": 1.8216362134217283e-05, "loss": 0.0519, "step": 1143 }, { "epoch": 2.17, "grad_norm": 1.8488720655441284, "learning_rate": 1.8212850686147793e-05, "loss": 0.0326, "step": 1144 }, { "epoch": 2.18, "grad_norm": 2.2716434001922607, "learning_rate": 1.820933612412031e-05, "loss": 0.0498, "step": 1145 }, { "epoch": 2.18, "grad_norm": 2.771681785583496, "learning_rate": 1.82058184494674e-05, "loss": 0.0603, "step": 1146 }, { "epoch": 2.18, "grad_norm": 2.2100980281829834, "learning_rate": 1.8202297663522807e-05, "loss": 0.05, "step": 1147 }, { "epoch": 2.18, "grad_norm": 1.4644036293029785, "learning_rate": 1.8198773767621473e-05, "loss": 0.0237, "step": 1148 }, { "epoch": 2.18, "grad_norm": 2.238835096359253, "learning_rate": 1.8195246763099494e-05, "loss": 0.0421, "step": 1149 }, { "epoch": 2.19, "grad_norm": 2.4142684936523438, "learning_rate": 1.8191716651294168e-05, "loss": 0.0617, "step": 1150 }, { "epoch": 2.19, "grad_norm": 1.6578234434127808, "learning_rate": 1.818818343354396e-05, "loss": 0.06, "step": 1151 }, { "epoch": 2.19, "grad_norm": 1.8742181062698364, "learning_rate": 1.8184647111188508e-05, "loss": 0.0357, "step": 1152 }, { "epoch": 2.19, "grad_norm": 1.4244056940078735, "learning_rate": 1.818110768556863e-05, "loss": 0.0407, "step": 1153 }, { "epoch": 2.19, "grad_norm": 2.3481154441833496, "learning_rate": 1.8177565158026334e-05, "loss": 0.0422, "step": 1154 }, { "epoch": 2.2, "grad_norm": 1.3145090341567993, "learning_rate": 1.8174019529904785e-05, "loss": 0.0368, "step": 1155 }, { "epoch": 2.2, "grad_norm": 1.4768626689910889, "learning_rate": 1.817047080254834e-05, "loss": 0.0397, "step": 1156 }, { "epoch": 2.2, "grad_norm": 1.965342402458191, "learning_rate": 1.8166918977302508e-05, "loss": 0.0613, "step": 1157 }, { "epoch": 2.2, "grad_norm": 1.7167062759399414, "learning_rate": 1.8163364055514002e-05, "loss": 0.0387, "step": 1158 }, { "epoch": 2.2, "grad_norm": 2.1012585163116455, "learning_rate": 1.815980603853069e-05, "loss": 0.0543, "step": 1159 }, { "epoch": 2.21, "grad_norm": 1.7874516248703003, "learning_rate": 1.815624492770162e-05, "loss": 0.0415, "step": 1160 }, { "epoch": 2.21, "grad_norm": 1.4437814950942993, "learning_rate": 1.8152680724377005e-05, "loss": 0.0427, "step": 1161 }, { "epoch": 2.21, "grad_norm": 1.6602773666381836, "learning_rate": 1.8149113429908243e-05, "loss": 0.0314, "step": 1162 }, { "epoch": 2.21, "grad_norm": 1.9921170473098755, "learning_rate": 1.81455430456479e-05, "loss": 0.0356, "step": 1163 }, { "epoch": 2.21, "grad_norm": 1.8965617418289185, "learning_rate": 1.81419695729497e-05, "loss": 0.0436, "step": 1164 }, { "epoch": 2.21, "grad_norm": 1.9918878078460693, "learning_rate": 1.813839301316856e-05, "loss": 0.0402, "step": 1165 }, { "epoch": 2.22, "grad_norm": 2.0555272102355957, "learning_rate": 1.8134813367660556e-05, "loss": 0.0559, "step": 1166 }, { "epoch": 2.22, "grad_norm": 2.0106561183929443, "learning_rate": 1.813123063778293e-05, "loss": 0.032, "step": 1167 }, { "epoch": 2.22, "grad_norm": 1.5036998987197876, "learning_rate": 1.81276448248941e-05, "loss": 0.0362, "step": 1168 }, { "epoch": 2.22, "grad_norm": 1.6915639638900757, "learning_rate": 1.8124055930353656e-05, "loss": 0.0269, "step": 1169 }, { "epoch": 2.22, "grad_norm": 1.8022494316101074, "learning_rate": 1.8120463955522346e-05, "loss": 0.0522, "step": 1170 }, { "epoch": 2.23, "grad_norm": 1.1336395740509033, "learning_rate": 1.8116868901762092e-05, "loss": 0.0314, "step": 1171 }, { "epoch": 2.23, "grad_norm": 2.390012502670288, "learning_rate": 1.8113270770435988e-05, "loss": 0.0717, "step": 1172 }, { "epoch": 2.23, "grad_norm": 1.6601948738098145, "learning_rate": 1.810966956290828e-05, "loss": 0.0492, "step": 1173 }, { "epoch": 2.23, "grad_norm": 1.5352363586425781, "learning_rate": 1.8106065280544404e-05, "loss": 0.0316, "step": 1174 }, { "epoch": 2.23, "grad_norm": 1.3511008024215698, "learning_rate": 1.8102457924710934e-05, "loss": 0.0268, "step": 1175 }, { "epoch": 2.24, "grad_norm": 1.865675687789917, "learning_rate": 1.8098847496775637e-05, "loss": 0.0436, "step": 1176 }, { "epoch": 2.24, "grad_norm": 1.7107669115066528, "learning_rate": 1.809523399810742e-05, "loss": 0.0461, "step": 1177 }, { "epoch": 2.24, "grad_norm": 1.6600165367126465, "learning_rate": 1.809161743007637e-05, "loss": 0.0406, "step": 1178 }, { "epoch": 2.24, "grad_norm": 1.4277052879333496, "learning_rate": 1.8087997794053732e-05, "loss": 0.0331, "step": 1179 }, { "epoch": 2.24, "grad_norm": 1.7597533464431763, "learning_rate": 1.808437509141192e-05, "loss": 0.0347, "step": 1180 }, { "epoch": 2.25, "grad_norm": 1.1868308782577515, "learning_rate": 1.80807493235245e-05, "loss": 0.0229, "step": 1181 }, { "epoch": 2.25, "grad_norm": 2.6198644638061523, "learning_rate": 1.807712049176621e-05, "loss": 0.0632, "step": 1182 }, { "epoch": 2.25, "grad_norm": 1.728234887123108, "learning_rate": 1.8073488597512944e-05, "loss": 0.0373, "step": 1183 }, { "epoch": 2.25, "grad_norm": 1.4919641017913818, "learning_rate": 1.8069853642141765e-05, "loss": 0.0331, "step": 1184 }, { "epoch": 2.25, "grad_norm": 1.5148357152938843, "learning_rate": 1.8066215627030882e-05, "loss": 0.041, "step": 1185 }, { "epoch": 2.25, "grad_norm": 1.382712721824646, "learning_rate": 1.806257455355968e-05, "loss": 0.0369, "step": 1186 }, { "epoch": 2.26, "grad_norm": 1.6617794036865234, "learning_rate": 1.8058930423108695e-05, "loss": 0.049, "step": 1187 }, { "epoch": 2.26, "grad_norm": 2.0403733253479004, "learning_rate": 1.8055283237059626e-05, "loss": 0.0404, "step": 1188 }, { "epoch": 2.26, "grad_norm": 1.9207392930984497, "learning_rate": 1.8051632996795317e-05, "loss": 0.0468, "step": 1189 }, { "epoch": 2.26, "grad_norm": 1.4529156684875488, "learning_rate": 1.8047979703699797e-05, "loss": 0.032, "step": 1190 }, { "epoch": 2.26, "grad_norm": 1.7582253217697144, "learning_rate": 1.8044323359158228e-05, "loss": 0.045, "step": 1191 }, { "epoch": 2.27, "grad_norm": 1.578919529914856, "learning_rate": 1.8040663964556943e-05, "loss": 0.0358, "step": 1192 }, { "epoch": 2.27, "grad_norm": 1.830207109451294, "learning_rate": 1.8037001521283418e-05, "loss": 0.0591, "step": 1193 }, { "epoch": 2.27, "grad_norm": 1.2396471500396729, "learning_rate": 1.80333360307263e-05, "loss": 0.0335, "step": 1194 }, { "epoch": 2.27, "grad_norm": 2.121774196624756, "learning_rate": 1.802966749427538e-05, "loss": 0.0496, "step": 1195 }, { "epoch": 2.27, "grad_norm": 2.3997881412506104, "learning_rate": 1.8025995913321613e-05, "loss": 0.0413, "step": 1196 }, { "epoch": 2.28, "grad_norm": 1.3850466012954712, "learning_rate": 1.8022321289257102e-05, "loss": 0.0412, "step": 1197 }, { "epoch": 2.28, "grad_norm": 1.8841205835342407, "learning_rate": 1.8018643623475106e-05, "loss": 0.0336, "step": 1198 }, { "epoch": 2.28, "grad_norm": 1.897559404373169, "learning_rate": 1.8014962917370037e-05, "loss": 0.043, "step": 1199 }, { "epoch": 2.28, "grad_norm": 1.5645986795425415, "learning_rate": 1.8011279172337456e-05, "loss": 0.0358, "step": 1200 }, { "epoch": 2.28, "eval_blimp_filtered_avg": 0.7341791044776119, "eval_blimp_filtered_std": 0.004858955814908564, "step": 1200 }, { "epoch": 2.28, "eval_blimp_supplement_avg": 0.7995689655172413, "eval_blimp_supplement_std": 0.017607058871003865, "step": 1200 }, { "epoch": 2.28, "eval_vqa_filtered_avg": 0.41, "eval_vqa_filtered_std": 0.049431107042371025, "step": 1200 }, { "epoch": 2.28, "eval_winoground_filtered_avg": 0.52, "eval_winoground_filtered_std": 0.05021167315686779, "step": 1200 }, { "epoch": 2.28, "grad_norm": 1.998749017715454, "learning_rate": 1.8007592389774088e-05, "loss": 0.0569, "step": 1201 }, { "epoch": 2.29, "grad_norm": 1.4670095443725586, "learning_rate": 1.8003902571077793e-05, "loss": 0.0272, "step": 1202 }, { "epoch": 2.29, "grad_norm": 1.5024313926696777, "learning_rate": 1.8000209717647593e-05, "loss": 0.0427, "step": 1203 }, { "epoch": 2.29, "grad_norm": 2.1110408306121826, "learning_rate": 1.7996513830883663e-05, "loss": 0.0643, "step": 1204 }, { "epoch": 2.29, "grad_norm": 1.7737447023391724, "learning_rate": 1.7992814912187318e-05, "loss": 0.0373, "step": 1205 }, { "epoch": 2.29, "grad_norm": 1.6041792631149292, "learning_rate": 1.7989112962961034e-05, "loss": 0.0328, "step": 1206 }, { "epoch": 2.29, "grad_norm": 2.2440948486328125, "learning_rate": 1.798540798460842e-05, "loss": 0.0641, "step": 1207 }, { "epoch": 2.3, "grad_norm": 1.3710585832595825, "learning_rate": 1.7981699978534255e-05, "loss": 0.0324, "step": 1208 }, { "epoch": 2.3, "grad_norm": 1.2438743114471436, "learning_rate": 1.797798894614445e-05, "loss": 0.031, "step": 1209 }, { "epoch": 2.3, "grad_norm": 2.3894059658050537, "learning_rate": 1.7974274888846065e-05, "loss": 0.0363, "step": 1210 }, { "epoch": 2.3, "grad_norm": 1.3771783113479614, "learning_rate": 1.7970557808047317e-05, "loss": 0.0378, "step": 1211 }, { "epoch": 2.3, "grad_norm": 1.7317999601364136, "learning_rate": 1.796683770515755e-05, "loss": 0.0409, "step": 1212 }, { "epoch": 2.31, "grad_norm": 1.3610329627990723, "learning_rate": 1.796311458158728e-05, "loss": 0.0298, "step": 1213 }, { "epoch": 2.31, "grad_norm": 1.7258540391921997, "learning_rate": 1.7959388438748152e-05, "loss": 0.0461, "step": 1214 }, { "epoch": 2.31, "grad_norm": 1.8174598217010498, "learning_rate": 1.7955659278052954e-05, "loss": 0.0437, "step": 1215 }, { "epoch": 2.31, "grad_norm": 1.4539897441864014, "learning_rate": 1.7951927100915618e-05, "loss": 0.0375, "step": 1216 }, { "epoch": 2.31, "grad_norm": 1.6831566095352173, "learning_rate": 1.7948191908751235e-05, "loss": 0.0397, "step": 1217 }, { "epoch": 2.32, "grad_norm": 1.1820752620697021, "learning_rate": 1.794445370297602e-05, "loss": 0.0253, "step": 1218 }, { "epoch": 2.32, "grad_norm": 1.7984287738800049, "learning_rate": 1.7940712485007347e-05, "loss": 0.0296, "step": 1219 }, { "epoch": 2.32, "grad_norm": 1.623560905456543, "learning_rate": 1.7936968256263717e-05, "loss": 0.0356, "step": 1220 }, { "epoch": 2.32, "grad_norm": 1.744101643562317, "learning_rate": 1.7933221018164785e-05, "loss": 0.0366, "step": 1221 }, { "epoch": 2.32, "grad_norm": 1.5082882642745972, "learning_rate": 1.7929470772131338e-05, "loss": 0.0266, "step": 1222 }, { "epoch": 2.33, "grad_norm": 2.2248990535736084, "learning_rate": 1.7925717519585307e-05, "loss": 0.0491, "step": 1223 }, { "epoch": 2.33, "grad_norm": 1.8173506259918213, "learning_rate": 1.7921961261949762e-05, "loss": 0.0389, "step": 1224 }, { "epoch": 2.33, "grad_norm": 1.9379295110702515, "learning_rate": 1.7918202000648918e-05, "loss": 0.0574, "step": 1225 }, { "epoch": 2.33, "grad_norm": 1.6399519443511963, "learning_rate": 1.7914439737108128e-05, "loss": 0.0456, "step": 1226 }, { "epoch": 2.33, "grad_norm": 1.1015560626983643, "learning_rate": 1.7910674472753865e-05, "loss": 0.0284, "step": 1227 }, { "epoch": 2.33, "grad_norm": 1.4479420185089111, "learning_rate": 1.790690620901377e-05, "loss": 0.0278, "step": 1228 }, { "epoch": 2.34, "grad_norm": 1.1655195951461792, "learning_rate": 1.7903134947316598e-05, "loss": 0.0254, "step": 1229 }, { "epoch": 2.34, "grad_norm": 1.4946995973587036, "learning_rate": 1.789936068909225e-05, "loss": 0.0375, "step": 1230 }, { "epoch": 2.34, "grad_norm": 1.3458664417266846, "learning_rate": 1.789558343577176e-05, "loss": 0.026, "step": 1231 }, { "epoch": 2.34, "grad_norm": 1.5220470428466797, "learning_rate": 1.78918031887873e-05, "loss": 0.044, "step": 1232 }, { "epoch": 2.34, "grad_norm": 1.6877504587173462, "learning_rate": 1.7888019949572178e-05, "loss": 0.0261, "step": 1233 }, { "epoch": 2.35, "grad_norm": 1.6043541431427002, "learning_rate": 1.7884233719560832e-05, "loss": 0.0357, "step": 1234 }, { "epoch": 2.35, "grad_norm": 1.7392429113388062, "learning_rate": 1.7880444500188842e-05, "loss": 0.0373, "step": 1235 }, { "epoch": 2.35, "grad_norm": 1.545371413230896, "learning_rate": 1.787665229289291e-05, "loss": 0.0318, "step": 1236 }, { "epoch": 2.35, "grad_norm": 1.5993329286575317, "learning_rate": 1.787285709911088e-05, "loss": 0.0343, "step": 1237 }, { "epoch": 2.35, "grad_norm": 1.9305416345596313, "learning_rate": 1.7869058920281727e-05, "loss": 0.0344, "step": 1238 }, { "epoch": 2.36, "grad_norm": 1.7077898979187012, "learning_rate": 1.786525775784555e-05, "loss": 0.0538, "step": 1239 }, { "epoch": 2.36, "grad_norm": 1.5403344631195068, "learning_rate": 1.7861453613243593e-05, "loss": 0.0346, "step": 1240 }, { "epoch": 2.36, "grad_norm": 1.8777607679367065, "learning_rate": 1.785764648791822e-05, "loss": 0.0596, "step": 1241 }, { "epoch": 2.36, "grad_norm": 1.5287246704101562, "learning_rate": 1.785383638331293e-05, "loss": 0.027, "step": 1242 }, { "epoch": 2.36, "grad_norm": 1.938624620437622, "learning_rate": 1.7850023300872347e-05, "loss": 0.0675, "step": 1243 }, { "epoch": 2.37, "grad_norm": 1.5509599447250366, "learning_rate": 1.7846207242042227e-05, "loss": 0.0373, "step": 1244 }, { "epoch": 2.37, "grad_norm": 1.4864838123321533, "learning_rate": 1.7842388208269458e-05, "loss": 0.0312, "step": 1245 }, { "epoch": 2.37, "grad_norm": 2.9327385425567627, "learning_rate": 1.783856620100205e-05, "loss": 0.0578, "step": 1246 }, { "epoch": 2.37, "grad_norm": 1.4068456888198853, "learning_rate": 1.7834741221689142e-05, "loss": 0.0305, "step": 1247 }, { "epoch": 2.37, "grad_norm": 1.7658439874649048, "learning_rate": 1.7830913271781005e-05, "loss": 0.051, "step": 1248 }, { "epoch": 2.37, "grad_norm": 2.046968698501587, "learning_rate": 1.7827082352729027e-05, "loss": 0.0463, "step": 1249 }, { "epoch": 2.38, "grad_norm": 1.0399824380874634, "learning_rate": 1.7823248465985734e-05, "loss": 0.0229, "step": 1250 }, { "epoch": 2.38, "grad_norm": 1.6915473937988281, "learning_rate": 1.7819411613004762e-05, "loss": 0.0504, "step": 1251 }, { "epoch": 2.38, "grad_norm": 1.608856439590454, "learning_rate": 1.781557179524088e-05, "loss": 0.0453, "step": 1252 }, { "epoch": 2.38, "grad_norm": 1.6692733764648438, "learning_rate": 1.781172901414999e-05, "loss": 0.0461, "step": 1253 }, { "epoch": 2.38, "grad_norm": 1.6341065168380737, "learning_rate": 1.78078832711891e-05, "loss": 0.037, "step": 1254 }, { "epoch": 2.39, "grad_norm": 1.5078089237213135, "learning_rate": 1.780403456781635e-05, "loss": 0.0332, "step": 1255 }, { "epoch": 2.39, "grad_norm": 1.5845333337783813, "learning_rate": 1.780018290549101e-05, "loss": 0.0313, "step": 1256 }, { "epoch": 2.39, "grad_norm": 1.4304102659225464, "learning_rate": 1.7796328285673453e-05, "loss": 0.0387, "step": 1257 }, { "epoch": 2.39, "grad_norm": 1.7891038656234741, "learning_rate": 1.7792470709825195e-05, "loss": 0.0575, "step": 1258 }, { "epoch": 2.39, "grad_norm": 1.3741086721420288, "learning_rate": 1.778861017940885e-05, "loss": 0.0334, "step": 1259 }, { "epoch": 2.4, "grad_norm": 1.8616143465042114, "learning_rate": 1.7784746695888177e-05, "loss": 0.0469, "step": 1260 }, { "epoch": 2.4, "grad_norm": 1.4223086833953857, "learning_rate": 1.778088026072803e-05, "loss": 0.0224, "step": 1261 }, { "epoch": 2.4, "grad_norm": 1.921544075012207, "learning_rate": 1.7777010875394402e-05, "loss": 0.0397, "step": 1262 }, { "epoch": 2.4, "grad_norm": 2.0777604579925537, "learning_rate": 1.7773138541354398e-05, "loss": 0.0491, "step": 1263 }, { "epoch": 2.4, "grad_norm": 1.9518744945526123, "learning_rate": 1.7769263260076233e-05, "loss": 0.0378, "step": 1264 }, { "epoch": 2.4, "grad_norm": 1.7992699146270752, "learning_rate": 1.7765385033029247e-05, "loss": 0.0404, "step": 1265 }, { "epoch": 2.41, "grad_norm": 1.6790142059326172, "learning_rate": 1.77615038616839e-05, "loss": 0.0491, "step": 1266 }, { "epoch": 2.41, "grad_norm": 1.4635308980941772, "learning_rate": 1.7757619747511766e-05, "loss": 0.0319, "step": 1267 }, { "epoch": 2.41, "grad_norm": 2.1214358806610107, "learning_rate": 1.7753732691985525e-05, "loss": 0.0395, "step": 1268 }, { "epoch": 2.41, "grad_norm": 1.4434770345687866, "learning_rate": 1.7749842696578987e-05, "loss": 0.0313, "step": 1269 }, { "epoch": 2.41, "grad_norm": 2.2248916625976562, "learning_rate": 1.774594976276707e-05, "loss": 0.0468, "step": 1270 }, { "epoch": 2.42, "grad_norm": 1.6659162044525146, "learning_rate": 1.7742053892025805e-05, "loss": 0.042, "step": 1271 }, { "epoch": 2.42, "grad_norm": 1.6485422849655151, "learning_rate": 1.7738155085832338e-05, "loss": 0.0412, "step": 1272 }, { "epoch": 2.42, "grad_norm": 1.3560752868652344, "learning_rate": 1.7734253345664925e-05, "loss": 0.0375, "step": 1273 }, { "epoch": 2.42, "grad_norm": 2.2354142665863037, "learning_rate": 1.773034867300294e-05, "loss": 0.0453, "step": 1274 }, { "epoch": 2.42, "grad_norm": 1.6816743612289429, "learning_rate": 1.7726441069326864e-05, "loss": 0.0389, "step": 1275 }, { "epoch": 2.43, "grad_norm": 1.4573460817337036, "learning_rate": 1.77225305361183e-05, "loss": 0.0329, "step": 1276 }, { "epoch": 2.43, "grad_norm": 2.2404956817626953, "learning_rate": 1.7718617074859938e-05, "loss": 0.0502, "step": 1277 }, { "epoch": 2.43, "grad_norm": 1.376296877861023, "learning_rate": 1.7714700687035608e-05, "loss": 0.0269, "step": 1278 }, { "epoch": 2.43, "grad_norm": 1.6084649562835693, "learning_rate": 1.7710781374130226e-05, "loss": 0.0409, "step": 1279 }, { "epoch": 2.43, "grad_norm": 1.5858243703842163, "learning_rate": 1.7706859137629826e-05, "loss": 0.0206, "step": 1280 }, { "epoch": 2.44, "grad_norm": 2.4614264965057373, "learning_rate": 1.7702933979021555e-05, "loss": 0.0371, "step": 1281 }, { "epoch": 2.44, "grad_norm": 1.5718436241149902, "learning_rate": 1.7699005899793662e-05, "loss": 0.0354, "step": 1282 }, { "epoch": 2.44, "grad_norm": 1.8170595169067383, "learning_rate": 1.7695074901435507e-05, "loss": 0.0409, "step": 1283 }, { "epoch": 2.44, "grad_norm": 1.8556125164031982, "learning_rate": 1.769114098543755e-05, "loss": 0.0497, "step": 1284 }, { "epoch": 2.44, "grad_norm": 1.7238880395889282, "learning_rate": 1.7687204153291364e-05, "loss": 0.0326, "step": 1285 }, { "epoch": 2.44, "grad_norm": 2.8198275566101074, "learning_rate": 1.7683264406489626e-05, "loss": 0.0704, "step": 1286 }, { "epoch": 2.45, "grad_norm": 1.2721434831619263, "learning_rate": 1.7679321746526118e-05, "loss": 0.0283, "step": 1287 }, { "epoch": 2.45, "grad_norm": 1.5229331254959106, "learning_rate": 1.7675376174895723e-05, "loss": 0.0367, "step": 1288 }, { "epoch": 2.45, "grad_norm": 1.4658204317092896, "learning_rate": 1.767142769309444e-05, "loss": 0.0351, "step": 1289 }, { "epoch": 2.45, "grad_norm": 1.8199785947799683, "learning_rate": 1.7667476302619354e-05, "loss": 0.0442, "step": 1290 }, { "epoch": 2.45, "grad_norm": 2.1298563480377197, "learning_rate": 1.766352200496866e-05, "loss": 0.0489, "step": 1291 }, { "epoch": 2.46, "grad_norm": 1.2974904775619507, "learning_rate": 1.7659564801641666e-05, "loss": 0.0237, "step": 1292 }, { "epoch": 2.46, "grad_norm": 2.168304920196533, "learning_rate": 1.7655604694138762e-05, "loss": 0.0468, "step": 1293 }, { "epoch": 2.46, "grad_norm": 2.060917854309082, "learning_rate": 1.7651641683961457e-05, "loss": 0.0531, "step": 1294 }, { "epoch": 2.46, "grad_norm": 2.023512601852417, "learning_rate": 1.7647675772612353e-05, "loss": 0.0545, "step": 1295 }, { "epoch": 2.46, "grad_norm": 1.4606008529663086, "learning_rate": 1.7643706961595148e-05, "loss": 0.0351, "step": 1296 }, { "epoch": 2.47, "grad_norm": 2.316333055496216, "learning_rate": 1.7639735252414648e-05, "loss": 0.0621, "step": 1297 }, { "epoch": 2.47, "grad_norm": 1.2990678548812866, "learning_rate": 1.763576064657675e-05, "loss": 0.0262, "step": 1298 }, { "epoch": 2.47, "grad_norm": 1.570328950881958, "learning_rate": 1.7631783145588453e-05, "loss": 0.0325, "step": 1299 }, { "epoch": 2.47, "grad_norm": 1.5386464595794678, "learning_rate": 1.7627802750957855e-05, "loss": 0.0367, "step": 1300 }, { "epoch": 2.47, "eval_blimp_filtered_avg": 0.7343283582089553, "eval_blimp_filtered_std": 0.004863561450296074, "step": 1300 }, { "epoch": 2.47, "eval_blimp_supplement_avg": 0.8081896551724138, "eval_blimp_supplement_std": 0.017357279270616147, "step": 1300 }, { "epoch": 2.47, "eval_vqa_filtered_avg": 0.37, "eval_vqa_filtered_std": 0.04852365870939099, "step": 1300 }, { "epoch": 2.47, "eval_winoground_filtered_avg": 0.49, "eval_winoground_filtered_std": 0.05024183937956911, "step": 1300 }, { "epoch": 2.47, "grad_norm": 1.817378044128418, "learning_rate": 1.762381946419415e-05, "loss": 0.0354, "step": 1301 }, { "epoch": 2.48, "grad_norm": 1.5768214464187622, "learning_rate": 1.7619833286807627e-05, "loss": 0.0478, "step": 1302 }, { "epoch": 2.48, "grad_norm": 1.6830250024795532, "learning_rate": 1.7615844220309667e-05, "loss": 0.0335, "step": 1303 }, { "epoch": 2.48, "grad_norm": 1.5546985864639282, "learning_rate": 1.7611852266212764e-05, "loss": 0.0322, "step": 1304 }, { "epoch": 2.48, "grad_norm": 1.5185424089431763, "learning_rate": 1.7607857426030485e-05, "loss": 0.0347, "step": 1305 }, { "epoch": 2.48, "grad_norm": 1.874476432800293, "learning_rate": 1.76038597012775e-05, "loss": 0.0306, "step": 1306 }, { "epoch": 2.48, "grad_norm": 1.6326690912246704, "learning_rate": 1.7599859093469583e-05, "loss": 0.0322, "step": 1307 }, { "epoch": 2.49, "grad_norm": 1.4925075769424438, "learning_rate": 1.7595855604123582e-05, "loss": 0.0352, "step": 1308 }, { "epoch": 2.49, "grad_norm": 4.751516819000244, "learning_rate": 1.759184923475745e-05, "loss": 0.0431, "step": 1309 }, { "epoch": 2.49, "grad_norm": 1.9763637781143188, "learning_rate": 1.7587839986890228e-05, "loss": 0.062, "step": 1310 }, { "epoch": 2.49, "grad_norm": 2.2759687900543213, "learning_rate": 1.7583827862042052e-05, "loss": 0.0375, "step": 1311 }, { "epoch": 2.49, "grad_norm": 1.9302337169647217, "learning_rate": 1.7579812861734146e-05, "loss": 0.0482, "step": 1312 }, { "epoch": 2.5, "grad_norm": 2.268179178237915, "learning_rate": 1.7575794987488824e-05, "loss": 0.0641, "step": 1313 }, { "epoch": 2.5, "grad_norm": 1.6460742950439453, "learning_rate": 1.7571774240829487e-05, "loss": 0.0267, "step": 1314 }, { "epoch": 2.5, "grad_norm": 2.479024887084961, "learning_rate": 1.7567750623280634e-05, "loss": 0.0323, "step": 1315 }, { "epoch": 2.5, "grad_norm": 1.524983286857605, "learning_rate": 1.7563724136367844e-05, "loss": 0.0408, "step": 1316 }, { "epoch": 2.5, "grad_norm": 2.2729666233062744, "learning_rate": 1.7559694781617786e-05, "loss": 0.0395, "step": 1317 }, { "epoch": 2.51, "grad_norm": 1.88374924659729, "learning_rate": 1.755566256055822e-05, "loss": 0.0341, "step": 1318 }, { "epoch": 2.51, "grad_norm": 2.2351536750793457, "learning_rate": 1.7551627474717986e-05, "loss": 0.0531, "step": 1319 }, { "epoch": 2.51, "grad_norm": 2.941922903060913, "learning_rate": 1.7547589525627018e-05, "loss": 0.0408, "step": 1320 }, { "epoch": 2.51, "grad_norm": 3.3393092155456543, "learning_rate": 1.754354871481633e-05, "loss": 0.0433, "step": 1321 }, { "epoch": 2.51, "grad_norm": 2.267005443572998, "learning_rate": 1.753950504381802e-05, "loss": 0.0499, "step": 1322 }, { "epoch": 2.52, "grad_norm": 1.4958664178848267, "learning_rate": 1.753545851416528e-05, "loss": 0.0273, "step": 1323 }, { "epoch": 2.52, "grad_norm": 1.712415337562561, "learning_rate": 1.7531409127392374e-05, "loss": 0.0435, "step": 1324 }, { "epoch": 2.52, "grad_norm": 1.971885085105896, "learning_rate": 1.7527356885034654e-05, "loss": 0.0411, "step": 1325 }, { "epoch": 2.52, "grad_norm": 1.765038251876831, "learning_rate": 1.7523301788628557e-05, "loss": 0.0456, "step": 1326 }, { "epoch": 2.52, "grad_norm": 1.6227880716323853, "learning_rate": 1.7519243839711602e-05, "loss": 0.0398, "step": 1327 }, { "epoch": 2.52, "grad_norm": 2.3423962593078613, "learning_rate": 1.7515183039822385e-05, "loss": 0.0423, "step": 1328 }, { "epoch": 2.53, "grad_norm": 1.793069839477539, "learning_rate": 1.7511119390500587e-05, "loss": 0.042, "step": 1329 }, { "epoch": 2.53, "grad_norm": 1.5468616485595703, "learning_rate": 1.7507052893286966e-05, "loss": 0.0414, "step": 1330 }, { "epoch": 2.53, "grad_norm": 2.0088202953338623, "learning_rate": 1.7502983549723365e-05, "loss": 0.0335, "step": 1331 }, { "epoch": 2.53, "grad_norm": 2.3229684829711914, "learning_rate": 1.74989113613527e-05, "loss": 0.0535, "step": 1332 }, { "epoch": 2.53, "grad_norm": 2.2348039150238037, "learning_rate": 1.7494836329718975e-05, "loss": 0.0495, "step": 1333 }, { "epoch": 2.54, "grad_norm": 2.1318209171295166, "learning_rate": 1.749075845636726e-05, "loss": 0.0441, "step": 1334 }, { "epoch": 2.54, "grad_norm": 1.5792418718338013, "learning_rate": 1.748667774284371e-05, "loss": 0.0296, "step": 1335 }, { "epoch": 2.54, "grad_norm": 1.3758803606033325, "learning_rate": 1.7482594190695558e-05, "loss": 0.0311, "step": 1336 }, { "epoch": 2.54, "grad_norm": 1.7146170139312744, "learning_rate": 1.7478507801471104e-05, "loss": 0.0408, "step": 1337 }, { "epoch": 2.54, "grad_norm": 2.5438008308410645, "learning_rate": 1.7474418576719735e-05, "loss": 0.0676, "step": 1338 }, { "epoch": 2.55, "grad_norm": 2.1990723609924316, "learning_rate": 1.747032651799191e-05, "loss": 0.0531, "step": 1339 }, { "epoch": 2.55, "grad_norm": 1.9105610847473145, "learning_rate": 1.746623162683915e-05, "loss": 0.0514, "step": 1340 }, { "epoch": 2.55, "grad_norm": 1.9144867658615112, "learning_rate": 1.7462133904814076e-05, "loss": 0.0484, "step": 1341 }, { "epoch": 2.55, "grad_norm": 1.422361969947815, "learning_rate": 1.7458033353470357e-05, "loss": 0.0471, "step": 1342 }, { "epoch": 2.55, "grad_norm": 1.9198143482208252, "learning_rate": 1.745392997436275e-05, "loss": 0.0622, "step": 1343 }, { "epoch": 2.56, "grad_norm": 1.9632000923156738, "learning_rate": 1.7449823769047074e-05, "loss": 0.051, "step": 1344 }, { "epoch": 2.56, "grad_norm": 2.2395248413085938, "learning_rate": 1.7445714739080227e-05, "loss": 0.0484, "step": 1345 }, { "epoch": 2.56, "grad_norm": 1.5432943105697632, "learning_rate": 1.7441602886020178e-05, "loss": 0.0308, "step": 1346 }, { "epoch": 2.56, "grad_norm": 2.047947883605957, "learning_rate": 1.743748821142596e-05, "loss": 0.0496, "step": 1347 }, { "epoch": 2.56, "grad_norm": 2.144784450531006, "learning_rate": 1.743337071685768e-05, "loss": 0.048, "step": 1348 }, { "epoch": 2.56, "grad_norm": 1.5758427381515503, "learning_rate": 1.742925040387652e-05, "loss": 0.0411, "step": 1349 }, { "epoch": 2.57, "grad_norm": 1.7667827606201172, "learning_rate": 1.7425127274044714e-05, "loss": 0.0391, "step": 1350 }, { "epoch": 2.57, "grad_norm": 1.820212721824646, "learning_rate": 1.7421001328925588e-05, "loss": 0.0518, "step": 1351 }, { "epoch": 2.57, "grad_norm": 1.672013521194458, "learning_rate": 1.741687257008351e-05, "loss": 0.0535, "step": 1352 }, { "epoch": 2.57, "grad_norm": 1.4466242790222168, "learning_rate": 1.7412740999083933e-05, "loss": 0.0316, "step": 1353 }, { "epoch": 2.57, "grad_norm": 1.9763050079345703, "learning_rate": 1.740860661749337e-05, "loss": 0.0534, "step": 1354 }, { "epoch": 2.58, "grad_norm": 1.6281019449234009, "learning_rate": 1.7404469426879392e-05, "loss": 0.0387, "step": 1355 }, { "epoch": 2.58, "grad_norm": 2.1017818450927734, "learning_rate": 1.7400329428810656e-05, "loss": 0.0566, "step": 1356 }, { "epoch": 2.58, "grad_norm": 1.9158635139465332, "learning_rate": 1.7396186624856862e-05, "loss": 0.0579, "step": 1357 }, { "epoch": 2.58, "grad_norm": 1.1686488389968872, "learning_rate": 1.7392041016588782e-05, "loss": 0.0281, "step": 1358 }, { "epoch": 2.58, "grad_norm": 1.849375605583191, "learning_rate": 1.738789260557826e-05, "loss": 0.0442, "step": 1359 }, { "epoch": 2.59, "grad_norm": 1.8204283714294434, "learning_rate": 1.738374139339818e-05, "loss": 0.0446, "step": 1360 }, { "epoch": 2.59, "grad_norm": 1.8324123620986938, "learning_rate": 1.7379587381622512e-05, "loss": 0.046, "step": 1361 }, { "epoch": 2.59, "grad_norm": 1.8624264001846313, "learning_rate": 1.7375430571826276e-05, "loss": 0.0458, "step": 1362 }, { "epoch": 2.59, "grad_norm": 2.161884069442749, "learning_rate": 1.7371270965585554e-05, "loss": 0.0626, "step": 1363 }, { "epoch": 2.59, "grad_norm": 1.6857842206954956, "learning_rate": 1.736710856447749e-05, "loss": 0.0496, "step": 1364 }, { "epoch": 2.6, "grad_norm": 2.1137053966522217, "learning_rate": 1.7362943370080282e-05, "loss": 0.0549, "step": 1365 }, { "epoch": 2.6, "grad_norm": 1.8184421062469482, "learning_rate": 1.73587753839732e-05, "loss": 0.04, "step": 1366 }, { "epoch": 2.6, "grad_norm": 1.7343051433563232, "learning_rate": 1.7354604607736557e-05, "loss": 0.0458, "step": 1367 }, { "epoch": 2.6, "grad_norm": 1.4133073091506958, "learning_rate": 1.7350431042951737e-05, "loss": 0.0367, "step": 1368 }, { "epoch": 2.6, "grad_norm": 2.111546277999878, "learning_rate": 1.7346254691201166e-05, "loss": 0.0496, "step": 1369 }, { "epoch": 2.6, "grad_norm": 2.08075213432312, "learning_rate": 1.7342075554068347e-05, "loss": 0.0593, "step": 1370 }, { "epoch": 2.61, "grad_norm": 2.1898865699768066, "learning_rate": 1.733789363313782e-05, "loss": 0.0584, "step": 1371 }, { "epoch": 2.61, "grad_norm": 1.7307120561599731, "learning_rate": 1.733370892999519e-05, "loss": 0.0615, "step": 1372 }, { "epoch": 2.61, "grad_norm": 1.7536959648132324, "learning_rate": 1.7329521446227122e-05, "loss": 0.0424, "step": 1373 }, { "epoch": 2.61, "grad_norm": 1.406726360321045, "learning_rate": 1.7325331183421322e-05, "loss": 0.0263, "step": 1374 }, { "epoch": 2.61, "grad_norm": 1.910494089126587, "learning_rate": 1.732113814316656e-05, "loss": 0.0499, "step": 1375 }, { "epoch": 2.62, "grad_norm": 1.514067530632019, "learning_rate": 1.7316942327052653e-05, "loss": 0.0371, "step": 1376 }, { "epoch": 2.62, "grad_norm": 1.45138680934906, "learning_rate": 1.7312743736670475e-05, "loss": 0.0439, "step": 1377 }, { "epoch": 2.62, "grad_norm": 1.4693409204483032, "learning_rate": 1.730854237361195e-05, "loss": 0.0435, "step": 1378 }, { "epoch": 2.62, "grad_norm": 2.5825083255767822, "learning_rate": 1.730433823947005e-05, "loss": 0.0498, "step": 1379 }, { "epoch": 2.62, "grad_norm": 1.5503402948379517, "learning_rate": 1.7300131335838804e-05, "loss": 0.0361, "step": 1380 }, { "epoch": 2.63, "grad_norm": 2.4008688926696777, "learning_rate": 1.7295921664313293e-05, "loss": 0.0399, "step": 1381 }, { "epoch": 2.63, "grad_norm": 1.690412998199463, "learning_rate": 1.7291709226489634e-05, "loss": 0.0371, "step": 1382 }, { "epoch": 2.63, "grad_norm": 1.2597596645355225, "learning_rate": 1.7287494023965002e-05, "loss": 0.0357, "step": 1383 }, { "epoch": 2.63, "grad_norm": 2.244752883911133, "learning_rate": 1.7283276058337624e-05, "loss": 0.064, "step": 1384 }, { "epoch": 2.63, "grad_norm": 1.5385000705718994, "learning_rate": 1.7279055331206768e-05, "loss": 0.0393, "step": 1385 }, { "epoch": 2.63, "grad_norm": 1.1296125650405884, "learning_rate": 1.727483184417276e-05, "loss": 0.0267, "step": 1386 }, { "epoch": 2.64, "grad_norm": 1.4849328994750977, "learning_rate": 1.7270605598836946e-05, "loss": 0.0363, "step": 1387 }, { "epoch": 2.64, "grad_norm": 1.4863110780715942, "learning_rate": 1.7266376596801744e-05, "loss": 0.0348, "step": 1388 }, { "epoch": 2.64, "grad_norm": 1.3627749681472778, "learning_rate": 1.7262144839670614e-05, "loss": 0.0262, "step": 1389 }, { "epoch": 2.64, "grad_norm": 1.8056634664535522, "learning_rate": 1.7257910329048048e-05, "loss": 0.0376, "step": 1390 }, { "epoch": 2.64, "grad_norm": 1.9287132024765015, "learning_rate": 1.7253673066539595e-05, "loss": 0.0553, "step": 1391 }, { "epoch": 2.65, "grad_norm": 1.9223958253860474, "learning_rate": 1.724943305375184e-05, "loss": 0.0413, "step": 1392 }, { "epoch": 2.65, "grad_norm": 1.4933427572250366, "learning_rate": 1.7245190292292413e-05, "loss": 0.0256, "step": 1393 }, { "epoch": 2.65, "grad_norm": 1.6180129051208496, "learning_rate": 1.724094478376998e-05, "loss": 0.0394, "step": 1394 }, { "epoch": 2.65, "grad_norm": 1.5674339532852173, "learning_rate": 1.7236696529794263e-05, "loss": 0.0418, "step": 1395 }, { "epoch": 2.65, "grad_norm": 2.098445177078247, "learning_rate": 1.723244553197601e-05, "loss": 0.0381, "step": 1396 }, { "epoch": 2.66, "grad_norm": 1.7464901208877563, "learning_rate": 1.722819179192702e-05, "loss": 0.0446, "step": 1397 }, { "epoch": 2.66, "grad_norm": 1.3673158884048462, "learning_rate": 1.7223935311260124e-05, "loss": 0.0298, "step": 1398 }, { "epoch": 2.66, "grad_norm": 1.5375001430511475, "learning_rate": 1.72196760915892e-05, "loss": 0.04, "step": 1399 }, { "epoch": 2.66, "grad_norm": 2.054001569747925, "learning_rate": 1.7215414134529154e-05, "loss": 0.0661, "step": 1400 }, { "epoch": 2.66, "eval_blimp_filtered_avg": 0.7338805970149254, "eval_blimp_filtered_std": 0.004868024060004553, "step": 1400 }, { "epoch": 2.66, "eval_blimp_supplement_avg": 0.790948275862069, "eval_blimp_supplement_std": 0.017718708431067404, "step": 1400 }, { "epoch": 2.66, "eval_vqa_filtered_avg": 0.34, "eval_vqa_filtered_std": 0.04760952285695234, "step": 1400 }, { "epoch": 2.66, "eval_winoground_filtered_avg": 0.52, "eval_winoground_filtered_std": 0.05021167315686779, "step": 1400 }, { "epoch": 2.66, "grad_norm": 1.8161982297897339, "learning_rate": 1.7211149441695938e-05, "loss": 0.0343, "step": 1401 }, { "epoch": 2.67, "grad_norm": 1.6362156867980957, "learning_rate": 1.7206882014706542e-05, "loss": 0.0444, "step": 1402 }, { "epoch": 2.67, "grad_norm": 1.5196362733840942, "learning_rate": 1.720261185517899e-05, "loss": 0.0309, "step": 1403 }, { "epoch": 2.67, "grad_norm": 1.8338069915771484, "learning_rate": 1.7198338964732334e-05, "loss": 0.0438, "step": 1404 }, { "epoch": 2.67, "grad_norm": 2.024822473526001, "learning_rate": 1.7194063344986678e-05, "loss": 0.0498, "step": 1405 }, { "epoch": 2.67, "grad_norm": 1.6320611238479614, "learning_rate": 1.718978499756315e-05, "loss": 0.0395, "step": 1406 }, { "epoch": 2.67, "grad_norm": 2.035020351409912, "learning_rate": 1.718550392408391e-05, "loss": 0.0539, "step": 1407 }, { "epoch": 2.68, "grad_norm": 1.809511661529541, "learning_rate": 1.7181220126172163e-05, "loss": 0.0469, "step": 1408 }, { "epoch": 2.68, "grad_norm": 2.107138156890869, "learning_rate": 1.717693360545214e-05, "loss": 0.0504, "step": 1409 }, { "epoch": 2.68, "grad_norm": 1.3472083806991577, "learning_rate": 1.7172644363549095e-05, "loss": 0.0352, "step": 1410 }, { "epoch": 2.68, "grad_norm": 1.4162206649780273, "learning_rate": 1.716835240208933e-05, "loss": 0.0393, "step": 1411 }, { "epoch": 2.68, "grad_norm": 1.6941478252410889, "learning_rate": 1.7164057722700172e-05, "loss": 0.041, "step": 1412 }, { "epoch": 2.69, "grad_norm": 2.313992500305176, "learning_rate": 1.7159760327009977e-05, "loss": 0.0473, "step": 1413 }, { "epoch": 2.69, "grad_norm": 1.3721638917922974, "learning_rate": 1.7155460216648134e-05, "loss": 0.042, "step": 1414 }, { "epoch": 2.69, "grad_norm": 1.6849480867385864, "learning_rate": 1.715115739324505e-05, "loss": 0.0443, "step": 1415 }, { "epoch": 2.69, "grad_norm": 1.5937862396240234, "learning_rate": 1.714685185843218e-05, "loss": 0.0512, "step": 1416 }, { "epoch": 2.69, "grad_norm": 1.3114043474197388, "learning_rate": 1.714254361384199e-05, "loss": 0.0469, "step": 1417 }, { "epoch": 2.7, "grad_norm": 1.5205775499343872, "learning_rate": 1.7138232661107984e-05, "loss": 0.0393, "step": 1418 }, { "epoch": 2.7, "grad_norm": 1.8415467739105225, "learning_rate": 1.7133919001864686e-05, "loss": 0.0523, "step": 1419 }, { "epoch": 2.7, "grad_norm": 2.0850961208343506, "learning_rate": 1.7129602637747652e-05, "loss": 0.0363, "step": 1420 }, { "epoch": 2.7, "grad_norm": 1.2998409271240234, "learning_rate": 1.7125283570393457e-05, "loss": 0.0257, "step": 1421 }, { "epoch": 2.7, "grad_norm": 2.8493754863739014, "learning_rate": 1.712096180143971e-05, "loss": 0.0369, "step": 1422 }, { "epoch": 2.71, "grad_norm": 2.0939152240753174, "learning_rate": 1.7116637332525036e-05, "loss": 0.0366, "step": 1423 }, { "epoch": 2.71, "grad_norm": 1.8818622827529907, "learning_rate": 1.7112310165289084e-05, "loss": 0.0296, "step": 1424 }, { "epoch": 2.71, "grad_norm": 1.6244480609893799, "learning_rate": 1.7107980301372532e-05, "loss": 0.0463, "step": 1425 }, { "epoch": 2.71, "grad_norm": 2.4402880668640137, "learning_rate": 1.710364774241708e-05, "loss": 0.0691, "step": 1426 }, { "epoch": 2.71, "grad_norm": 1.3955118656158447, "learning_rate": 1.709931249006544e-05, "loss": 0.0306, "step": 1427 }, { "epoch": 2.71, "grad_norm": 1.7573925256729126, "learning_rate": 1.7094974545961358e-05, "loss": 0.0487, "step": 1428 }, { "epoch": 2.72, "grad_norm": 1.6818406581878662, "learning_rate": 1.7090633911749594e-05, "loss": 0.0299, "step": 1429 }, { "epoch": 2.72, "grad_norm": 1.5400456190109253, "learning_rate": 1.7086290589075926e-05, "loss": 0.0305, "step": 1430 }, { "epoch": 2.72, "grad_norm": 2.0720741748809814, "learning_rate": 1.7081944579587153e-05, "loss": 0.0499, "step": 1431 }, { "epoch": 2.72, "grad_norm": 2.6174299716949463, "learning_rate": 1.7077595884931102e-05, "loss": 0.0853, "step": 1432 }, { "epoch": 2.72, "grad_norm": 1.4457992315292358, "learning_rate": 1.7073244506756602e-05, "loss": 0.0374, "step": 1433 }, { "epoch": 2.73, "grad_norm": 1.3504878282546997, "learning_rate": 1.7068890446713512e-05, "loss": 0.0304, "step": 1434 }, { "epoch": 2.73, "grad_norm": 1.5574394464492798, "learning_rate": 1.7064533706452704e-05, "loss": 0.0307, "step": 1435 }, { "epoch": 2.73, "grad_norm": 1.85750150680542, "learning_rate": 1.706017428762606e-05, "loss": 0.0427, "step": 1436 }, { "epoch": 2.73, "grad_norm": 1.6429429054260254, "learning_rate": 1.705581219188649e-05, "loss": 0.0441, "step": 1437 }, { "epoch": 2.73, "grad_norm": 1.5045359134674072, "learning_rate": 1.7051447420887906e-05, "loss": 0.0458, "step": 1438 }, { "epoch": 2.74, "grad_norm": 2.2723805904388428, "learning_rate": 1.704707997628525e-05, "loss": 0.064, "step": 1439 }, { "epoch": 2.74, "grad_norm": 2.1117618083953857, "learning_rate": 1.7042709859734454e-05, "loss": 0.0374, "step": 1440 }, { "epoch": 2.74, "grad_norm": 1.6543391942977905, "learning_rate": 1.7038337072892485e-05, "loss": 0.0394, "step": 1441 }, { "epoch": 2.74, "grad_norm": 2.092384099960327, "learning_rate": 1.7033961617417318e-05, "loss": 0.049, "step": 1442 }, { "epoch": 2.74, "grad_norm": 2.133237838745117, "learning_rate": 1.7029583494967935e-05, "loss": 0.0262, "step": 1443 }, { "epoch": 2.75, "grad_norm": 1.842736840248108, "learning_rate": 1.7025202707204328e-05, "loss": 0.0552, "step": 1444 }, { "epoch": 2.75, "grad_norm": 2.14099383354187, "learning_rate": 1.70208192557875e-05, "loss": 0.0508, "step": 1445 }, { "epoch": 2.75, "grad_norm": 2.801361560821533, "learning_rate": 1.7016433142379475e-05, "loss": 0.0482, "step": 1446 }, { "epoch": 2.75, "grad_norm": 2.12227463722229, "learning_rate": 1.7012044368643266e-05, "loss": 0.0408, "step": 1447 }, { "epoch": 2.75, "grad_norm": 1.8081763982772827, "learning_rate": 1.700765293624292e-05, "loss": 0.0329, "step": 1448 }, { "epoch": 2.75, "grad_norm": 2.0998969078063965, "learning_rate": 1.7003258846843468e-05, "loss": 0.0436, "step": 1449 }, { "epoch": 2.76, "grad_norm": 1.85587739944458, "learning_rate": 1.699886210211096e-05, "loss": 0.0422, "step": 1450 }, { "epoch": 2.76, "grad_norm": 1.5365127325057983, "learning_rate": 1.6994462703712456e-05, "loss": 0.048, "step": 1451 }, { "epoch": 2.76, "grad_norm": 2.172468900680542, "learning_rate": 1.6990060653316013e-05, "loss": 0.0659, "step": 1452 }, { "epoch": 2.76, "grad_norm": 1.503078818321228, "learning_rate": 1.6985655952590704e-05, "loss": 0.0436, "step": 1453 }, { "epoch": 2.76, "grad_norm": 1.7140440940856934, "learning_rate": 1.6981248603206593e-05, "loss": 0.0545, "step": 1454 }, { "epoch": 2.77, "grad_norm": 1.881050705909729, "learning_rate": 1.6976838606834763e-05, "loss": 0.0317, "step": 1455 }, { "epoch": 2.77, "grad_norm": 2.0963752269744873, "learning_rate": 1.6972425965147296e-05, "loss": 0.0542, "step": 1456 }, { "epoch": 2.77, "grad_norm": 1.8877127170562744, "learning_rate": 1.6968010679817264e-05, "loss": 0.0488, "step": 1457 }, { "epoch": 2.77, "grad_norm": 1.2066303491592407, "learning_rate": 1.6963592752518764e-05, "loss": 0.0304, "step": 1458 }, { "epoch": 2.77, "grad_norm": 2.100986957550049, "learning_rate": 1.6959172184926874e-05, "loss": 0.036, "step": 1459 }, { "epoch": 2.78, "grad_norm": 1.6601433753967285, "learning_rate": 1.695474897871769e-05, "loss": 0.0483, "step": 1460 }, { "epoch": 2.78, "grad_norm": 1.7731434106826782, "learning_rate": 1.69503231355683e-05, "loss": 0.0475, "step": 1461 }, { "epoch": 2.78, "grad_norm": 1.8608626127243042, "learning_rate": 1.6945894657156784e-05, "loss": 0.0286, "step": 1462 }, { "epoch": 2.78, "grad_norm": 1.2403239011764526, "learning_rate": 1.6941463545162235e-05, "loss": 0.0268, "step": 1463 }, { "epoch": 2.78, "grad_norm": 1.720775842666626, "learning_rate": 1.6937029801264745e-05, "loss": 0.0504, "step": 1464 }, { "epoch": 2.79, "grad_norm": 2.2950966358184814, "learning_rate": 1.6932593427145385e-05, "loss": 0.0392, "step": 1465 }, { "epoch": 2.79, "grad_norm": 1.4712214469909668, "learning_rate": 1.692815442448625e-05, "loss": 0.0375, "step": 1466 }, { "epoch": 2.79, "grad_norm": 1.4324734210968018, "learning_rate": 1.6923712794970407e-05, "loss": 0.0261, "step": 1467 }, { "epoch": 2.79, "grad_norm": 2.5626847743988037, "learning_rate": 1.6919268540281936e-05, "loss": 0.0498, "step": 1468 }, { "epoch": 2.79, "grad_norm": 2.2656331062316895, "learning_rate": 1.691482166210591e-05, "loss": 0.0522, "step": 1469 }, { "epoch": 2.79, "grad_norm": 1.8823176622390747, "learning_rate": 1.691037216212838e-05, "loss": 0.0411, "step": 1470 }, { "epoch": 2.8, "grad_norm": 2.668316602706909, "learning_rate": 1.6905920042036417e-05, "loss": 0.0403, "step": 1471 }, { "epoch": 2.8, "grad_norm": 1.7095972299575806, "learning_rate": 1.6901465303518066e-05, "loss": 0.0355, "step": 1472 }, { "epoch": 2.8, "grad_norm": 1.2088764905929565, "learning_rate": 1.6897007948262373e-05, "loss": 0.0312, "step": 1473 }, { "epoch": 2.8, "grad_norm": 1.3285363912582397, "learning_rate": 1.6892547977959372e-05, "loss": 0.0207, "step": 1474 }, { "epoch": 2.8, "grad_norm": 1.5954822301864624, "learning_rate": 1.6888085394300096e-05, "loss": 0.0429, "step": 1475 }, { "epoch": 2.81, "grad_norm": 2.074509859085083, "learning_rate": 1.6883620198976557e-05, "loss": 0.0532, "step": 1476 }, { "epoch": 2.81, "grad_norm": 1.1915949583053589, "learning_rate": 1.6879152393681772e-05, "loss": 0.0224, "step": 1477 }, { "epoch": 2.81, "grad_norm": 2.7690978050231934, "learning_rate": 1.6874681980109735e-05, "loss": 0.0398, "step": 1478 }, { "epoch": 2.81, "grad_norm": 1.5815085172653198, "learning_rate": 1.6870208959955438e-05, "loss": 0.0281, "step": 1479 }, { "epoch": 2.81, "grad_norm": 1.9665439128875732, "learning_rate": 1.6865733334914846e-05, "loss": 0.0362, "step": 1480 }, { "epoch": 2.82, "grad_norm": 2.1775546073913574, "learning_rate": 1.6861255106684932e-05, "loss": 0.0371, "step": 1481 }, { "epoch": 2.82, "grad_norm": 1.648036241531372, "learning_rate": 1.685677427696365e-05, "loss": 0.0284, "step": 1482 }, { "epoch": 2.82, "grad_norm": 1.6640424728393555, "learning_rate": 1.6852290847449922e-05, "loss": 0.0253, "step": 1483 }, { "epoch": 2.82, "grad_norm": 1.2158982753753662, "learning_rate": 1.6847804819843683e-05, "loss": 0.0197, "step": 1484 }, { "epoch": 2.82, "grad_norm": 1.4029244184494019, "learning_rate": 1.684331619584584e-05, "loss": 0.0246, "step": 1485 }, { "epoch": 2.83, "grad_norm": 1.808957576751709, "learning_rate": 1.6838824977158283e-05, "loss": 0.0404, "step": 1486 }, { "epoch": 2.83, "grad_norm": 1.9063189029693604, "learning_rate": 1.683433116548389e-05, "loss": 0.0459, "step": 1487 }, { "epoch": 2.83, "grad_norm": 2.1226425170898438, "learning_rate": 1.6829834762526512e-05, "loss": 0.045, "step": 1488 }, { "epoch": 2.83, "grad_norm": 2.086581230163574, "learning_rate": 1.6825335769991003e-05, "loss": 0.0416, "step": 1489 }, { "epoch": 2.83, "grad_norm": 1.3665595054626465, "learning_rate": 1.6820834189583178e-05, "loss": 0.0293, "step": 1490 }, { "epoch": 2.83, "grad_norm": 1.6668226718902588, "learning_rate": 1.6816330023009842e-05, "loss": 0.0406, "step": 1491 }, { "epoch": 2.84, "grad_norm": 1.887266993522644, "learning_rate": 1.6811823271978783e-05, "loss": 0.0427, "step": 1492 }, { "epoch": 2.84, "grad_norm": 2.5431532859802246, "learning_rate": 1.6807313938198768e-05, "loss": 0.0616, "step": 1493 }, { "epoch": 2.84, "grad_norm": 2.1289432048797607, "learning_rate": 1.680280202337954e-05, "loss": 0.0451, "step": 1494 }, { "epoch": 2.84, "grad_norm": 2.0465855598449707, "learning_rate": 1.6798287529231816e-05, "loss": 0.0382, "step": 1495 }, { "epoch": 2.84, "grad_norm": 1.731510043144226, "learning_rate": 1.6793770457467302e-05, "loss": 0.0274, "step": 1496 }, { "epoch": 2.85, "grad_norm": 1.7338287830352783, "learning_rate": 1.6789250809798682e-05, "loss": 0.0382, "step": 1497 }, { "epoch": 2.85, "grad_norm": 1.8619035482406616, "learning_rate": 1.6784728587939604e-05, "loss": 0.0588, "step": 1498 }, { "epoch": 2.85, "grad_norm": 2.0676491260528564, "learning_rate": 1.6780203793604695e-05, "loss": 0.0285, "step": 1499 }, { "epoch": 2.85, "grad_norm": 2.94146466255188, "learning_rate": 1.6775676428509574e-05, "loss": 0.0489, "step": 1500 }, { "epoch": 2.85, "eval_blimp_filtered_avg": 0.7368656716417911, "eval_blimp_filtered_std": 0.004867931612199695, "step": 1500 }, { "epoch": 2.85, "eval_blimp_supplement_avg": 0.7974137931034483, "eval_blimp_supplement_std": 0.017522854855969057, "step": 1500 }, { "epoch": 2.85, "eval_vqa_filtered_avg": 0.34, "eval_vqa_filtered_std": 0.04760952285695235, "step": 1500 }, { "epoch": 2.85, "eval_winoground_filtered_avg": 0.54, "eval_winoground_filtered_std": 0.05009082659620333, "step": 1500 }, { "epoch": 2.85, "grad_norm": 1.6103599071502686, "learning_rate": 1.6771146494370812e-05, "loss": 0.0375, "step": 1501 }, { "epoch": 2.86, "grad_norm": 2.4732515811920166, "learning_rate": 1.6766613992905968e-05, "loss": 0.0346, "step": 1502 }, { "epoch": 2.86, "grad_norm": 1.8517084121704102, "learning_rate": 1.676207892583357e-05, "loss": 0.0462, "step": 1503 }, { "epoch": 2.86, "grad_norm": 1.8187564611434937, "learning_rate": 1.6757541294873117e-05, "loss": 0.0482, "step": 1504 }, { "epoch": 2.86, "grad_norm": 1.7923002243041992, "learning_rate": 1.675300110174509e-05, "loss": 0.0438, "step": 1505 }, { "epoch": 2.86, "grad_norm": 2.023491859436035, "learning_rate": 1.6748458348170924e-05, "loss": 0.0489, "step": 1506 }, { "epoch": 2.87, "grad_norm": 1.879847526550293, "learning_rate": 1.6743913035873044e-05, "loss": 0.0496, "step": 1507 }, { "epoch": 2.87, "grad_norm": 1.360902190208435, "learning_rate": 1.6739365166574828e-05, "loss": 0.0281, "step": 1508 }, { "epoch": 2.87, "grad_norm": 1.6439207792282104, "learning_rate": 1.6734814742000634e-05, "loss": 0.0275, "step": 1509 }, { "epoch": 2.87, "grad_norm": 1.4941701889038086, "learning_rate": 1.6730261763875794e-05, "loss": 0.0308, "step": 1510 }, { "epoch": 2.87, "grad_norm": 1.5217808485031128, "learning_rate": 1.6725706233926588e-05, "loss": 0.0317, "step": 1511 }, { "epoch": 2.87, "grad_norm": 1.5765337944030762, "learning_rate": 1.6721148153880285e-05, "loss": 0.0369, "step": 1512 }, { "epoch": 2.88, "grad_norm": 2.591670274734497, "learning_rate": 1.6716587525465108e-05, "loss": 0.0637, "step": 1513 }, { "epoch": 2.88, "grad_norm": 1.5750192403793335, "learning_rate": 1.6712024350410253e-05, "loss": 0.0358, "step": 1514 }, { "epoch": 2.88, "grad_norm": 1.6690224409103394, "learning_rate": 1.6707458630445878e-05, "loss": 0.0335, "step": 1515 }, { "epoch": 2.88, "grad_norm": 1.8035495281219482, "learning_rate": 1.6702890367303103e-05, "loss": 0.0448, "step": 1516 }, { "epoch": 2.88, "grad_norm": 2.0410122871398926, "learning_rate": 1.6698319562714017e-05, "loss": 0.0484, "step": 1517 }, { "epoch": 2.89, "grad_norm": 1.9152569770812988, "learning_rate": 1.669374621841168e-05, "loss": 0.0389, "step": 1518 }, { "epoch": 2.89, "grad_norm": 1.928487777709961, "learning_rate": 1.668917033613009e-05, "loss": 0.05, "step": 1519 }, { "epoch": 2.89, "grad_norm": 1.7640846967697144, "learning_rate": 1.668459191760424e-05, "loss": 0.0446, "step": 1520 }, { "epoch": 2.89, "grad_norm": 3.392484664916992, "learning_rate": 1.6680010964570058e-05, "loss": 0.0574, "step": 1521 }, { "epoch": 2.89, "grad_norm": 1.7753551006317139, "learning_rate": 1.667542747876445e-05, "loss": 0.0495, "step": 1522 }, { "epoch": 2.9, "grad_norm": 1.6937929391860962, "learning_rate": 1.6670841461925273e-05, "loss": 0.0416, "step": 1523 }, { "epoch": 2.9, "grad_norm": 1.8437535762786865, "learning_rate": 1.6666252915791347e-05, "loss": 0.0405, "step": 1524 }, { "epoch": 2.9, "grad_norm": 1.2151983976364136, "learning_rate": 1.6661661842102446e-05, "loss": 0.0193, "step": 1525 }, { "epoch": 2.9, "grad_norm": 1.3780099153518677, "learning_rate": 1.6657068242599312e-05, "loss": 0.025, "step": 1526 }, { "epoch": 2.9, "grad_norm": 1.614394187927246, "learning_rate": 1.6652472119023637e-05, "loss": 0.0433, "step": 1527 }, { "epoch": 2.9, "grad_norm": 1.6994349956512451, "learning_rate": 1.6647873473118076e-05, "loss": 0.0457, "step": 1528 }, { "epoch": 2.91, "grad_norm": 1.5600141286849976, "learning_rate": 1.664327230662623e-05, "loss": 0.0396, "step": 1529 }, { "epoch": 2.91, "grad_norm": 2.22792387008667, "learning_rate": 1.663866862129267e-05, "loss": 0.0752, "step": 1530 }, { "epoch": 2.91, "grad_norm": 1.7961091995239258, "learning_rate": 1.663406241886291e-05, "loss": 0.043, "step": 1531 }, { "epoch": 2.91, "grad_norm": 1.367488980293274, "learning_rate": 1.662945370108342e-05, "loss": 0.0329, "step": 1532 }, { "epoch": 2.91, "grad_norm": 1.9288681745529175, "learning_rate": 1.662484246970163e-05, "loss": 0.0524, "step": 1533 }, { "epoch": 2.92, "grad_norm": 1.9872825145721436, "learning_rate": 1.6620228726465922e-05, "loss": 0.0515, "step": 1534 }, { "epoch": 2.92, "grad_norm": 2.2214250564575195, "learning_rate": 1.6615612473125628e-05, "loss": 0.0589, "step": 1535 }, { "epoch": 2.92, "grad_norm": 1.8163784742355347, "learning_rate": 1.661099371143103e-05, "loss": 0.0535, "step": 1536 }, { "epoch": 2.92, "grad_norm": 3.0090525150299072, "learning_rate": 1.660637244313336e-05, "loss": 0.0608, "step": 1537 }, { "epoch": 2.92, "grad_norm": 1.5075860023498535, "learning_rate": 1.6601748669984808e-05, "loss": 0.03, "step": 1538 }, { "epoch": 2.93, "grad_norm": 2.239757776260376, "learning_rate": 1.6597122393738506e-05, "loss": 0.0497, "step": 1539 }, { "epoch": 2.93, "grad_norm": 1.8114534616470337, "learning_rate": 1.6592493616148535e-05, "loss": 0.045, "step": 1540 }, { "epoch": 2.93, "grad_norm": 1.769778847694397, "learning_rate": 1.6587862338969934e-05, "loss": 0.0493, "step": 1541 }, { "epoch": 2.93, "grad_norm": 2.3367068767547607, "learning_rate": 1.6583228563958677e-05, "loss": 0.0476, "step": 1542 }, { "epoch": 2.93, "grad_norm": 1.3508057594299316, "learning_rate": 1.6578592292871696e-05, "loss": 0.0331, "step": 1543 }, { "epoch": 2.94, "grad_norm": 1.9670943021774292, "learning_rate": 1.6573953527466866e-05, "loss": 0.0611, "step": 1544 }, { "epoch": 2.94, "grad_norm": 2.212846279144287, "learning_rate": 1.6569312269503e-05, "loss": 0.0518, "step": 1545 }, { "epoch": 2.94, "grad_norm": 1.986210823059082, "learning_rate": 1.6564668520739867e-05, "loss": 0.0373, "step": 1546 }, { "epoch": 2.94, "grad_norm": 1.4447108507156372, "learning_rate": 1.6560022282938174e-05, "loss": 0.0393, "step": 1547 }, { "epoch": 2.94, "grad_norm": 1.8267812728881836, "learning_rate": 1.6555373557859572e-05, "loss": 0.0465, "step": 1548 }, { "epoch": 2.94, "grad_norm": 2.0429108142852783, "learning_rate": 1.6550722347266663e-05, "loss": 0.0508, "step": 1549 }, { "epoch": 2.95, "grad_norm": 1.5761868953704834, "learning_rate": 1.6546068652922977e-05, "loss": 0.0421, "step": 1550 }, { "epoch": 2.95, "grad_norm": 1.5395371913909912, "learning_rate": 1.6541412476592998e-05, "loss": 0.0467, "step": 1551 }, { "epoch": 2.95, "grad_norm": 2.0363876819610596, "learning_rate": 1.653675382004215e-05, "loss": 0.0567, "step": 1552 }, { "epoch": 2.95, "grad_norm": 1.3053922653198242, "learning_rate": 1.6532092685036783e-05, "loss": 0.0242, "step": 1553 }, { "epoch": 2.95, "grad_norm": 1.3001224994659424, "learning_rate": 1.652742907334421e-05, "loss": 0.0282, "step": 1554 }, { "epoch": 2.96, "grad_norm": 1.5846953392028809, "learning_rate": 1.6522762986732665e-05, "loss": 0.0468, "step": 1555 }, { "epoch": 2.96, "grad_norm": 2.587979793548584, "learning_rate": 1.651809442697133e-05, "loss": 0.046, "step": 1556 }, { "epoch": 2.96, "grad_norm": 1.5436335802078247, "learning_rate": 1.6513423395830317e-05, "loss": 0.0388, "step": 1557 }, { "epoch": 2.96, "grad_norm": 1.3006987571716309, "learning_rate": 1.650874989508068e-05, "loss": 0.0358, "step": 1558 }, { "epoch": 2.96, "grad_norm": 1.7882940769195557, "learning_rate": 1.6504073926494412e-05, "loss": 0.0251, "step": 1559 }, { "epoch": 2.97, "grad_norm": 1.7943528890609741, "learning_rate": 1.6499395491844432e-05, "loss": 0.0475, "step": 1560 }, { "epoch": 2.97, "grad_norm": 1.9520879983901978, "learning_rate": 1.6494714592904606e-05, "loss": 0.0484, "step": 1561 }, { "epoch": 2.97, "grad_norm": 1.504476547241211, "learning_rate": 1.6490031231449725e-05, "loss": 0.0451, "step": 1562 }, { "epoch": 2.97, "grad_norm": 1.9080092906951904, "learning_rate": 1.648534540925552e-05, "loss": 0.0482, "step": 1563 }, { "epoch": 2.97, "grad_norm": 1.3798726797103882, "learning_rate": 1.648065712809865e-05, "loss": 0.0327, "step": 1564 }, { "epoch": 2.98, "grad_norm": 1.8849060535430908, "learning_rate": 1.647596638975671e-05, "loss": 0.0468, "step": 1565 }, { "epoch": 2.98, "grad_norm": 1.1836392879486084, "learning_rate": 1.6471273196008224e-05, "loss": 0.0232, "step": 1566 }, { "epoch": 2.98, "grad_norm": 2.1543381214141846, "learning_rate": 1.646657754863265e-05, "loss": 0.0575, "step": 1567 }, { "epoch": 2.98, "grad_norm": 1.6395021677017212, "learning_rate": 1.646187944941037e-05, "loss": 0.0433, "step": 1568 }, { "epoch": 2.98, "grad_norm": 1.9673881530761719, "learning_rate": 1.6457178900122704e-05, "loss": 0.0432, "step": 1569 }, { "epoch": 2.98, "grad_norm": 1.9277725219726562, "learning_rate": 1.6452475902551902e-05, "loss": 0.0533, "step": 1570 }, { "epoch": 2.99, "grad_norm": 1.181236982345581, "learning_rate": 1.6447770458481126e-05, "loss": 0.0337, "step": 1571 }, { "epoch": 2.99, "grad_norm": 1.7804510593414307, "learning_rate": 1.6443062569694483e-05, "loss": 0.0351, "step": 1572 }, { "epoch": 2.99, "grad_norm": 2.4129676818847656, "learning_rate": 1.6438352237977007e-05, "loss": 0.0598, "step": 1573 }, { "epoch": 2.99, "grad_norm": 1.9156911373138428, "learning_rate": 1.643363946511464e-05, "loss": 0.0323, "step": 1574 }, { "epoch": 2.99, "grad_norm": 1.645728349685669, "learning_rate": 1.642892425289427e-05, "loss": 0.0367, "step": 1575 }, { "epoch": 3.0, "grad_norm": 2.573460578918457, "learning_rate": 1.6424206603103702e-05, "loss": 0.0553, "step": 1576 }, { "epoch": 3.0, "grad_norm": 2.2048609256744385, "learning_rate": 1.6419486517531658e-05, "loss": 0.0436, "step": 1577 }, { "epoch": 3.0, "grad_norm": 1.5446937084197998, "learning_rate": 1.6414763997967794e-05, "loss": 0.0339, "step": 1578 }, { "epoch": 3.0, "grad_norm": 0.6709340214729309, "learning_rate": 1.641003904620269e-05, "loss": 0.0119, "step": 1579 }, { "epoch": 3.0, "grad_norm": 1.3252534866333008, "learning_rate": 1.6405311664027837e-05, "loss": 0.0362, "step": 1580 }, { "epoch": 3.01, "grad_norm": 1.5059958696365356, "learning_rate": 1.6400581853235663e-05, "loss": 0.0355, "step": 1581 }, { "epoch": 3.01, "grad_norm": 1.25771164894104, "learning_rate": 1.6395849615619494e-05, "loss": 0.0211, "step": 1582 }, { "epoch": 3.01, "grad_norm": 1.713769555091858, "learning_rate": 1.6391114952973602e-05, "loss": 0.0516, "step": 1583 }, { "epoch": 3.01, "grad_norm": 1.1800352334976196, "learning_rate": 1.6386377867093157e-05, "loss": 0.0261, "step": 1584 }, { "epoch": 3.01, "grad_norm": 1.2270004749298096, "learning_rate": 1.638163835977427e-05, "loss": 0.0295, "step": 1585 }, { "epoch": 3.02, "grad_norm": 1.0064992904663086, "learning_rate": 1.6376896432813942e-05, "loss": 0.0277, "step": 1586 }, { "epoch": 3.02, "grad_norm": 1.1297739744186401, "learning_rate": 1.637215208801012e-05, "loss": 0.0246, "step": 1587 }, { "epoch": 3.02, "grad_norm": 1.742272138595581, "learning_rate": 1.6367405327161643e-05, "loss": 0.0486, "step": 1588 }, { "epoch": 3.02, "grad_norm": 1.1579952239990234, "learning_rate": 1.6362656152068287e-05, "loss": 0.0239, "step": 1589 }, { "epoch": 3.02, "grad_norm": 1.5753610134124756, "learning_rate": 1.6357904564530728e-05, "loss": 0.0226, "step": 1590 }, { "epoch": 3.02, "grad_norm": 1.2742767333984375, "learning_rate": 1.6353150566350563e-05, "loss": 0.028, "step": 1591 }, { "epoch": 3.03, "grad_norm": 1.4276915788650513, "learning_rate": 1.6348394159330307e-05, "loss": 0.0357, "step": 1592 }, { "epoch": 3.03, "grad_norm": 1.528022289276123, "learning_rate": 1.634363534527338e-05, "loss": 0.0318, "step": 1593 }, { "epoch": 3.03, "grad_norm": 1.370553970336914, "learning_rate": 1.6338874125984122e-05, "loss": 0.0229, "step": 1594 }, { "epoch": 3.03, "grad_norm": 1.3544758558273315, "learning_rate": 1.6334110503267777e-05, "loss": 0.0169, "step": 1595 }, { "epoch": 3.03, "grad_norm": 1.6991629600524902, "learning_rate": 1.6329344478930507e-05, "loss": 0.0324, "step": 1596 }, { "epoch": 3.04, "grad_norm": 1.3641507625579834, "learning_rate": 1.6324576054779386e-05, "loss": 0.0253, "step": 1597 }, { "epoch": 3.04, "grad_norm": 2.3353819847106934, "learning_rate": 1.631980523262239e-05, "loss": 0.032, "step": 1598 }, { "epoch": 3.04, "grad_norm": 1.6720738410949707, "learning_rate": 1.6315032014268412e-05, "loss": 0.0385, "step": 1599 }, { "epoch": 3.04, "grad_norm": 1.1255733966827393, "learning_rate": 1.6310256401527246e-05, "loss": 0.0236, "step": 1600 }, { "epoch": 3.04, "eval_blimp_filtered_avg": 0.7276119402985075, "eval_blimp_filtered_std": 0.0049030280425692, "step": 1600 }, { "epoch": 3.04, "eval_blimp_supplement_avg": 0.7823275862068966, "eval_blimp_supplement_std": 0.017938041848358673, "step": 1600 }, { "epoch": 3.04, "eval_vqa_filtered_avg": 0.34, "eval_vqa_filtered_std": 0.04760952285695236, "step": 1600 }, { "epoch": 3.04, "eval_winoground_filtered_avg": 0.53, "eval_winoground_filtered_std": 0.0501613558046592, "step": 1600 }, { "epoch": 3.04, "grad_norm": 1.7692023515701294, "learning_rate": 1.6305478396209597e-05, "loss": 0.0295, "step": 1601 }, { "epoch": 3.05, "grad_norm": 1.7927953004837036, "learning_rate": 1.630069800012708e-05, "loss": 0.0306, "step": 1602 }, { "epoch": 3.05, "grad_norm": 1.322614312171936, "learning_rate": 1.6295915215092218e-05, "loss": 0.0185, "step": 1603 }, { "epoch": 3.05, "grad_norm": 1.4467414617538452, "learning_rate": 1.6291130042918434e-05, "loss": 0.0263, "step": 1604 }, { "epoch": 3.05, "grad_norm": 1.027329683303833, "learning_rate": 1.6286342485420056e-05, "loss": 0.0144, "step": 1605 }, { "epoch": 3.05, "grad_norm": 1.2320804595947266, "learning_rate": 1.6281552544412323e-05, "loss": 0.023, "step": 1606 }, { "epoch": 3.06, "grad_norm": 1.500370740890503, "learning_rate": 1.627676022171137e-05, "loss": 0.0247, "step": 1607 }, { "epoch": 3.06, "grad_norm": 1.6739881038665771, "learning_rate": 1.6271965519134242e-05, "loss": 0.041, "step": 1608 }, { "epoch": 3.06, "grad_norm": 1.7388907670974731, "learning_rate": 1.626716843849888e-05, "loss": 0.0288, "step": 1609 }, { "epoch": 3.06, "grad_norm": 1.429703712463379, "learning_rate": 1.626236898162413e-05, "loss": 0.0316, "step": 1610 }, { "epoch": 3.06, "grad_norm": 1.401723027229309, "learning_rate": 1.6257567150329742e-05, "loss": 0.0174, "step": 1611 }, { "epoch": 3.06, "grad_norm": 1.3024379014968872, "learning_rate": 1.6252762946436357e-05, "loss": 0.0307, "step": 1612 }, { "epoch": 3.07, "grad_norm": 2.168133497238159, "learning_rate": 1.6247956371765526e-05, "loss": 0.022, "step": 1613 }, { "epoch": 3.07, "grad_norm": 1.4228801727294922, "learning_rate": 1.6243147428139697e-05, "loss": 0.0293, "step": 1614 }, { "epoch": 3.07, "grad_norm": 1.4971542358398438, "learning_rate": 1.6238336117382204e-05, "loss": 0.0273, "step": 1615 }, { "epoch": 3.07, "grad_norm": 2.198575973510742, "learning_rate": 1.6233522441317298e-05, "loss": 0.0702, "step": 1616 }, { "epoch": 3.07, "grad_norm": 1.0419461727142334, "learning_rate": 1.6228706401770108e-05, "loss": 0.0143, "step": 1617 }, { "epoch": 3.08, "grad_norm": 1.3767184019088745, "learning_rate": 1.6223888000566678e-05, "loss": 0.018, "step": 1618 }, { "epoch": 3.08, "grad_norm": 1.7313289642333984, "learning_rate": 1.621906723953393e-05, "loss": 0.029, "step": 1619 }, { "epoch": 3.08, "grad_norm": 0.7428654432296753, "learning_rate": 1.621424412049969e-05, "loss": 0.0107, "step": 1620 }, { "epoch": 3.08, "grad_norm": 1.387147307395935, "learning_rate": 1.6209418645292674e-05, "loss": 0.0254, "step": 1621 }, { "epoch": 3.08, "grad_norm": 1.2600364685058594, "learning_rate": 1.6204590815742503e-05, "loss": 0.0291, "step": 1622 }, { "epoch": 3.09, "grad_norm": 1.0964314937591553, "learning_rate": 1.6199760633679667e-05, "loss": 0.0181, "step": 1623 }, { "epoch": 3.09, "grad_norm": 1.7824574708938599, "learning_rate": 1.6194928100935576e-05, "loss": 0.0423, "step": 1624 }, { "epoch": 3.09, "grad_norm": 1.8771440982818604, "learning_rate": 1.6190093219342512e-05, "loss": 0.0426, "step": 1625 }, { "epoch": 3.09, "grad_norm": 0.860299289226532, "learning_rate": 1.6185255990733648e-05, "loss": 0.0196, "step": 1626 }, { "epoch": 3.09, "grad_norm": 1.232007622718811, "learning_rate": 1.6180416416943056e-05, "loss": 0.0175, "step": 1627 }, { "epoch": 3.1, "grad_norm": 1.4918949604034424, "learning_rate": 1.61755744998057e-05, "loss": 0.0214, "step": 1628 }, { "epoch": 3.1, "grad_norm": 1.5385735034942627, "learning_rate": 1.6170730241157415e-05, "loss": 0.025, "step": 1629 }, { "epoch": 3.1, "grad_norm": 1.0312505960464478, "learning_rate": 1.6165883642834938e-05, "loss": 0.015, "step": 1630 }, { "epoch": 3.1, "grad_norm": 1.3214082717895508, "learning_rate": 1.6161034706675894e-05, "loss": 0.0246, "step": 1631 }, { "epoch": 3.1, "grad_norm": 1.254702091217041, "learning_rate": 1.615618343451878e-05, "loss": 0.0192, "step": 1632 }, { "epoch": 3.1, "grad_norm": 1.7042814493179321, "learning_rate": 1.6151329828203003e-05, "loss": 0.0366, "step": 1633 }, { "epoch": 3.11, "grad_norm": 1.1157753467559814, "learning_rate": 1.614647388956883e-05, "loss": 0.024, "step": 1634 }, { "epoch": 3.11, "grad_norm": 1.9644254446029663, "learning_rate": 1.6141615620457423e-05, "loss": 0.032, "step": 1635 }, { "epoch": 3.11, "grad_norm": 1.6441493034362793, "learning_rate": 1.6136755022710835e-05, "loss": 0.0296, "step": 1636 }, { "epoch": 3.11, "grad_norm": 1.6015225648880005, "learning_rate": 1.6131892098171988e-05, "loss": 0.0346, "step": 1637 }, { "epoch": 3.11, "grad_norm": 1.4264566898345947, "learning_rate": 1.6127026848684696e-05, "loss": 0.023, "step": 1638 }, { "epoch": 3.12, "grad_norm": 1.313141107559204, "learning_rate": 1.612215927609365e-05, "loss": 0.023, "step": 1639 }, { "epoch": 3.12, "grad_norm": 1.0375596284866333, "learning_rate": 1.6117289382244423e-05, "loss": 0.0247, "step": 1640 }, { "epoch": 3.12, "grad_norm": 1.4555021524429321, "learning_rate": 1.611241716898347e-05, "loss": 0.026, "step": 1641 }, { "epoch": 3.12, "grad_norm": 0.9433594942092896, "learning_rate": 1.6107542638158122e-05, "loss": 0.0205, "step": 1642 }, { "epoch": 3.12, "grad_norm": 1.4232335090637207, "learning_rate": 1.610266579161659e-05, "loss": 0.0181, "step": 1643 }, { "epoch": 3.13, "grad_norm": 1.5441125631332397, "learning_rate": 1.6097786631207965e-05, "loss": 0.0204, "step": 1644 }, { "epoch": 3.13, "grad_norm": 1.6251298189163208, "learning_rate": 1.6092905158782215e-05, "loss": 0.0125, "step": 1645 }, { "epoch": 3.13, "grad_norm": 1.2997840642929077, "learning_rate": 1.6088021376190178e-05, "loss": 0.0254, "step": 1646 }, { "epoch": 3.13, "grad_norm": 1.7595272064208984, "learning_rate": 1.6083135285283578e-05, "loss": 0.0221, "step": 1647 }, { "epoch": 3.13, "grad_norm": 1.4571518898010254, "learning_rate": 1.6078246887915006e-05, "loss": 0.0216, "step": 1648 }, { "epoch": 3.13, "grad_norm": 1.8976833820343018, "learning_rate": 1.6073356185937936e-05, "loss": 0.0474, "step": 1649 }, { "epoch": 3.14, "grad_norm": 0.9777296781539917, "learning_rate": 1.606846318120671e-05, "loss": 0.0234, "step": 1650 }, { "epoch": 3.14, "grad_norm": 1.218605875968933, "learning_rate": 1.6063567875576537e-05, "loss": 0.0158, "step": 1651 }, { "epoch": 3.14, "grad_norm": 1.949329137802124, "learning_rate": 1.6058670270903512e-05, "loss": 0.0542, "step": 1652 }, { "epoch": 3.14, "grad_norm": 2.7997021675109863, "learning_rate": 1.6053770369044595e-05, "loss": 0.0243, "step": 1653 }, { "epoch": 3.14, "grad_norm": 1.4682564735412598, "learning_rate": 1.6048868171857612e-05, "loss": 0.0287, "step": 1654 }, { "epoch": 3.15, "grad_norm": 1.0287057161331177, "learning_rate": 1.604396368120127e-05, "loss": 0.0282, "step": 1655 }, { "epoch": 3.15, "grad_norm": 1.4592174291610718, "learning_rate": 1.603905689893513e-05, "loss": 0.0266, "step": 1656 }, { "epoch": 3.15, "grad_norm": 0.8489573001861572, "learning_rate": 1.6034147826919647e-05, "loss": 0.0126, "step": 1657 }, { "epoch": 3.15, "grad_norm": 1.3222793340682983, "learning_rate": 1.602923646701612e-05, "loss": 0.0156, "step": 1658 }, { "epoch": 3.15, "grad_norm": 1.1242907047271729, "learning_rate": 1.6024322821086724e-05, "loss": 0.0292, "step": 1659 }, { "epoch": 3.16, "grad_norm": 1.5427563190460205, "learning_rate": 1.6019406890994504e-05, "loss": 0.0278, "step": 1660 }, { "epoch": 3.16, "grad_norm": 1.4716800451278687, "learning_rate": 1.6014488678603367e-05, "loss": 0.0184, "step": 1661 }, { "epoch": 3.16, "grad_norm": 1.8603131771087646, "learning_rate": 1.6009568185778085e-05, "loss": 0.0272, "step": 1662 }, { "epoch": 3.16, "grad_norm": 1.4376370906829834, "learning_rate": 1.6004645414384295e-05, "loss": 0.0287, "step": 1663 }, { "epoch": 3.16, "grad_norm": 1.2555499076843262, "learning_rate": 1.5999720366288502e-05, "loss": 0.0287, "step": 1664 }, { "epoch": 3.17, "grad_norm": 1.1069493293762207, "learning_rate": 1.5994793043358074e-05, "loss": 0.023, "step": 1665 }, { "epoch": 3.17, "grad_norm": 1.610357403755188, "learning_rate": 1.5989863447461234e-05, "loss": 0.0304, "step": 1666 }, { "epoch": 3.17, "grad_norm": 1.0859414339065552, "learning_rate": 1.5984931580467072e-05, "loss": 0.019, "step": 1667 }, { "epoch": 3.17, "grad_norm": 1.5532410144805908, "learning_rate": 1.5979997444245543e-05, "loss": 0.0316, "step": 1668 }, { "epoch": 3.17, "grad_norm": 1.0747636556625366, "learning_rate": 1.5975061040667453e-05, "loss": 0.0392, "step": 1669 }, { "epoch": 3.17, "grad_norm": 1.7179269790649414, "learning_rate": 1.597012237160448e-05, "loss": 0.0495, "step": 1670 }, { "epoch": 3.18, "grad_norm": 1.6711649894714355, "learning_rate": 1.596518143892915e-05, "loss": 0.0184, "step": 1671 }, { "epoch": 3.18, "grad_norm": 1.6358249187469482, "learning_rate": 1.596023824451484e-05, "loss": 0.0337, "step": 1672 }, { "epoch": 3.18, "grad_norm": 1.141688346862793, "learning_rate": 1.5955292790235815e-05, "loss": 0.0132, "step": 1673 }, { "epoch": 3.18, "grad_norm": 1.4406706094741821, "learning_rate": 1.5950345077967165e-05, "loss": 0.0267, "step": 1674 }, { "epoch": 3.18, "grad_norm": 1.2822673320770264, "learning_rate": 1.5945395109584855e-05, "loss": 0.0256, "step": 1675 }, { "epoch": 3.19, "grad_norm": 2.2495224475860596, "learning_rate": 1.5940442886965694e-05, "loss": 0.0395, "step": 1676 }, { "epoch": 3.19, "grad_norm": 1.3163946866989136, "learning_rate": 1.5935488411987353e-05, "loss": 0.025, "step": 1677 }, { "epoch": 3.19, "grad_norm": 1.5015023946762085, "learning_rate": 1.5930531686528356e-05, "loss": 0.0335, "step": 1678 }, { "epoch": 3.19, "grad_norm": 1.5714977979660034, "learning_rate": 1.5925572712468074e-05, "loss": 0.034, "step": 1679 }, { "epoch": 3.19, "grad_norm": 1.2441788911819458, "learning_rate": 1.5920611491686747e-05, "loss": 0.0284, "step": 1680 }, { "epoch": 3.2, "grad_norm": 3.540977716445923, "learning_rate": 1.5915648026065442e-05, "loss": 0.0561, "step": 1681 }, { "epoch": 3.2, "grad_norm": 1.4771134853363037, "learning_rate": 1.5910682317486096e-05, "loss": 0.0215, "step": 1682 }, { "epoch": 3.2, "grad_norm": 1.3452155590057373, "learning_rate": 1.5905714367831495e-05, "loss": 0.0366, "step": 1683 }, { "epoch": 3.2, "grad_norm": 1.2059392929077148, "learning_rate": 1.5900744178985267e-05, "loss": 0.0148, "step": 1684 }, { "epoch": 3.2, "grad_norm": 1.5215613842010498, "learning_rate": 1.589577175283189e-05, "loss": 0.0152, "step": 1685 }, { "epoch": 3.21, "grad_norm": 1.2857911586761475, "learning_rate": 1.58907970912567e-05, "loss": 0.0266, "step": 1686 }, { "epoch": 3.21, "grad_norm": 1.4333269596099854, "learning_rate": 1.5885820196145865e-05, "loss": 0.0253, "step": 1687 }, { "epoch": 3.21, "grad_norm": 1.4853439331054688, "learning_rate": 1.588084106938642e-05, "loss": 0.021, "step": 1688 }, { "epoch": 3.21, "grad_norm": 1.496726632118225, "learning_rate": 1.5875859712866224e-05, "loss": 0.0229, "step": 1689 }, { "epoch": 3.21, "grad_norm": 1.0049349069595337, "learning_rate": 1.5870876128473995e-05, "loss": 0.0196, "step": 1690 }, { "epoch": 3.21, "grad_norm": 1.294420599937439, "learning_rate": 1.5865890318099296e-05, "loss": 0.0252, "step": 1691 }, { "epoch": 3.22, "grad_norm": 1.1148287057876587, "learning_rate": 1.5860902283632528e-05, "loss": 0.0324, "step": 1692 }, { "epoch": 3.22, "grad_norm": 1.8307185173034668, "learning_rate": 1.585591202696494e-05, "loss": 0.0333, "step": 1693 }, { "epoch": 3.22, "grad_norm": 1.5953941345214844, "learning_rate": 1.5850919549988623e-05, "loss": 0.0337, "step": 1694 }, { "epoch": 3.22, "grad_norm": 1.8430829048156738, "learning_rate": 1.58459248545965e-05, "loss": 0.0255, "step": 1695 }, { "epoch": 3.22, "grad_norm": 1.5012850761413574, "learning_rate": 1.584092794268235e-05, "loss": 0.0244, "step": 1696 }, { "epoch": 3.23, "grad_norm": 1.1381056308746338, "learning_rate": 1.583592881614079e-05, "loss": 0.0262, "step": 1697 }, { "epoch": 3.23, "grad_norm": 1.7109826803207397, "learning_rate": 1.5830927476867266e-05, "loss": 0.021, "step": 1698 }, { "epoch": 3.23, "grad_norm": 2.803757667541504, "learning_rate": 1.582592392675807e-05, "loss": 0.0271, "step": 1699 }, { "epoch": 3.23, "grad_norm": 1.3511645793914795, "learning_rate": 1.5820918167710337e-05, "loss": 0.0224, "step": 1700 }, { "epoch": 3.23, "eval_blimp_filtered_avg": 0.7301492537313433, "eval_blimp_filtered_std": 0.00491112155860563, "step": 1700 }, { "epoch": 3.23, "eval_blimp_supplement_avg": 0.7866379310344828, "eval_blimp_supplement_std": 0.017803304802653252, "step": 1700 }, { "epoch": 3.23, "eval_vqa_filtered_avg": 0.36, "eval_vqa_filtered_std": 0.04824181513244218, "step": 1700 }, { "epoch": 3.23, "eval_winoground_filtered_avg": 0.55, "eval_winoground_filtered_std": 0.05, "step": 1700 }, { "epoch": 3.23, "grad_norm": 1.6934709548950195, "learning_rate": 1.581591020162203e-05, "loss": 0.0205, "step": 1701 }, { "epoch": 3.24, "grad_norm": 1.259806752204895, "learning_rate": 1.5810900030391953e-05, "loss": 0.0252, "step": 1702 }, { "epoch": 3.24, "grad_norm": 0.8046837449073792, "learning_rate": 1.5805887655919748e-05, "loss": 0.0228, "step": 1703 }, { "epoch": 3.24, "grad_norm": 1.5544774532318115, "learning_rate": 1.5800873080105894e-05, "loss": 0.0218, "step": 1704 }, { "epoch": 3.24, "grad_norm": 1.2339173555374146, "learning_rate": 1.5795856304851696e-05, "loss": 0.0273, "step": 1705 }, { "epoch": 3.24, "grad_norm": 1.1239312887191772, "learning_rate": 1.57908373320593e-05, "loss": 0.022, "step": 1706 }, { "epoch": 3.25, "grad_norm": 1.4489978551864624, "learning_rate": 1.5785816163631686e-05, "loss": 0.0253, "step": 1707 }, { "epoch": 3.25, "grad_norm": 1.3494914770126343, "learning_rate": 1.578079280147266e-05, "loss": 0.0266, "step": 1708 }, { "epoch": 3.25, "grad_norm": 1.4359363317489624, "learning_rate": 1.5775767247486864e-05, "loss": 0.0262, "step": 1709 }, { "epoch": 3.25, "grad_norm": 1.670751929283142, "learning_rate": 1.577073950357978e-05, "loss": 0.0418, "step": 1710 }, { "epoch": 3.25, "grad_norm": 1.4074912071228027, "learning_rate": 1.57657095716577e-05, "loss": 0.0289, "step": 1711 }, { "epoch": 3.25, "grad_norm": 1.5289236307144165, "learning_rate": 1.5760677453627756e-05, "loss": 0.0272, "step": 1712 }, { "epoch": 3.26, "grad_norm": 1.4932886362075806, "learning_rate": 1.575564315139792e-05, "loss": 0.0212, "step": 1713 }, { "epoch": 3.26, "grad_norm": 1.2967259883880615, "learning_rate": 1.575060666687698e-05, "loss": 0.0269, "step": 1714 }, { "epoch": 3.26, "grad_norm": 1.0802960395812988, "learning_rate": 1.574556800197454e-05, "loss": 0.0177, "step": 1715 }, { "epoch": 3.26, "grad_norm": 0.6445685029029846, "learning_rate": 1.5740527158601055e-05, "loss": 0.014, "step": 1716 }, { "epoch": 3.26, "grad_norm": 1.3532171249389648, "learning_rate": 1.57354841386678e-05, "loss": 0.0279, "step": 1717 }, { "epoch": 3.27, "grad_norm": 1.2368031740188599, "learning_rate": 1.5730438944086858e-05, "loss": 0.0207, "step": 1718 }, { "epoch": 3.27, "grad_norm": 1.5832945108413696, "learning_rate": 1.5725391576771153e-05, "loss": 0.0276, "step": 1719 }, { "epoch": 3.27, "grad_norm": 1.339755892753601, "learning_rate": 1.572034203863443e-05, "loss": 0.0279, "step": 1720 }, { "epoch": 3.27, "grad_norm": 1.358864426612854, "learning_rate": 1.571529033159126e-05, "loss": 0.022, "step": 1721 }, { "epoch": 3.27, "grad_norm": 1.5937970876693726, "learning_rate": 1.5710236457557023e-05, "loss": 0.0228, "step": 1722 }, { "epoch": 3.28, "grad_norm": 1.2345134019851685, "learning_rate": 1.5705180418447935e-05, "loss": 0.0232, "step": 1723 }, { "epoch": 3.28, "grad_norm": 1.4654738903045654, "learning_rate": 1.570012221618103e-05, "loss": 0.022, "step": 1724 }, { "epoch": 3.28, "grad_norm": 1.0521446466445923, "learning_rate": 1.569506185267415e-05, "loss": 0.023, "step": 1725 }, { "epoch": 3.28, "grad_norm": 1.431315302848816, "learning_rate": 1.5689999329845974e-05, "loss": 0.0333, "step": 1726 }, { "epoch": 3.28, "grad_norm": 0.851016104221344, "learning_rate": 1.5684934649615987e-05, "loss": 0.0191, "step": 1727 }, { "epoch": 3.29, "grad_norm": 1.2962679862976074, "learning_rate": 1.5679867813904505e-05, "loss": 0.038, "step": 1728 }, { "epoch": 3.29, "grad_norm": 1.4401192665100098, "learning_rate": 1.5674798824632645e-05, "loss": 0.0279, "step": 1729 }, { "epoch": 3.29, "grad_norm": 1.717755913734436, "learning_rate": 1.5669727683722356e-05, "loss": 0.0354, "step": 1730 }, { "epoch": 3.29, "grad_norm": 1.560701608657837, "learning_rate": 1.5664654393096387e-05, "loss": 0.0286, "step": 1731 }, { "epoch": 3.29, "grad_norm": 1.6711983680725098, "learning_rate": 1.5659578954678314e-05, "loss": 0.0223, "step": 1732 }, { "epoch": 3.29, "grad_norm": 1.2025411128997803, "learning_rate": 1.5654501370392526e-05, "loss": 0.0185, "step": 1733 }, { "epoch": 3.3, "grad_norm": 1.4098929166793823, "learning_rate": 1.5649421642164223e-05, "loss": 0.025, "step": 1734 }, { "epoch": 3.3, "grad_norm": 1.2462745904922485, "learning_rate": 1.5644339771919416e-05, "loss": 0.0166, "step": 1735 }, { "epoch": 3.3, "grad_norm": 1.7657322883605957, "learning_rate": 1.563925576158493e-05, "loss": 0.0312, "step": 1736 }, { "epoch": 3.3, "grad_norm": 1.3763245344161987, "learning_rate": 1.5634169613088402e-05, "loss": 0.0199, "step": 1737 }, { "epoch": 3.3, "grad_norm": 1.5644985437393188, "learning_rate": 1.5629081328358286e-05, "loss": 0.0297, "step": 1738 }, { "epoch": 3.31, "grad_norm": 1.1160866022109985, "learning_rate": 1.5623990909323832e-05, "loss": 0.0217, "step": 1739 }, { "epoch": 3.31, "grad_norm": 1.396847128868103, "learning_rate": 1.5618898357915115e-05, "loss": 0.029, "step": 1740 }, { "epoch": 3.31, "grad_norm": 1.4812453985214233, "learning_rate": 1.5613803676063e-05, "loss": 0.0269, "step": 1741 }, { "epoch": 3.31, "grad_norm": 1.1533353328704834, "learning_rate": 1.560870686569918e-05, "loss": 0.0546, "step": 1742 }, { "epoch": 3.31, "grad_norm": 1.2137198448181152, "learning_rate": 1.5603607928756136e-05, "loss": 0.029, "step": 1743 }, { "epoch": 3.32, "grad_norm": 1.636121153831482, "learning_rate": 1.559850686716717e-05, "loss": 0.022, "step": 1744 }, { "epoch": 3.32, "grad_norm": 1.1376657485961914, "learning_rate": 1.559340368286639e-05, "loss": 0.0152, "step": 1745 }, { "epoch": 3.32, "grad_norm": 1.4871578216552734, "learning_rate": 1.5588298377788687e-05, "loss": 0.0189, "step": 1746 }, { "epoch": 3.32, "grad_norm": 1.3610031604766846, "learning_rate": 1.5583190953869785e-05, "loss": 0.0172, "step": 1747 }, { "epoch": 3.32, "grad_norm": 1.14969801902771, "learning_rate": 1.5578081413046195e-05, "loss": 0.0126, "step": 1748 }, { "epoch": 3.33, "grad_norm": 1.3248211145401, "learning_rate": 1.557296975725523e-05, "loss": 0.0328, "step": 1749 }, { "epoch": 3.33, "grad_norm": 1.0577207803726196, "learning_rate": 1.556785598843502e-05, "loss": 0.0187, "step": 1750 }, { "epoch": 3.33, "grad_norm": 2.406080961227417, "learning_rate": 1.5562740108524472e-05, "loss": 0.0332, "step": 1751 }, { "epoch": 3.33, "grad_norm": 2.116612434387207, "learning_rate": 1.5557622119463313e-05, "loss": 0.0399, "step": 1752 }, { "epoch": 3.33, "grad_norm": 1.6478185653686523, "learning_rate": 1.5552502023192063e-05, "loss": 0.0339, "step": 1753 }, { "epoch": 3.33, "grad_norm": 0.9994806051254272, "learning_rate": 1.5547379821652037e-05, "loss": 0.0223, "step": 1754 }, { "epoch": 3.34, "grad_norm": 1.1488239765167236, "learning_rate": 1.554225551678536e-05, "loss": 0.029, "step": 1755 }, { "epoch": 3.34, "grad_norm": 1.4422129392623901, "learning_rate": 1.5537129110534947e-05, "loss": 0.0285, "step": 1756 }, { "epoch": 3.34, "grad_norm": 1.181380033493042, "learning_rate": 1.55320006048445e-05, "loss": 0.0246, "step": 1757 }, { "epoch": 3.34, "grad_norm": 2.3222532272338867, "learning_rate": 1.5526870001658535e-05, "loss": 0.0425, "step": 1758 }, { "epoch": 3.34, "grad_norm": 1.8507418632507324, "learning_rate": 1.552173730292235e-05, "loss": 0.0287, "step": 1759 }, { "epoch": 3.35, "grad_norm": 1.7831445932388306, "learning_rate": 1.5516602510582044e-05, "loss": 0.0302, "step": 1760 }, { "epoch": 3.35, "grad_norm": 1.8509196043014526, "learning_rate": 1.551146562658451e-05, "loss": 0.0324, "step": 1761 }, { "epoch": 3.35, "grad_norm": 1.0314780473709106, "learning_rate": 1.5506326652877433e-05, "loss": 0.0151, "step": 1762 }, { "epoch": 3.35, "grad_norm": 0.9274294972419739, "learning_rate": 1.5501185591409285e-05, "loss": 0.014, "step": 1763 }, { "epoch": 3.35, "grad_norm": 1.34929358959198, "learning_rate": 1.549604244412934e-05, "loss": 0.0164, "step": 1764 }, { "epoch": 3.36, "grad_norm": 1.6678249835968018, "learning_rate": 1.5490897212987657e-05, "loss": 0.033, "step": 1765 }, { "epoch": 3.36, "grad_norm": 1.3100156784057617, "learning_rate": 1.548574989993508e-05, "loss": 0.0194, "step": 1766 }, { "epoch": 3.36, "grad_norm": 1.3372546434402466, "learning_rate": 1.5480600506923248e-05, "loss": 0.0185, "step": 1767 }, { "epoch": 3.36, "grad_norm": 0.9084145426750183, "learning_rate": 1.5475449035904597e-05, "loss": 0.0165, "step": 1768 }, { "epoch": 3.36, "grad_norm": 0.8685306906700134, "learning_rate": 1.5470295488832336e-05, "loss": 0.0165, "step": 1769 }, { "epoch": 3.37, "grad_norm": 1.239938497543335, "learning_rate": 1.5465139867660458e-05, "loss": 0.0184, "step": 1770 }, { "epoch": 3.37, "grad_norm": 1.3483777046203613, "learning_rate": 1.5459982174343767e-05, "loss": 0.0372, "step": 1771 }, { "epoch": 3.37, "grad_norm": 2.1052465438842773, "learning_rate": 1.5454822410837834e-05, "loss": 0.043, "step": 1772 }, { "epoch": 3.37, "grad_norm": 1.1698567867279053, "learning_rate": 1.5449660579099014e-05, "loss": 0.0309, "step": 1773 }, { "epoch": 3.37, "grad_norm": 1.9429227113723755, "learning_rate": 1.5444496681084448e-05, "loss": 0.0308, "step": 1774 }, { "epoch": 3.37, "grad_norm": 1.7246915102005005, "learning_rate": 1.5439330718752072e-05, "loss": 0.026, "step": 1775 }, { "epoch": 3.38, "grad_norm": 1.9642128944396973, "learning_rate": 1.543416269406059e-05, "loss": 0.028, "step": 1776 }, { "epoch": 3.38, "grad_norm": 0.8872991800308228, "learning_rate": 1.542899260896949e-05, "loss": 0.0111, "step": 1777 }, { "epoch": 3.38, "grad_norm": 1.6237640380859375, "learning_rate": 1.5423820465439052e-05, "loss": 0.0288, "step": 1778 }, { "epoch": 3.38, "grad_norm": 1.3261765241622925, "learning_rate": 1.5418646265430328e-05, "loss": 0.0235, "step": 1779 }, { "epoch": 3.38, "grad_norm": 1.4342628717422485, "learning_rate": 1.5413470010905148e-05, "loss": 0.0228, "step": 1780 }, { "epoch": 3.39, "grad_norm": 1.2709319591522217, "learning_rate": 1.5408291703826125e-05, "loss": 0.02, "step": 1781 }, { "epoch": 3.39, "grad_norm": 1.8124680519104004, "learning_rate": 1.5403111346156648e-05, "loss": 0.0373, "step": 1782 }, { "epoch": 3.39, "grad_norm": 1.2442424297332764, "learning_rate": 1.5397928939860887e-05, "loss": 0.0198, "step": 1783 }, { "epoch": 3.39, "grad_norm": 1.1361157894134521, "learning_rate": 1.5392744486903787e-05, "loss": 0.0157, "step": 1784 }, { "epoch": 3.39, "grad_norm": 0.9991012215614319, "learning_rate": 1.5387557989251067e-05, "loss": 0.0266, "step": 1785 }, { "epoch": 3.4, "grad_norm": 1.2301535606384277, "learning_rate": 1.5382369448869226e-05, "loss": 0.0212, "step": 1786 }, { "epoch": 3.4, "grad_norm": 1.5864934921264648, "learning_rate": 1.5377178867725527e-05, "loss": 0.0246, "step": 1787 }, { "epoch": 3.4, "grad_norm": 1.7715873718261719, "learning_rate": 1.5371986247788016e-05, "loss": 0.0401, "step": 1788 }, { "epoch": 3.4, "grad_norm": 2.1800613403320312, "learning_rate": 1.536679159102552e-05, "loss": 0.029, "step": 1789 }, { "epoch": 3.4, "grad_norm": 1.5932927131652832, "learning_rate": 1.5361594899407616e-05, "loss": 0.0343, "step": 1790 }, { "epoch": 3.4, "grad_norm": 1.2349356412887573, "learning_rate": 1.5356396174904667e-05, "loss": 0.0202, "step": 1791 }, { "epoch": 3.41, "grad_norm": 1.7213616371154785, "learning_rate": 1.535119541948781e-05, "loss": 0.0329, "step": 1792 }, { "epoch": 3.41, "grad_norm": 1.0292502641677856, "learning_rate": 1.5345992635128943e-05, "loss": 0.0134, "step": 1793 }, { "epoch": 3.41, "grad_norm": 2.1175997257232666, "learning_rate": 1.534078782380074e-05, "loss": 0.031, "step": 1794 }, { "epoch": 3.41, "grad_norm": 1.1110278367996216, "learning_rate": 1.5335580987476632e-05, "loss": 0.0139, "step": 1795 }, { "epoch": 3.41, "grad_norm": 1.6885652542114258, "learning_rate": 1.5330372128130838e-05, "loss": 0.0311, "step": 1796 }, { "epoch": 3.42, "grad_norm": 2.207709789276123, "learning_rate": 1.5325161247738326e-05, "loss": 0.0285, "step": 1797 }, { "epoch": 3.42, "grad_norm": 1.5372333526611328, "learning_rate": 1.5319948348274835e-05, "loss": 0.0322, "step": 1798 }, { "epoch": 3.42, "grad_norm": 1.3722026348114014, "learning_rate": 1.531473343171688e-05, "loss": 0.0189, "step": 1799 }, { "epoch": 3.42, "grad_norm": 1.2735618352890015, "learning_rate": 1.5309516500041716e-05, "loss": 0.0312, "step": 1800 }, { "epoch": 3.42, "eval_blimp_filtered_avg": 0.7307462686567164, "eval_blimp_filtered_std": 0.004901811714947462, "step": 1800 }, { "epoch": 3.42, "eval_blimp_supplement_avg": 0.7887931034482759, "eval_blimp_supplement_std": 0.01764834511094836, "step": 1800 }, { "epoch": 3.42, "eval_vqa_filtered_avg": 0.38, "eval_vqa_filtered_std": 0.048783173121456316, "step": 1800 }, { "epoch": 3.42, "eval_winoground_filtered_avg": 0.52, "eval_winoground_filtered_std": 0.05021167315686779, "step": 1800 }, { "epoch": 3.42, "grad_norm": 1.8361809253692627, "learning_rate": 1.5304297555227393e-05, "loss": 0.0333, "step": 1801 }, { "epoch": 3.43, "grad_norm": 1.2599622011184692, "learning_rate": 1.52990765992527e-05, "loss": 0.0204, "step": 1802 }, { "epoch": 3.43, "grad_norm": 1.4097249507904053, "learning_rate": 1.5293853634097207e-05, "loss": 0.0144, "step": 1803 }, { "epoch": 3.43, "grad_norm": 1.4832966327667236, "learning_rate": 1.528862866174123e-05, "loss": 0.0226, "step": 1804 }, { "epoch": 3.43, "grad_norm": 1.1304261684417725, "learning_rate": 1.528340168416585e-05, "loss": 0.0186, "step": 1805 }, { "epoch": 3.43, "grad_norm": 1.7622685432434082, "learning_rate": 1.527817270335292e-05, "loss": 0.0397, "step": 1806 }, { "epoch": 3.44, "grad_norm": 1.4927997589111328, "learning_rate": 1.5272941721285034e-05, "loss": 0.047, "step": 1807 }, { "epoch": 3.44, "grad_norm": 1.377238154411316, "learning_rate": 1.5267708739945558e-05, "loss": 0.0309, "step": 1808 }, { "epoch": 3.44, "grad_norm": 1.0784552097320557, "learning_rate": 1.5262473761318618e-05, "loss": 0.0199, "step": 1809 }, { "epoch": 3.44, "grad_norm": 1.7291147708892822, "learning_rate": 1.5257236787389085e-05, "loss": 0.0441, "step": 1810 }, { "epoch": 3.44, "grad_norm": 1.8589739799499512, "learning_rate": 1.5251997820142595e-05, "loss": 0.0333, "step": 1811 }, { "epoch": 3.44, "grad_norm": 1.7911772727966309, "learning_rate": 1.5246756861565536e-05, "loss": 0.0337, "step": 1812 }, { "epoch": 3.45, "grad_norm": 0.743824303150177, "learning_rate": 1.5241513913645057e-05, "loss": 0.019, "step": 1813 }, { "epoch": 3.45, "grad_norm": 2.1684861183166504, "learning_rate": 1.5236268978369054e-05, "loss": 0.0357, "step": 1814 }, { "epoch": 3.45, "grad_norm": 1.655097484588623, "learning_rate": 1.5231022057726179e-05, "loss": 0.0418, "step": 1815 }, { "epoch": 3.45, "grad_norm": 1.3814141750335693, "learning_rate": 1.5225773153705841e-05, "loss": 0.023, "step": 1816 }, { "epoch": 3.45, "grad_norm": 1.9564968347549438, "learning_rate": 1.5220522268298194e-05, "loss": 0.0363, "step": 1817 }, { "epoch": 3.46, "grad_norm": 1.2546037435531616, "learning_rate": 1.5215269403494151e-05, "loss": 0.0296, "step": 1818 }, { "epoch": 3.46, "grad_norm": 1.794338583946228, "learning_rate": 1.5210014561285366e-05, "loss": 0.0295, "step": 1819 }, { "epoch": 3.46, "grad_norm": 1.6889896392822266, "learning_rate": 1.5204757743664253e-05, "loss": 0.0221, "step": 1820 }, { "epoch": 3.46, "grad_norm": 1.6819062232971191, "learning_rate": 1.5199498952623968e-05, "loss": 0.0402, "step": 1821 }, { "epoch": 3.46, "grad_norm": 1.1802375316619873, "learning_rate": 1.5194238190158417e-05, "loss": 0.0133, "step": 1822 }, { "epoch": 3.47, "grad_norm": 1.4171371459960938, "learning_rate": 1.5188975458262256e-05, "loss": 0.0208, "step": 1823 }, { "epoch": 3.47, "grad_norm": 1.8831627368927002, "learning_rate": 1.5183710758930881e-05, "loss": 0.0274, "step": 1824 }, { "epoch": 3.47, "grad_norm": 1.2374258041381836, "learning_rate": 1.517844409416044e-05, "loss": 0.0207, "step": 1825 }, { "epoch": 3.47, "grad_norm": 0.8370751142501831, "learning_rate": 1.5173175465947827e-05, "loss": 0.0081, "step": 1826 }, { "epoch": 3.47, "grad_norm": 1.5397295951843262, "learning_rate": 1.5167904876290678e-05, "loss": 0.0249, "step": 1827 }, { "epoch": 3.48, "grad_norm": 1.653146505355835, "learning_rate": 1.5162632327187367e-05, "loss": 0.071, "step": 1828 }, { "epoch": 3.48, "grad_norm": 1.1296186447143555, "learning_rate": 1.5157357820637023e-05, "loss": 0.0227, "step": 1829 }, { "epoch": 3.48, "grad_norm": 1.5944956541061401, "learning_rate": 1.515208135863951e-05, "loss": 0.0258, "step": 1830 }, { "epoch": 3.48, "grad_norm": 0.9351037740707397, "learning_rate": 1.5146802943195433e-05, "loss": 0.0106, "step": 1831 }, { "epoch": 3.48, "grad_norm": 1.4745427370071411, "learning_rate": 1.5141522576306138e-05, "loss": 0.0268, "step": 1832 }, { "epoch": 3.48, "grad_norm": 1.4338109493255615, "learning_rate": 1.5136240259973716e-05, "loss": 0.0309, "step": 1833 }, { "epoch": 3.49, "grad_norm": 1.2387864589691162, "learning_rate": 1.5130955996200989e-05, "loss": 0.0252, "step": 1834 }, { "epoch": 3.49, "grad_norm": 1.4157352447509766, "learning_rate": 1.5125669786991522e-05, "loss": 0.0412, "step": 1835 }, { "epoch": 3.49, "grad_norm": 1.9424235820770264, "learning_rate": 1.5120381634349616e-05, "loss": 0.0321, "step": 1836 }, { "epoch": 3.49, "grad_norm": 1.3445935249328613, "learning_rate": 1.5115091540280315e-05, "loss": 0.0216, "step": 1837 }, { "epoch": 3.49, "grad_norm": 1.7735623121261597, "learning_rate": 1.5109799506789392e-05, "loss": 0.0309, "step": 1838 }, { "epoch": 3.5, "grad_norm": 2.464395761489868, "learning_rate": 1.5104505535883349e-05, "loss": 0.0397, "step": 1839 }, { "epoch": 3.5, "grad_norm": 1.8700116872787476, "learning_rate": 1.5099209629569443e-05, "loss": 0.0288, "step": 1840 }, { "epoch": 3.5, "grad_norm": 0.9052969217300415, "learning_rate": 1.5093911789855646e-05, "loss": 0.0137, "step": 1841 }, { "epoch": 3.5, "grad_norm": 0.8794175982475281, "learning_rate": 1.508861201875067e-05, "loss": 0.0097, "step": 1842 }, { "epoch": 3.5, "grad_norm": 2.2426326274871826, "learning_rate": 1.5083310318263964e-05, "loss": 0.0399, "step": 1843 }, { "epoch": 3.51, "grad_norm": 1.6771459579467773, "learning_rate": 1.50780066904057e-05, "loss": 0.0274, "step": 1844 }, { "epoch": 3.51, "grad_norm": 1.3171972036361694, "learning_rate": 1.5072701137186784e-05, "loss": 0.0165, "step": 1845 }, { "epoch": 3.51, "grad_norm": 1.2750054597854614, "learning_rate": 1.5067393660618854e-05, "loss": 0.0254, "step": 1846 }, { "epoch": 3.51, "grad_norm": 1.8021330833435059, "learning_rate": 1.5062084262714276e-05, "loss": 0.0281, "step": 1847 }, { "epoch": 3.51, "grad_norm": 1.2297152280807495, "learning_rate": 1.505677294548614e-05, "loss": 0.0223, "step": 1848 }, { "epoch": 3.52, "grad_norm": 1.0643383264541626, "learning_rate": 1.505145971094827e-05, "loss": 0.0183, "step": 1849 }, { "epoch": 3.52, "grad_norm": 0.9411414861679077, "learning_rate": 1.5046144561115221e-05, "loss": 0.0278, "step": 1850 }, { "epoch": 3.52, "grad_norm": 1.3842238187789917, "learning_rate": 1.504082749800226e-05, "loss": 0.0204, "step": 1851 }, { "epoch": 3.52, "grad_norm": 1.776387333869934, "learning_rate": 1.503550852362539e-05, "loss": 0.0409, "step": 1852 }, { "epoch": 3.52, "grad_norm": 0.7278493642807007, "learning_rate": 1.5030187640001339e-05, "loss": 0.0184, "step": 1853 }, { "epoch": 3.52, "grad_norm": 1.618058443069458, "learning_rate": 1.5024864849147554e-05, "loss": 0.0339, "step": 1854 }, { "epoch": 3.53, "grad_norm": 1.109046220779419, "learning_rate": 1.5019540153082201e-05, "loss": 0.0189, "step": 1855 }, { "epoch": 3.53, "grad_norm": 1.517149806022644, "learning_rate": 1.5014213553824188e-05, "loss": 0.0302, "step": 1856 }, { "epoch": 3.53, "grad_norm": 1.4292638301849365, "learning_rate": 1.500888505339312e-05, "loss": 0.0335, "step": 1857 }, { "epoch": 3.53, "grad_norm": 2.1928372383117676, "learning_rate": 1.5003554653809342e-05, "loss": 0.0334, "step": 1858 }, { "epoch": 3.53, "grad_norm": 1.249976634979248, "learning_rate": 1.4998222357093903e-05, "loss": 0.0307, "step": 1859 }, { "epoch": 3.54, "grad_norm": 1.79392671585083, "learning_rate": 1.4992888165268585e-05, "loss": 0.0284, "step": 1860 }, { "epoch": 3.54, "grad_norm": 1.6729233264923096, "learning_rate": 1.498755208035588e-05, "loss": 0.0278, "step": 1861 }, { "epoch": 3.54, "grad_norm": 1.4600334167480469, "learning_rate": 1.4982214104379003e-05, "loss": 0.0174, "step": 1862 }, { "epoch": 3.54, "grad_norm": 1.534895658493042, "learning_rate": 1.4976874239361883e-05, "loss": 0.0216, "step": 1863 }, { "epoch": 3.54, "grad_norm": 2.062053918838501, "learning_rate": 1.4971532487329166e-05, "loss": 0.0287, "step": 1864 }, { "epoch": 3.55, "grad_norm": 0.9115514159202576, "learning_rate": 1.496618885030621e-05, "loss": 0.0182, "step": 1865 }, { "epoch": 3.55, "grad_norm": 1.381737470626831, "learning_rate": 1.4960843330319102e-05, "loss": 0.0201, "step": 1866 }, { "epoch": 3.55, "grad_norm": 1.160710334777832, "learning_rate": 1.4955495929394622e-05, "loss": 0.0372, "step": 1867 }, { "epoch": 3.55, "grad_norm": 1.3605152368545532, "learning_rate": 1.4950146649560275e-05, "loss": 0.0315, "step": 1868 }, { "epoch": 3.55, "grad_norm": 1.976434588432312, "learning_rate": 1.4944795492844276e-05, "loss": 0.0294, "step": 1869 }, { "epoch": 3.56, "grad_norm": 1.3304303884506226, "learning_rate": 1.4939442461275557e-05, "loss": 0.0146, "step": 1870 }, { "epoch": 3.56, "grad_norm": 2.055698871612549, "learning_rate": 1.4934087556883754e-05, "loss": 0.0533, "step": 1871 }, { "epoch": 3.56, "grad_norm": 1.2154542207717896, "learning_rate": 1.4928730781699213e-05, "loss": 0.0187, "step": 1872 }, { "epoch": 3.56, "grad_norm": 1.372741460800171, "learning_rate": 1.4923372137752994e-05, "loss": 0.0227, "step": 1873 }, { "epoch": 3.56, "grad_norm": 1.0407781600952148, "learning_rate": 1.491801162707686e-05, "loss": 0.0193, "step": 1874 }, { "epoch": 3.56, "grad_norm": 1.4871271848678589, "learning_rate": 1.4912649251703288e-05, "loss": 0.021, "step": 1875 }, { "epoch": 3.57, "grad_norm": 1.8638195991516113, "learning_rate": 1.490728501366546e-05, "loss": 0.0272, "step": 1876 }, { "epoch": 3.57, "grad_norm": 1.4049071073532104, "learning_rate": 1.4901918914997261e-05, "loss": 0.0385, "step": 1877 }, { "epoch": 3.57, "grad_norm": 1.0433604717254639, "learning_rate": 1.4896550957733285e-05, "loss": 0.0148, "step": 1878 }, { "epoch": 3.57, "grad_norm": 1.295287013053894, "learning_rate": 1.4891181143908826e-05, "loss": 0.0273, "step": 1879 }, { "epoch": 3.57, "grad_norm": 1.1305054426193237, "learning_rate": 1.4885809475559891e-05, "loss": 0.0132, "step": 1880 }, { "epoch": 3.58, "grad_norm": 2.1045875549316406, "learning_rate": 1.488043595472318e-05, "loss": 0.0333, "step": 1881 }, { "epoch": 3.58, "grad_norm": 1.8891915082931519, "learning_rate": 1.4875060583436102e-05, "loss": 0.0387, "step": 1882 }, { "epoch": 3.58, "grad_norm": 1.5524392127990723, "learning_rate": 1.4869683363736764e-05, "loss": 0.0497, "step": 1883 }, { "epoch": 3.58, "grad_norm": 1.5928623676300049, "learning_rate": 1.4864304297663976e-05, "loss": 0.0359, "step": 1884 }, { "epoch": 3.58, "grad_norm": 1.3174724578857422, "learning_rate": 1.4858923387257247e-05, "loss": 0.0228, "step": 1885 }, { "epoch": 3.59, "grad_norm": 1.404592752456665, "learning_rate": 1.4853540634556784e-05, "loss": 0.0227, "step": 1886 }, { "epoch": 3.59, "grad_norm": 1.355066180229187, "learning_rate": 1.4848156041603501e-05, "loss": 0.0281, "step": 1887 }, { "epoch": 3.59, "grad_norm": 1.4295587539672852, "learning_rate": 1.4842769610438991e-05, "loss": 0.0299, "step": 1888 }, { "epoch": 3.59, "grad_norm": 1.2507820129394531, "learning_rate": 1.4837381343105568e-05, "loss": 0.0226, "step": 1889 }, { "epoch": 3.59, "grad_norm": 1.3433654308319092, "learning_rate": 1.4831991241646226e-05, "loss": 0.02, "step": 1890 }, { "epoch": 3.6, "grad_norm": 1.6936955451965332, "learning_rate": 1.4826599308104655e-05, "loss": 0.0226, "step": 1891 }, { "epoch": 3.6, "grad_norm": 1.165428638458252, "learning_rate": 1.4821205544525245e-05, "loss": 0.0249, "step": 1892 }, { "epoch": 3.6, "grad_norm": 2.1911845207214355, "learning_rate": 1.4815809952953082e-05, "loss": 0.0446, "step": 1893 }, { "epoch": 3.6, "grad_norm": 1.3739080429077148, "learning_rate": 1.4810412535433935e-05, "loss": 0.0229, "step": 1894 }, { "epoch": 3.6, "grad_norm": 1.302950382232666, "learning_rate": 1.4805013294014274e-05, "loss": 0.0299, "step": 1895 }, { "epoch": 3.6, "grad_norm": 1.567146897315979, "learning_rate": 1.4799612230741257e-05, "loss": 0.0311, "step": 1896 }, { "epoch": 3.61, "grad_norm": 1.3951503038406372, "learning_rate": 1.479420934766274e-05, "loss": 0.0274, "step": 1897 }, { "epoch": 3.61, "grad_norm": 1.190510630607605, "learning_rate": 1.4788804646827251e-05, "loss": 0.0162, "step": 1898 }, { "epoch": 3.61, "grad_norm": 1.3660848140716553, "learning_rate": 1.4783398130284031e-05, "loss": 0.0177, "step": 1899 }, { "epoch": 3.61, "grad_norm": 1.0555355548858643, "learning_rate": 1.477798980008299e-05, "loss": 0.0186, "step": 1900 }, { "epoch": 3.61, "eval_blimp_filtered_avg": 0.7349253731343284, "eval_blimp_filtered_std": 0.004876498688918063, "step": 1900 }, { "epoch": 3.61, "eval_blimp_supplement_avg": 0.7974137931034483, "eval_blimp_supplement_std": 0.01754737248053469, "step": 1900 }, { "epoch": 3.61, "eval_vqa_filtered_avg": 0.35, "eval_vqa_filtered_std": 0.0479372485441102, "step": 1900 }, { "epoch": 3.61, "eval_winoground_filtered_avg": 0.56, "eval_winoground_filtered_std": 0.04988876515698589, "step": 1900 }, { "epoch": 3.61, "grad_norm": 1.4514813423156738, "learning_rate": 1.4772579658274732e-05, "loss": 0.0192, "step": 1901 }, { "epoch": 3.62, "grad_norm": 1.4593253135681152, "learning_rate": 1.4767167706910556e-05, "loss": 0.03, "step": 1902 }, { "epoch": 3.62, "grad_norm": 0.8981193900108337, "learning_rate": 1.4761753948042434e-05, "loss": 0.0187, "step": 1903 }, { "epoch": 3.62, "grad_norm": 1.5097615718841553, "learning_rate": 1.4756338383723034e-05, "loss": 0.0203, "step": 1904 }, { "epoch": 3.62, "grad_norm": 2.490546226501465, "learning_rate": 1.47509210160057e-05, "loss": 0.0385, "step": 1905 }, { "epoch": 3.62, "grad_norm": 2.17225980758667, "learning_rate": 1.4745501846944464e-05, "loss": 0.0421, "step": 1906 }, { "epoch": 3.63, "grad_norm": 1.45164155960083, "learning_rate": 1.4740080878594044e-05, "loss": 0.0275, "step": 1907 }, { "epoch": 3.63, "grad_norm": 1.1853697299957275, "learning_rate": 1.473465811300983e-05, "loss": 0.0177, "step": 1908 }, { "epoch": 3.63, "grad_norm": 1.6651930809020996, "learning_rate": 1.4729233552247907e-05, "loss": 0.0226, "step": 1909 }, { "epoch": 3.63, "grad_norm": 1.6476185321807861, "learning_rate": 1.4723807198365032e-05, "loss": 0.0241, "step": 1910 }, { "epoch": 3.63, "grad_norm": 1.8277767896652222, "learning_rate": 1.4718379053418643e-05, "loss": 0.0293, "step": 1911 }, { "epoch": 3.63, "grad_norm": 1.4282209873199463, "learning_rate": 1.471294911946686e-05, "loss": 0.017, "step": 1912 }, { "epoch": 3.64, "grad_norm": 1.5704602003097534, "learning_rate": 1.4707517398568478e-05, "loss": 0.0318, "step": 1913 }, { "epoch": 3.64, "grad_norm": 1.2316220998764038, "learning_rate": 1.470208389278297e-05, "loss": 0.0466, "step": 1914 }, { "epoch": 3.64, "grad_norm": 1.665513515472412, "learning_rate": 1.4696648604170487e-05, "loss": 0.0299, "step": 1915 }, { "epoch": 3.64, "grad_norm": 0.9614927172660828, "learning_rate": 1.4691211534791859e-05, "loss": 0.0142, "step": 1916 }, { "epoch": 3.64, "grad_norm": 1.2873011827468872, "learning_rate": 1.468577268670858e-05, "loss": 0.0285, "step": 1917 }, { "epoch": 3.65, "grad_norm": 0.7097098231315613, "learning_rate": 1.4680332061982832e-05, "loss": 0.0119, "step": 1918 }, { "epoch": 3.65, "grad_norm": 1.052564263343811, "learning_rate": 1.4674889662677464e-05, "loss": 0.0137, "step": 1919 }, { "epoch": 3.65, "grad_norm": 1.3399070501327515, "learning_rate": 1.4669445490855996e-05, "loss": 0.0204, "step": 1920 }, { "epoch": 3.65, "grad_norm": 2.065497398376465, "learning_rate": 1.4663999548582624e-05, "loss": 0.0395, "step": 1921 }, { "epoch": 3.65, "grad_norm": 0.7590615749359131, "learning_rate": 1.4658551837922215e-05, "loss": 0.0128, "step": 1922 }, { "epoch": 3.66, "grad_norm": 2.011699914932251, "learning_rate": 1.4653102360940306e-05, "loss": 0.0203, "step": 1923 }, { "epoch": 3.66, "grad_norm": 1.42698335647583, "learning_rate": 1.4647651119703099e-05, "loss": 0.0276, "step": 1924 }, { "epoch": 3.66, "grad_norm": 1.327642560005188, "learning_rate": 1.4642198116277478e-05, "loss": 0.019, "step": 1925 }, { "epoch": 3.66, "grad_norm": 1.3806874752044678, "learning_rate": 1.4636743352730975e-05, "loss": 0.0404, "step": 1926 }, { "epoch": 3.66, "grad_norm": 1.5465598106384277, "learning_rate": 1.4631286831131808e-05, "loss": 0.0313, "step": 1927 }, { "epoch": 3.67, "grad_norm": 1.4979503154754639, "learning_rate": 1.4625828553548853e-05, "loss": 0.0319, "step": 1928 }, { "epoch": 3.67, "grad_norm": 1.4262300729751587, "learning_rate": 1.4620368522051651e-05, "loss": 0.0375, "step": 1929 }, { "epoch": 3.67, "grad_norm": 1.5172828435897827, "learning_rate": 1.4614906738710408e-05, "loss": 0.0269, "step": 1930 }, { "epoch": 3.67, "grad_norm": 1.4352595806121826, "learning_rate": 1.4609443205596001e-05, "loss": 0.023, "step": 1931 }, { "epoch": 3.67, "grad_norm": 1.2264691591262817, "learning_rate": 1.4603977924779963e-05, "loss": 0.0185, "step": 1932 }, { "epoch": 3.67, "grad_norm": 1.0197293758392334, "learning_rate": 1.4598510898334497e-05, "loss": 0.025, "step": 1933 }, { "epoch": 3.68, "grad_norm": 2.2147216796875, "learning_rate": 1.4593042128332453e-05, "loss": 0.0357, "step": 1934 }, { "epoch": 3.68, "grad_norm": 1.381643533706665, "learning_rate": 1.4587571616847363e-05, "loss": 0.0166, "step": 1935 }, { "epoch": 3.68, "grad_norm": 1.507037878036499, "learning_rate": 1.45820993659534e-05, "loss": 0.0433, "step": 1936 }, { "epoch": 3.68, "grad_norm": 1.086026668548584, "learning_rate": 1.457662537772541e-05, "loss": 0.0271, "step": 1937 }, { "epoch": 3.68, "grad_norm": 1.3760788440704346, "learning_rate": 1.4571149654238891e-05, "loss": 0.0369, "step": 1938 }, { "epoch": 3.69, "grad_norm": 1.3714524507522583, "learning_rate": 1.4565672197570005e-05, "loss": 0.0386, "step": 1939 }, { "epoch": 3.69, "grad_norm": 1.2354936599731445, "learning_rate": 1.4560193009795556e-05, "loss": 0.0186, "step": 1940 }, { "epoch": 3.69, "grad_norm": 1.160125732421875, "learning_rate": 1.4554712092993026e-05, "loss": 0.0206, "step": 1941 }, { "epoch": 3.69, "grad_norm": 1.5506200790405273, "learning_rate": 1.4549229449240538e-05, "loss": 0.0367, "step": 1942 }, { "epoch": 3.69, "grad_norm": 1.2625131607055664, "learning_rate": 1.4543745080616875e-05, "loss": 0.029, "step": 1943 }, { "epoch": 3.7, "grad_norm": 1.2420504093170166, "learning_rate": 1.4538258989201466e-05, "loss": 0.024, "step": 1944 }, { "epoch": 3.7, "grad_norm": 1.329761028289795, "learning_rate": 1.453277117707441e-05, "loss": 0.0211, "step": 1945 }, { "epoch": 3.7, "grad_norm": 1.0112383365631104, "learning_rate": 1.4527281646316441e-05, "loss": 0.0278, "step": 1946 }, { "epoch": 3.7, "grad_norm": 1.285385251045227, "learning_rate": 1.452179039900895e-05, "loss": 0.0184, "step": 1947 }, { "epoch": 3.7, "grad_norm": 1.3446134328842163, "learning_rate": 1.4516297437233988e-05, "loss": 0.0244, "step": 1948 }, { "epoch": 3.71, "grad_norm": 1.4595736265182495, "learning_rate": 1.4510802763074242e-05, "loss": 0.0261, "step": 1949 }, { "epoch": 3.71, "grad_norm": 0.8612267971038818, "learning_rate": 1.4505306378613063e-05, "loss": 0.0163, "step": 1950 }, { "epoch": 3.71, "grad_norm": 1.0254136323928833, "learning_rate": 1.4499808285934433e-05, "loss": 0.0106, "step": 1951 }, { "epoch": 3.71, "grad_norm": 1.6427078247070312, "learning_rate": 1.4494308487123e-05, "loss": 0.0247, "step": 1952 }, { "epoch": 3.71, "grad_norm": 1.4899840354919434, "learning_rate": 1.4488806984264039e-05, "loss": 0.019, "step": 1953 }, { "epoch": 3.71, "grad_norm": 0.9449028968811035, "learning_rate": 1.4483303779443491e-05, "loss": 0.0176, "step": 1954 }, { "epoch": 3.72, "grad_norm": 0.9819753766059875, "learning_rate": 1.4477798874747933e-05, "loss": 0.0191, "step": 1955 }, { "epoch": 3.72, "grad_norm": 0.9651154279708862, "learning_rate": 1.4472292272264584e-05, "loss": 0.0253, "step": 1956 }, { "epoch": 3.72, "grad_norm": 2.257427215576172, "learning_rate": 1.4466783974081308e-05, "loss": 0.0608, "step": 1957 }, { "epoch": 3.72, "grad_norm": 1.187882900238037, "learning_rate": 1.4461273982286619e-05, "loss": 0.0127, "step": 1958 }, { "epoch": 3.72, "grad_norm": 1.4293698072433472, "learning_rate": 1.4455762298969665e-05, "loss": 0.025, "step": 1959 }, { "epoch": 3.73, "grad_norm": 0.9243450164794922, "learning_rate": 1.4450248926220235e-05, "loss": 0.0162, "step": 1960 }, { "epoch": 3.73, "grad_norm": 1.2736411094665527, "learning_rate": 1.4444733866128766e-05, "loss": 0.0208, "step": 1961 }, { "epoch": 3.73, "grad_norm": 1.5013233423233032, "learning_rate": 1.4439217120786331e-05, "loss": 0.0234, "step": 1962 }, { "epoch": 3.73, "grad_norm": 1.2651935815811157, "learning_rate": 1.4433698692284634e-05, "loss": 0.0302, "step": 1963 }, { "epoch": 3.73, "grad_norm": 0.9718825221061707, "learning_rate": 1.4428178582716035e-05, "loss": 0.016, "step": 1964 }, { "epoch": 3.74, "grad_norm": 1.0386440753936768, "learning_rate": 1.4422656794173514e-05, "loss": 0.016, "step": 1965 }, { "epoch": 3.74, "grad_norm": 1.092517614364624, "learning_rate": 1.4417133328750695e-05, "loss": 0.0198, "step": 1966 }, { "epoch": 3.74, "grad_norm": 1.555005431175232, "learning_rate": 1.441160818854184e-05, "loss": 0.0242, "step": 1967 }, { "epoch": 3.74, "grad_norm": 1.5606721639633179, "learning_rate": 1.440608137564184e-05, "loss": 0.0252, "step": 1968 }, { "epoch": 3.74, "grad_norm": 1.3999879360198975, "learning_rate": 1.440055289214623e-05, "loss": 0.0191, "step": 1969 }, { "epoch": 3.75, "grad_norm": 1.5618236064910889, "learning_rate": 1.4395022740151162e-05, "loss": 0.0222, "step": 1970 }, { "epoch": 3.75, "grad_norm": 1.5839093923568726, "learning_rate": 1.438949092175344e-05, "loss": 0.0309, "step": 1971 }, { "epoch": 3.75, "grad_norm": 1.4235825538635254, "learning_rate": 1.4383957439050486e-05, "loss": 0.0286, "step": 1972 }, { "epoch": 3.75, "grad_norm": 1.9601995944976807, "learning_rate": 1.437842229414036e-05, "loss": 0.0324, "step": 1973 }, { "epoch": 3.75, "grad_norm": 1.0850316286087036, "learning_rate": 1.4372885489121747e-05, "loss": 0.0148, "step": 1974 }, { "epoch": 3.75, "grad_norm": 1.062840461730957, "learning_rate": 1.4367347026093965e-05, "loss": 0.0183, "step": 1975 }, { "epoch": 3.76, "grad_norm": 1.5208847522735596, "learning_rate": 1.4361806907156957e-05, "loss": 0.0309, "step": 1976 }, { "epoch": 3.76, "grad_norm": 1.7557220458984375, "learning_rate": 1.43562651344113e-05, "loss": 0.0299, "step": 1977 }, { "epoch": 3.76, "grad_norm": 1.709802508354187, "learning_rate": 1.4350721709958197e-05, "loss": 0.0199, "step": 1978 }, { "epoch": 3.76, "grad_norm": 1.6606554985046387, "learning_rate": 1.4345176635899467e-05, "loss": 0.0335, "step": 1979 }, { "epoch": 3.76, "grad_norm": 1.6785351037979126, "learning_rate": 1.4339629914337572e-05, "loss": 0.0294, "step": 1980 }, { "epoch": 3.77, "grad_norm": 1.1634719371795654, "learning_rate": 1.4334081547375584e-05, "loss": 0.018, "step": 1981 }, { "epoch": 3.77, "grad_norm": 1.435924768447876, "learning_rate": 1.4328531537117205e-05, "loss": 0.0285, "step": 1982 }, { "epoch": 3.77, "grad_norm": 1.4343838691711426, "learning_rate": 1.4322979885666756e-05, "loss": 0.0379, "step": 1983 }, { "epoch": 3.77, "grad_norm": 1.2046489715576172, "learning_rate": 1.431742659512919e-05, "loss": 0.0257, "step": 1984 }, { "epoch": 3.77, "grad_norm": 0.7746323347091675, "learning_rate": 1.4311871667610066e-05, "loss": 0.0121, "step": 1985 }, { "epoch": 3.78, "grad_norm": 1.277243971824646, "learning_rate": 1.4306315105215578e-05, "loss": 0.0454, "step": 1986 }, { "epoch": 3.78, "grad_norm": 2.1459124088287354, "learning_rate": 1.4300756910052534e-05, "loss": 0.0413, "step": 1987 }, { "epoch": 3.78, "grad_norm": 1.0959552526474, "learning_rate": 1.429519708422836e-05, "loss": 0.025, "step": 1988 }, { "epoch": 3.78, "grad_norm": 1.4200830459594727, "learning_rate": 1.4289635629851101e-05, "loss": 0.029, "step": 1989 }, { "epoch": 3.78, "grad_norm": 1.2487889528274536, "learning_rate": 1.4284072549029424e-05, "loss": 0.0182, "step": 1990 }, { "epoch": 3.79, "grad_norm": 1.3308106660842896, "learning_rate": 1.4278507843872604e-05, "loss": 0.0233, "step": 1991 }, { "epoch": 3.79, "grad_norm": 1.657437801361084, "learning_rate": 1.4272941516490541e-05, "loss": 0.0371, "step": 1992 }, { "epoch": 3.79, "grad_norm": 1.1253795623779297, "learning_rate": 1.4267373568993741e-05, "loss": 0.0178, "step": 1993 }, { "epoch": 3.79, "grad_norm": 0.9585698843002319, "learning_rate": 1.4261804003493334e-05, "loss": 0.0174, "step": 1994 }, { "epoch": 3.79, "grad_norm": 1.4828925132751465, "learning_rate": 1.4256232822101054e-05, "loss": 0.0166, "step": 1995 }, { "epoch": 3.79, "grad_norm": 1.4431558847427368, "learning_rate": 1.4250660026929257e-05, "loss": 0.0245, "step": 1996 }, { "epoch": 3.8, "grad_norm": 1.423842191696167, "learning_rate": 1.4245085620090902e-05, "loss": 0.0248, "step": 1997 }, { "epoch": 3.8, "grad_norm": 1.485876202583313, "learning_rate": 1.4239509603699565e-05, "loss": 0.0298, "step": 1998 }, { "epoch": 3.8, "grad_norm": 1.596734881401062, "learning_rate": 1.4233931979869427e-05, "loss": 0.0263, "step": 1999 }, { "epoch": 3.8, "grad_norm": 1.9273959398269653, "learning_rate": 1.4228352750715287e-05, "loss": 0.013, "step": 2000 }, { "epoch": 3.8, "eval_blimp_filtered_avg": 0.7311940298507462, "eval_blimp_filtered_std": 0.004928707029689641, "step": 2000 }, { "epoch": 3.8, "eval_blimp_supplement_avg": 0.7974137931034483, "eval_blimp_supplement_std": 0.01746960129550142, "step": 2000 }, { "epoch": 3.8, "eval_vqa_filtered_avg": 0.3, "eval_vqa_filtered_std": 0.046056618647183814, "step": 2000 }, { "epoch": 3.8, "eval_winoground_filtered_avg": 0.5, "eval_winoground_filtered_std": 0.050251890762960605, "step": 2000 }, { "epoch": 3.8, "grad_norm": 1.0083805322647095, "learning_rate": 1.4222771918352543e-05, "loss": 0.0176, "step": 2001 }, { "epoch": 3.81, "grad_norm": 2.140791177749634, "learning_rate": 1.4217189484897209e-05, "loss": 0.0422, "step": 2002 }, { "epoch": 3.81, "grad_norm": 1.1478970050811768, "learning_rate": 1.4211605452465901e-05, "loss": 0.0233, "step": 2003 }, { "epoch": 3.81, "grad_norm": 1.0523980855941772, "learning_rate": 1.4206019823175845e-05, "loss": 0.0157, "step": 2004 }, { "epoch": 3.81, "grad_norm": 1.3117514848709106, "learning_rate": 1.4200432599144867e-05, "loss": 0.035, "step": 2005 }, { "epoch": 3.81, "grad_norm": 1.2343816757202148, "learning_rate": 1.4194843782491403e-05, "loss": 0.0194, "step": 2006 }, { "epoch": 3.82, "grad_norm": 1.5935697555541992, "learning_rate": 1.4189253375334486e-05, "loss": 0.0234, "step": 2007 }, { "epoch": 3.82, "grad_norm": 1.9706801176071167, "learning_rate": 1.4183661379793766e-05, "loss": 0.0367, "step": 2008 }, { "epoch": 3.82, "grad_norm": 1.6860216856002808, "learning_rate": 1.4178067797989474e-05, "loss": 0.0257, "step": 2009 }, { "epoch": 3.82, "grad_norm": 1.717128038406372, "learning_rate": 1.4172472632042464e-05, "loss": 0.0172, "step": 2010 }, { "epoch": 3.82, "grad_norm": 1.5691723823547363, "learning_rate": 1.416687588407418e-05, "loss": 0.0255, "step": 2011 }, { "epoch": 3.83, "grad_norm": 1.860659122467041, "learning_rate": 1.4161277556206661e-05, "loss": 0.0271, "step": 2012 }, { "epoch": 3.83, "grad_norm": 1.531982421875, "learning_rate": 1.4155677650562555e-05, "loss": 0.0267, "step": 2013 }, { "epoch": 3.83, "grad_norm": 1.3667001724243164, "learning_rate": 1.4150076169265107e-05, "loss": 0.0298, "step": 2014 }, { "epoch": 3.83, "grad_norm": 1.4913971424102783, "learning_rate": 1.414447311443815e-05, "loss": 0.0289, "step": 2015 }, { "epoch": 3.83, "grad_norm": 1.1543893814086914, "learning_rate": 1.4138868488206128e-05, "loss": 0.0214, "step": 2016 }, { "epoch": 3.83, "grad_norm": 3.6248533725738525, "learning_rate": 1.4133262292694068e-05, "loss": 0.0317, "step": 2017 }, { "epoch": 3.84, "grad_norm": 1.2843577861785889, "learning_rate": 1.4127654530027597e-05, "loss": 0.0257, "step": 2018 }, { "epoch": 3.84, "grad_norm": 0.9333481192588806, "learning_rate": 1.412204520233294e-05, "loss": 0.0235, "step": 2019 }, { "epoch": 3.84, "grad_norm": 1.3161096572875977, "learning_rate": 1.4116434311736904e-05, "loss": 0.0201, "step": 2020 }, { "epoch": 3.84, "grad_norm": 1.2300610542297363, "learning_rate": 1.4110821860366904e-05, "loss": 0.0195, "step": 2021 }, { "epoch": 3.84, "grad_norm": 1.1651076078414917, "learning_rate": 1.4105207850350932e-05, "loss": 0.0255, "step": 2022 }, { "epoch": 3.85, "grad_norm": 1.3869388103485107, "learning_rate": 1.4099592283817584e-05, "loss": 0.0261, "step": 2023 }, { "epoch": 3.85, "grad_norm": 1.1957371234893799, "learning_rate": 1.409397516289604e-05, "loss": 0.0179, "step": 2024 }, { "epoch": 3.85, "grad_norm": 1.0529378652572632, "learning_rate": 1.4088356489716064e-05, "loss": 0.0216, "step": 2025 }, { "epoch": 3.85, "grad_norm": 1.4260621070861816, "learning_rate": 1.408273626640802e-05, "loss": 0.0209, "step": 2026 }, { "epoch": 3.85, "grad_norm": 1.1287633180618286, "learning_rate": 1.4077114495102851e-05, "loss": 0.0152, "step": 2027 }, { "epoch": 3.86, "grad_norm": 1.595619797706604, "learning_rate": 1.4071491177932089e-05, "loss": 0.0311, "step": 2028 }, { "epoch": 3.86, "grad_norm": 1.4661039113998413, "learning_rate": 1.4065866317027855e-05, "loss": 0.0203, "step": 2029 }, { "epoch": 3.86, "grad_norm": 1.4908660650253296, "learning_rate": 1.406023991452285e-05, "loss": 0.0266, "step": 2030 }, { "epoch": 3.86, "grad_norm": 1.456425428390503, "learning_rate": 1.4054611972550365e-05, "loss": 0.0187, "step": 2031 }, { "epoch": 3.86, "grad_norm": 1.3766913414001465, "learning_rate": 1.404898249324427e-05, "loss": 0.025, "step": 2032 }, { "epoch": 3.87, "grad_norm": 1.0119030475616455, "learning_rate": 1.4043351478739025e-05, "loss": 0.0235, "step": 2033 }, { "epoch": 3.87, "grad_norm": 1.1862791776657104, "learning_rate": 1.4037718931169664e-05, "loss": 0.0312, "step": 2034 }, { "epoch": 3.87, "grad_norm": 1.1621404886245728, "learning_rate": 1.4032084852671803e-05, "loss": 0.0206, "step": 2035 }, { "epoch": 3.87, "grad_norm": 1.0350810289382935, "learning_rate": 1.4026449245381646e-05, "loss": 0.0225, "step": 2036 }, { "epoch": 3.87, "grad_norm": 2.0224475860595703, "learning_rate": 1.402081211143597e-05, "loss": 0.052, "step": 2037 }, { "epoch": 3.87, "grad_norm": 1.248250961303711, "learning_rate": 1.4015173452972131e-05, "loss": 0.0183, "step": 2038 }, { "epoch": 3.88, "grad_norm": 1.855621337890625, "learning_rate": 1.4009533272128066e-05, "loss": 0.0277, "step": 2039 }, { "epoch": 3.88, "grad_norm": 1.6024099588394165, "learning_rate": 1.400389157104229e-05, "loss": 0.0205, "step": 2040 }, { "epoch": 3.88, "grad_norm": 1.1771409511566162, "learning_rate": 1.3998248351853892e-05, "loss": 0.0202, "step": 2041 }, { "epoch": 3.88, "grad_norm": 0.9928612112998962, "learning_rate": 1.3992603616702528e-05, "loss": 0.0202, "step": 2042 }, { "epoch": 3.88, "grad_norm": 1.482016682624817, "learning_rate": 1.3986957367728447e-05, "loss": 0.0232, "step": 2043 }, { "epoch": 3.89, "grad_norm": 1.693349838256836, "learning_rate": 1.398130960707246e-05, "loss": 0.0368, "step": 2044 }, { "epoch": 3.89, "grad_norm": 1.4055081605911255, "learning_rate": 1.397566033687595e-05, "loss": 0.0317, "step": 2045 }, { "epoch": 3.89, "grad_norm": 1.4154680967330933, "learning_rate": 1.3970009559280884e-05, "loss": 0.0276, "step": 2046 }, { "epoch": 3.89, "grad_norm": 1.1323808431625366, "learning_rate": 1.3964357276429787e-05, "loss": 0.0246, "step": 2047 }, { "epoch": 3.89, "grad_norm": 1.2375428676605225, "learning_rate": 1.3958703490465758e-05, "loss": 0.0232, "step": 2048 }, { "epoch": 3.9, "grad_norm": 2.331026792526245, "learning_rate": 1.3953048203532476e-05, "loss": 0.0249, "step": 2049 }, { "epoch": 3.9, "grad_norm": 1.6461784839630127, "learning_rate": 1.3947391417774176e-05, "loss": 0.0435, "step": 2050 }, { "epoch": 3.9, "grad_norm": 1.3354713916778564, "learning_rate": 1.394173313533567e-05, "loss": 0.0262, "step": 2051 }, { "epoch": 3.9, "grad_norm": 0.9696877598762512, "learning_rate": 1.3936073358362328e-05, "loss": 0.0145, "step": 2052 }, { "epoch": 3.9, "grad_norm": 1.0654540061950684, "learning_rate": 1.3930412089000099e-05, "loss": 0.0259, "step": 2053 }, { "epoch": 3.9, "grad_norm": 1.3902686834335327, "learning_rate": 1.3924749329395489e-05, "loss": 0.021, "step": 2054 }, { "epoch": 3.91, "grad_norm": 1.3510124683380127, "learning_rate": 1.3919085081695571e-05, "loss": 0.0241, "step": 2055 }, { "epoch": 3.91, "grad_norm": 1.413559079170227, "learning_rate": 1.3913419348047983e-05, "loss": 0.0356, "step": 2056 }, { "epoch": 3.91, "grad_norm": 1.1411254405975342, "learning_rate": 1.3907752130600932e-05, "loss": 0.0217, "step": 2057 }, { "epoch": 3.91, "grad_norm": 1.7650985717773438, "learning_rate": 1.390208343150317e-05, "loss": 0.0442, "step": 2058 }, { "epoch": 3.91, "grad_norm": 0.9004316329956055, "learning_rate": 1.3896413252904035e-05, "loss": 0.0114, "step": 2059 }, { "epoch": 3.92, "grad_norm": 0.9275758862495422, "learning_rate": 1.3890741596953408e-05, "loss": 0.017, "step": 2060 }, { "epoch": 3.92, "grad_norm": 1.7137537002563477, "learning_rate": 1.3885068465801735e-05, "loss": 0.034, "step": 2061 }, { "epoch": 3.92, "grad_norm": 1.2722257375717163, "learning_rate": 1.3879393861600024e-05, "loss": 0.0195, "step": 2062 }, { "epoch": 3.92, "grad_norm": 1.3780150413513184, "learning_rate": 1.3873717786499838e-05, "loss": 0.0277, "step": 2063 }, { "epoch": 3.92, "grad_norm": 0.9895815253257751, "learning_rate": 1.3868040242653302e-05, "loss": 0.0155, "step": 2064 }, { "epoch": 3.93, "grad_norm": 1.1247366666793823, "learning_rate": 1.3862361232213094e-05, "loss": 0.0127, "step": 2065 }, { "epoch": 3.93, "grad_norm": 1.7272729873657227, "learning_rate": 1.385668075733245e-05, "loss": 0.0431, "step": 2066 }, { "epoch": 3.93, "grad_norm": 1.1190630197525024, "learning_rate": 1.3850998820165158e-05, "loss": 0.0283, "step": 2067 }, { "epoch": 3.93, "grad_norm": 1.249299168586731, "learning_rate": 1.3845315422865561e-05, "loss": 0.0235, "step": 2068 }, { "epoch": 3.93, "grad_norm": 1.0656033754348755, "learning_rate": 1.3839630567588564e-05, "loss": 0.0263, "step": 2069 }, { "epoch": 3.94, "grad_norm": 1.9371066093444824, "learning_rate": 1.3833944256489615e-05, "loss": 0.0476, "step": 2070 }, { "epoch": 3.94, "grad_norm": 1.713775396347046, "learning_rate": 1.3828256491724719e-05, "loss": 0.0312, "step": 2071 }, { "epoch": 3.94, "grad_norm": 0.9784880876541138, "learning_rate": 1.3822567275450427e-05, "loss": 0.0081, "step": 2072 }, { "epoch": 3.94, "grad_norm": 1.7535022497177124, "learning_rate": 1.3816876609823848e-05, "loss": 0.0353, "step": 2073 }, { "epoch": 3.94, "grad_norm": 1.3698827028274536, "learning_rate": 1.3811184497002637e-05, "loss": 0.0274, "step": 2074 }, { "epoch": 3.94, "grad_norm": 0.9590175747871399, "learning_rate": 1.3805490939144991e-05, "loss": 0.0192, "step": 2075 }, { "epoch": 3.95, "grad_norm": 1.9210551977157593, "learning_rate": 1.3799795938409668e-05, "loss": 0.0265, "step": 2076 }, { "epoch": 3.95, "grad_norm": 1.7520955801010132, "learning_rate": 1.379409949695596e-05, "loss": 0.0282, "step": 2077 }, { "epoch": 3.95, "grad_norm": 1.6264647245407104, "learning_rate": 1.3788401616943716e-05, "loss": 0.0396, "step": 2078 }, { "epoch": 3.95, "grad_norm": 0.7246159315109253, "learning_rate": 1.3782702300533324e-05, "loss": 0.0109, "step": 2079 }, { "epoch": 3.95, "grad_norm": 1.7533756494522095, "learning_rate": 1.377700154988572e-05, "loss": 0.0197, "step": 2080 }, { "epoch": 3.96, "grad_norm": 1.1701409816741943, "learning_rate": 1.377129936716238e-05, "loss": 0.0305, "step": 2081 }, { "epoch": 3.96, "grad_norm": 1.3725502490997314, "learning_rate": 1.3765595754525325e-05, "loss": 0.0223, "step": 2082 }, { "epoch": 3.96, "grad_norm": 1.0762672424316406, "learning_rate": 1.3759890714137121e-05, "loss": 0.0178, "step": 2083 }, { "epoch": 3.96, "grad_norm": 1.634518027305603, "learning_rate": 1.375418424816087e-05, "loss": 0.0292, "step": 2084 }, { "epoch": 3.96, "grad_norm": 1.4172121286392212, "learning_rate": 1.3748476358760219e-05, "loss": 0.0368, "step": 2085 }, { "epoch": 3.97, "grad_norm": 1.1138427257537842, "learning_rate": 1.3742767048099355e-05, "loss": 0.017, "step": 2086 }, { "epoch": 3.97, "grad_norm": 2.077343702316284, "learning_rate": 1.3737056318342995e-05, "loss": 0.037, "step": 2087 }, { "epoch": 3.97, "grad_norm": 1.352220892906189, "learning_rate": 1.373134417165641e-05, "loss": 0.0276, "step": 2088 }, { "epoch": 3.97, "grad_norm": 1.9337395429611206, "learning_rate": 1.3725630610205391e-05, "loss": 0.019, "step": 2089 }, { "epoch": 3.97, "grad_norm": 1.5643675327301025, "learning_rate": 1.3719915636156279e-05, "loss": 0.0304, "step": 2090 }, { "epoch": 3.98, "grad_norm": 1.99502694606781, "learning_rate": 1.3714199251675939e-05, "loss": 0.0289, "step": 2091 }, { "epoch": 3.98, "grad_norm": 1.7132854461669922, "learning_rate": 1.3708481458931784e-05, "loss": 0.02, "step": 2092 }, { "epoch": 3.98, "grad_norm": 0.8612022995948792, "learning_rate": 1.370276226009175e-05, "loss": 0.0124, "step": 2093 }, { "epoch": 3.98, "grad_norm": 1.2032455205917358, "learning_rate": 1.369704165732431e-05, "loss": 0.0144, "step": 2094 }, { "epoch": 3.98, "grad_norm": 1.4172003269195557, "learning_rate": 1.3691319652798471e-05, "loss": 0.0222, "step": 2095 }, { "epoch": 3.98, "grad_norm": 1.5540568828582764, "learning_rate": 1.3685596248683773e-05, "loss": 0.0344, "step": 2096 }, { "epoch": 3.99, "grad_norm": 1.1786208152770996, "learning_rate": 1.3679871447150275e-05, "loss": 0.023, "step": 2097 }, { "epoch": 3.99, "grad_norm": 1.0932623147964478, "learning_rate": 1.367414525036858e-05, "loss": 0.0191, "step": 2098 }, { "epoch": 3.99, "grad_norm": 1.6176624298095703, "learning_rate": 1.3668417660509812e-05, "loss": 0.0385, "step": 2099 }, { "epoch": 3.99, "grad_norm": 2.0385584831237793, "learning_rate": 1.3662688679745627e-05, "loss": 0.0552, "step": 2100 }, { "epoch": 3.99, "eval_blimp_filtered_avg": 0.7322388059701492, "eval_blimp_filtered_std": 0.004899194439751133, "step": 2100 }, { "epoch": 3.99, "eval_blimp_supplement_avg": 0.7866379310344828, "eval_blimp_supplement_std": 0.017705868095249835, "step": 2100 }, { "epoch": 3.99, "eval_vqa_filtered_avg": 0.35, "eval_vqa_filtered_std": 0.047937248544110196, "step": 2100 }, { "epoch": 3.99, "eval_winoground_filtered_avg": 0.5, "eval_winoground_filtered_std": 0.050251890762960605, "step": 2100 }, { "epoch": 3.99, "grad_norm": 1.1853941679000854, "learning_rate": 1.3656958310248206e-05, "loss": 0.0358, "step": 2101 }, { "epoch": 4.0, "grad_norm": 1.617460012435913, "learning_rate": 1.3651226554190257e-05, "loss": 0.0392, "step": 2102 }, { "epoch": 4.0, "grad_norm": 1.3693385124206543, "learning_rate": 1.3645493413745016e-05, "loss": 0.0241, "step": 2103 }, { "epoch": 4.0, "grad_norm": 1.0686345100402832, "learning_rate": 1.3639758891086241e-05, "loss": 0.0216, "step": 2104 }, { "epoch": 4.0, "grad_norm": 1.165591835975647, "learning_rate": 1.3634022988388217e-05, "loss": 0.0155, "step": 2105 }, { "epoch": 4.0, "grad_norm": 1.0208311080932617, "learning_rate": 1.3628285707825746e-05, "loss": 0.0195, "step": 2106 }, { "epoch": 4.01, "grad_norm": 0.9066298604011536, "learning_rate": 1.3622547051574158e-05, "loss": 0.0196, "step": 2107 }, { "epoch": 4.01, "grad_norm": 0.6958996057510376, "learning_rate": 1.3616807021809307e-05, "loss": 0.0123, "step": 2108 }, { "epoch": 4.01, "grad_norm": 1.0872020721435547, "learning_rate": 1.3611065620707562e-05, "loss": 0.0126, "step": 2109 }, { "epoch": 4.01, "grad_norm": 1.3017529249191284, "learning_rate": 1.3605322850445814e-05, "loss": 0.025, "step": 2110 }, { "epoch": 4.01, "grad_norm": 1.054193377494812, "learning_rate": 1.3599578713201473e-05, "loss": 0.0153, "step": 2111 }, { "epoch": 4.02, "grad_norm": 0.9566866159439087, "learning_rate": 1.359383321115247e-05, "loss": 0.0131, "step": 2112 }, { "epoch": 4.02, "grad_norm": 1.0304524898529053, "learning_rate": 1.3588086346477244e-05, "loss": 0.0173, "step": 2113 }, { "epoch": 4.02, "grad_norm": 1.4902535676956177, "learning_rate": 1.3582338121354767e-05, "loss": 0.0288, "step": 2114 }, { "epoch": 4.02, "grad_norm": 1.2409791946411133, "learning_rate": 1.3576588537964513e-05, "loss": 0.0239, "step": 2115 }, { "epoch": 4.02, "grad_norm": 1.257029414176941, "learning_rate": 1.3570837598486476e-05, "loss": 0.0132, "step": 2116 }, { "epoch": 4.02, "grad_norm": 1.2784637212753296, "learning_rate": 1.356508530510116e-05, "loss": 0.0226, "step": 2117 }, { "epoch": 4.03, "grad_norm": 1.2285295724868774, "learning_rate": 1.3559331659989589e-05, "loss": 0.0129, "step": 2118 }, { "epoch": 4.03, "grad_norm": 0.8785334825515747, "learning_rate": 1.35535766653333e-05, "loss": 0.0102, "step": 2119 }, { "epoch": 4.03, "grad_norm": 1.7554763555526733, "learning_rate": 1.3547820323314335e-05, "loss": 0.0346, "step": 2120 }, { "epoch": 4.03, "grad_norm": 1.3290135860443115, "learning_rate": 1.3542062636115251e-05, "loss": 0.033, "step": 2121 }, { "epoch": 4.03, "grad_norm": 0.6902342438697815, "learning_rate": 1.3536303605919113e-05, "loss": 0.0087, "step": 2122 }, { "epoch": 4.04, "grad_norm": 1.0634711980819702, "learning_rate": 1.3530543234909492e-05, "loss": 0.0192, "step": 2123 }, { "epoch": 4.04, "grad_norm": 0.572431206703186, "learning_rate": 1.352478152527048e-05, "loss": 0.0082, "step": 2124 }, { "epoch": 4.04, "grad_norm": 0.8776907920837402, "learning_rate": 1.3519018479186667e-05, "loss": 0.0148, "step": 2125 }, { "epoch": 4.04, "grad_norm": 0.7309280037879944, "learning_rate": 1.3513254098843143e-05, "loss": 0.0121, "step": 2126 }, { "epoch": 4.04, "grad_norm": 0.8073000907897949, "learning_rate": 1.3507488386425522e-05, "loss": 0.0104, "step": 2127 }, { "epoch": 4.05, "grad_norm": 0.6811977624893188, "learning_rate": 1.3501721344119907e-05, "loss": 0.0079, "step": 2128 }, { "epoch": 4.05, "grad_norm": 1.729273796081543, "learning_rate": 1.3495952974112914e-05, "loss": 0.0226, "step": 2129 }, { "epoch": 4.05, "grad_norm": 1.9295454025268555, "learning_rate": 1.3490183278591654e-05, "loss": 0.031, "step": 2130 }, { "epoch": 4.05, "grad_norm": 0.8432115912437439, "learning_rate": 1.3484412259743753e-05, "loss": 0.016, "step": 2131 }, { "epoch": 4.05, "grad_norm": 0.8489283323287964, "learning_rate": 1.3478639919757331e-05, "loss": 0.0111, "step": 2132 }, { "epoch": 4.06, "grad_norm": 0.991912305355072, "learning_rate": 1.3472866260821006e-05, "loss": 0.0221, "step": 2133 }, { "epoch": 4.06, "grad_norm": 1.0131129026412964, "learning_rate": 1.3467091285123903e-05, "loss": 0.0174, "step": 2134 }, { "epoch": 4.06, "grad_norm": 1.3471729755401611, "learning_rate": 1.346131499485564e-05, "loss": 0.0201, "step": 2135 }, { "epoch": 4.06, "grad_norm": 1.2241073846817017, "learning_rate": 1.345553739220634e-05, "loss": 0.0178, "step": 2136 }, { "epoch": 4.06, "grad_norm": 0.6623218655586243, "learning_rate": 1.3449758479366619e-05, "loss": 0.0061, "step": 2137 }, { "epoch": 4.06, "grad_norm": 0.8427249193191528, "learning_rate": 1.3443978258527593e-05, "loss": 0.0115, "step": 2138 }, { "epoch": 4.07, "grad_norm": 1.2049150466918945, "learning_rate": 1.3438196731880868e-05, "loss": 0.0145, "step": 2139 }, { "epoch": 4.07, "grad_norm": 1.9550988674163818, "learning_rate": 1.3432413901618548e-05, "loss": 0.0186, "step": 2140 }, { "epoch": 4.07, "grad_norm": 0.7712700366973877, "learning_rate": 1.3426629769933239e-05, "loss": 0.0128, "step": 2141 }, { "epoch": 4.07, "grad_norm": 1.1455459594726562, "learning_rate": 1.342084433901803e-05, "loss": 0.0166, "step": 2142 }, { "epoch": 4.07, "grad_norm": 1.088026762008667, "learning_rate": 1.3415057611066504e-05, "loss": 0.0135, "step": 2143 }, { "epoch": 4.08, "grad_norm": 0.8274200558662415, "learning_rate": 1.3409269588272741e-05, "loss": 0.0157, "step": 2144 }, { "epoch": 4.08, "grad_norm": 0.9945977330207825, "learning_rate": 1.3403480272831311e-05, "loss": 0.0159, "step": 2145 }, { "epoch": 4.08, "grad_norm": 1.0345876216888428, "learning_rate": 1.3397689666937267e-05, "loss": 0.0112, "step": 2146 }, { "epoch": 4.08, "grad_norm": 0.7190618515014648, "learning_rate": 1.3391897772786157e-05, "loss": 0.0134, "step": 2147 }, { "epoch": 4.08, "grad_norm": 0.6665619015693665, "learning_rate": 1.3386104592574023e-05, "loss": 0.011, "step": 2148 }, { "epoch": 4.09, "grad_norm": 1.0956332683563232, "learning_rate": 1.338031012849738e-05, "loss": 0.0115, "step": 2149 }, { "epoch": 4.09, "grad_norm": 1.6602171659469604, "learning_rate": 1.3374514382753247e-05, "loss": 0.0288, "step": 2150 }, { "epoch": 4.09, "grad_norm": 1.1493704319000244, "learning_rate": 1.3368717357539117e-05, "loss": 0.0186, "step": 2151 }, { "epoch": 4.09, "grad_norm": 1.3089998960494995, "learning_rate": 1.3362919055052968e-05, "loss": 0.008, "step": 2152 }, { "epoch": 4.09, "grad_norm": 0.9565787315368652, "learning_rate": 1.3357119477493265e-05, "loss": 0.0085, "step": 2153 }, { "epoch": 4.1, "grad_norm": 0.5645134449005127, "learning_rate": 1.3351318627058965e-05, "loss": 0.0062, "step": 2154 }, { "epoch": 4.1, "grad_norm": 1.2937167882919312, "learning_rate": 1.3345516505949495e-05, "loss": 0.0183, "step": 2155 }, { "epoch": 4.1, "grad_norm": 0.5987582802772522, "learning_rate": 1.3339713116364767e-05, "loss": 0.0055, "step": 2156 }, { "epoch": 4.1, "grad_norm": 1.2586889266967773, "learning_rate": 1.3333908460505179e-05, "loss": 0.0136, "step": 2157 }, { "epoch": 4.1, "grad_norm": 1.355879306793213, "learning_rate": 1.3328102540571601e-05, "loss": 0.0143, "step": 2158 }, { "epoch": 4.1, "grad_norm": 0.8192070722579956, "learning_rate": 1.332229535876539e-05, "loss": 0.0087, "step": 2159 }, { "epoch": 4.11, "grad_norm": 1.6629923582077026, "learning_rate": 1.3316486917288378e-05, "loss": 0.0155, "step": 2160 }, { "epoch": 4.11, "grad_norm": 0.633810818195343, "learning_rate": 1.3310677218342876e-05, "loss": 0.0152, "step": 2161 }, { "epoch": 4.11, "grad_norm": 1.4830193519592285, "learning_rate": 1.3304866264131669e-05, "loss": 0.0219, "step": 2162 }, { "epoch": 4.11, "grad_norm": 1.064574956893921, "learning_rate": 1.329905405685802e-05, "loss": 0.0138, "step": 2163 }, { "epoch": 4.11, "grad_norm": 1.2276020050048828, "learning_rate": 1.3293240598725667e-05, "loss": 0.0192, "step": 2164 }, { "epoch": 4.12, "grad_norm": 1.8049097061157227, "learning_rate": 1.3287425891938823e-05, "loss": 0.0117, "step": 2165 }, { "epoch": 4.12, "grad_norm": 0.9343156218528748, "learning_rate": 1.3281609938702173e-05, "loss": 0.0102, "step": 2166 }, { "epoch": 4.12, "grad_norm": 0.9275460839271545, "learning_rate": 1.3275792741220874e-05, "loss": 0.0106, "step": 2167 }, { "epoch": 4.12, "grad_norm": 0.9393692016601562, "learning_rate": 1.326997430170056e-05, "loss": 0.0124, "step": 2168 }, { "epoch": 4.12, "grad_norm": 1.168426275253296, "learning_rate": 1.326415462234732e-05, "loss": 0.018, "step": 2169 }, { "epoch": 4.13, "grad_norm": 1.3522515296936035, "learning_rate": 1.325833370536774e-05, "loss": 0.0288, "step": 2170 }, { "epoch": 4.13, "grad_norm": 1.1513316631317139, "learning_rate": 1.3252511552968852e-05, "loss": 0.0069, "step": 2171 }, { "epoch": 4.13, "grad_norm": 0.9859635233879089, "learning_rate": 1.3246688167358164e-05, "loss": 0.0197, "step": 2172 }, { "epoch": 4.13, "grad_norm": 1.24351167678833, "learning_rate": 1.3240863550743654e-05, "loss": 0.03, "step": 2173 }, { "epoch": 4.13, "grad_norm": 1.6544421911239624, "learning_rate": 1.3235037705333767e-05, "loss": 0.0281, "step": 2174 }, { "epoch": 4.13, "grad_norm": 1.1115013360977173, "learning_rate": 1.3229210633337407e-05, "loss": 0.0127, "step": 2175 }, { "epoch": 4.14, "grad_norm": 1.1599600315093994, "learning_rate": 1.3223382336963952e-05, "loss": 0.0176, "step": 2176 }, { "epoch": 4.14, "grad_norm": 1.2746607065200806, "learning_rate": 1.3217552818423238e-05, "loss": 0.0171, "step": 2177 }, { "epoch": 4.14, "grad_norm": 1.3997174501419067, "learning_rate": 1.321172207992557e-05, "loss": 0.0192, "step": 2178 }, { "epoch": 4.14, "grad_norm": 0.7355913519859314, "learning_rate": 1.3205890123681706e-05, "loss": 0.0074, "step": 2179 }, { "epoch": 4.14, "grad_norm": 0.9937809109687805, "learning_rate": 1.3200056951902876e-05, "loss": 0.0127, "step": 2180 }, { "epoch": 4.15, "grad_norm": 1.737535834312439, "learning_rate": 1.3194222566800765e-05, "loss": 0.0378, "step": 2181 }, { "epoch": 4.15, "grad_norm": 1.0773358345031738, "learning_rate": 1.3188386970587516e-05, "loss": 0.0126, "step": 2182 }, { "epoch": 4.15, "grad_norm": 1.0850467681884766, "learning_rate": 1.3182550165475745e-05, "loss": 0.0184, "step": 2183 }, { "epoch": 4.15, "grad_norm": 0.8798866271972656, "learning_rate": 1.3176712153678509e-05, "loss": 0.0134, "step": 2184 }, { "epoch": 4.15, "grad_norm": 0.6261573433876038, "learning_rate": 1.3170872937409328e-05, "loss": 0.0118, "step": 2185 }, { "epoch": 4.16, "grad_norm": 1.058266282081604, "learning_rate": 1.3165032518882183e-05, "loss": 0.017, "step": 2186 }, { "epoch": 4.16, "grad_norm": 1.1148384809494019, "learning_rate": 1.3159190900311511e-05, "loss": 0.03, "step": 2187 }, { "epoch": 4.16, "grad_norm": 1.8887144327163696, "learning_rate": 1.3153348083912198e-05, "loss": 0.025, "step": 2188 }, { "epoch": 4.16, "grad_norm": 2.548750877380371, "learning_rate": 1.3147504071899589e-05, "loss": 0.0231, "step": 2189 }, { "epoch": 4.16, "grad_norm": 1.5760282278060913, "learning_rate": 1.3141658866489477e-05, "loss": 0.026, "step": 2190 }, { "epoch": 4.17, "grad_norm": 1.1598668098449707, "learning_rate": 1.3135812469898117e-05, "loss": 0.0131, "step": 2191 }, { "epoch": 4.17, "grad_norm": 0.9099019169807434, "learning_rate": 1.3129964884342205e-05, "loss": 0.0192, "step": 2192 }, { "epoch": 4.17, "grad_norm": 1.1840403079986572, "learning_rate": 1.3124116112038895e-05, "loss": 0.0163, "step": 2193 }, { "epoch": 4.17, "grad_norm": 1.4822938442230225, "learning_rate": 1.311826615520579e-05, "loss": 0.0269, "step": 2194 }, { "epoch": 4.17, "grad_norm": 1.2318284511566162, "learning_rate": 1.3112415016060938e-05, "loss": 0.0196, "step": 2195 }, { "epoch": 4.17, "grad_norm": 1.2451096773147583, "learning_rate": 1.310656269682284e-05, "loss": 0.0089, "step": 2196 }, { "epoch": 4.18, "grad_norm": 0.9239171147346497, "learning_rate": 1.3100709199710441e-05, "loss": 0.013, "step": 2197 }, { "epoch": 4.18, "grad_norm": 0.6983928084373474, "learning_rate": 1.3094854526943135e-05, "loss": 0.0125, "step": 2198 }, { "epoch": 4.18, "grad_norm": 0.9464412927627563, "learning_rate": 1.3088998680740761e-05, "loss": 0.0179, "step": 2199 }, { "epoch": 4.18, "grad_norm": 0.5860888361930847, "learning_rate": 1.3083141663323604e-05, "loss": 0.0061, "step": 2200 }, { "epoch": 4.18, "eval_blimp_filtered_avg": 0.7295522388059702, "eval_blimp_filtered_std": 0.004912770723587936, "step": 2200 }, { "epoch": 4.18, "eval_blimp_supplement_avg": 0.7952586206896551, "eval_blimp_supplement_std": 0.01748706284198282, "step": 2200 }, { "epoch": 4.18, "eval_vqa_filtered_avg": 0.34, "eval_vqa_filtered_std": 0.04760952285695236, "step": 2200 }, { "epoch": 4.18, "eval_winoground_filtered_avg": 0.52, "eval_winoground_filtered_std": 0.05021167315686779, "step": 2200 }, { "epoch": 4.18, "grad_norm": 0.8925700783729553, "learning_rate": 1.307728347691239e-05, "loss": 0.0128, "step": 2201 }, { "epoch": 4.19, "grad_norm": 1.1232596635818481, "learning_rate": 1.3071424123728289e-05, "loss": 0.0152, "step": 2202 }, { "epoch": 4.19, "grad_norm": 0.7994497418403625, "learning_rate": 1.3065563605992916e-05, "loss": 0.0118, "step": 2203 }, { "epoch": 4.19, "grad_norm": 0.7547123432159424, "learning_rate": 1.3059701925928328e-05, "loss": 0.0095, "step": 2204 }, { "epoch": 4.19, "grad_norm": 0.935387134552002, "learning_rate": 1.3053839085757014e-05, "loss": 0.0102, "step": 2205 }, { "epoch": 4.19, "grad_norm": 1.3523166179656982, "learning_rate": 1.3047975087701918e-05, "loss": 0.0173, "step": 2206 }, { "epoch": 4.2, "grad_norm": 1.3212000131607056, "learning_rate": 1.3042109933986412e-05, "loss": 0.0204, "step": 2207 }, { "epoch": 4.2, "grad_norm": 0.6090236306190491, "learning_rate": 1.3036243626834301e-05, "loss": 0.0048, "step": 2208 }, { "epoch": 4.2, "grad_norm": 1.064154863357544, "learning_rate": 1.303037616846985e-05, "loss": 0.0094, "step": 2209 }, { "epoch": 4.2, "grad_norm": 0.9552380442619324, "learning_rate": 1.3024507561117731e-05, "loss": 0.0135, "step": 2210 }, { "epoch": 4.2, "grad_norm": 1.0582119226455688, "learning_rate": 1.3018637807003079e-05, "loss": 0.0211, "step": 2211 }, { "epoch": 4.21, "grad_norm": 0.9067414402961731, "learning_rate": 1.301276690835144e-05, "loss": 0.0131, "step": 2212 }, { "epoch": 4.21, "grad_norm": 1.0828461647033691, "learning_rate": 1.300689486738881e-05, "loss": 0.0116, "step": 2213 }, { "epoch": 4.21, "grad_norm": 1.1082024574279785, "learning_rate": 1.3001021686341616e-05, "loss": 0.0195, "step": 2214 }, { "epoch": 4.21, "grad_norm": 1.136460304260254, "learning_rate": 1.2995147367436705e-05, "loss": 0.0205, "step": 2215 }, { "epoch": 4.21, "grad_norm": 0.7588074803352356, "learning_rate": 1.2989271912901375e-05, "loss": 0.0144, "step": 2216 }, { "epoch": 4.21, "grad_norm": 1.606581211090088, "learning_rate": 1.2983395324963341e-05, "loss": 0.0173, "step": 2217 }, { "epoch": 4.22, "grad_norm": 1.3965083360671997, "learning_rate": 1.2977517605850746e-05, "loss": 0.0127, "step": 2218 }, { "epoch": 4.22, "grad_norm": 0.8711161017417908, "learning_rate": 1.2971638757792177e-05, "loss": 0.0174, "step": 2219 }, { "epoch": 4.22, "grad_norm": 1.4534482955932617, "learning_rate": 1.2965758783016634e-05, "loss": 0.0129, "step": 2220 }, { "epoch": 4.22, "grad_norm": 1.4531996250152588, "learning_rate": 1.2959877683753543e-05, "loss": 0.0183, "step": 2221 }, { "epoch": 4.22, "grad_norm": 1.0616806745529175, "learning_rate": 1.2953995462232772e-05, "loss": 0.0083, "step": 2222 }, { "epoch": 4.23, "grad_norm": 0.9289470314979553, "learning_rate": 1.2948112120684604e-05, "loss": 0.0085, "step": 2223 }, { "epoch": 4.23, "grad_norm": 1.11985182762146, "learning_rate": 1.2942227661339744e-05, "loss": 0.0247, "step": 2224 }, { "epoch": 4.23, "grad_norm": 1.7958065271377563, "learning_rate": 1.2936342086429326e-05, "loss": 0.0164, "step": 2225 }, { "epoch": 4.23, "grad_norm": 1.1791836023330688, "learning_rate": 1.2930455398184904e-05, "loss": 0.0123, "step": 2226 }, { "epoch": 4.23, "grad_norm": 0.8514710664749146, "learning_rate": 1.292456759883846e-05, "loss": 0.0146, "step": 2227 }, { "epoch": 4.24, "grad_norm": 1.2566837072372437, "learning_rate": 1.2918678690622388e-05, "loss": 0.0123, "step": 2228 }, { "epoch": 4.24, "grad_norm": 0.9944590926170349, "learning_rate": 1.2912788675769512e-05, "loss": 0.0139, "step": 2229 }, { "epoch": 4.24, "grad_norm": 1.2418197393417358, "learning_rate": 1.2906897556513069e-05, "loss": 0.0159, "step": 2230 }, { "epoch": 4.24, "grad_norm": 1.234543800354004, "learning_rate": 1.2901005335086716e-05, "loss": 0.0115, "step": 2231 }, { "epoch": 4.24, "grad_norm": 0.7990588545799255, "learning_rate": 1.2895112013724532e-05, "loss": 0.0183, "step": 2232 }, { "epoch": 4.25, "grad_norm": 1.910893201828003, "learning_rate": 1.2889217594661005e-05, "loss": 0.0286, "step": 2233 }, { "epoch": 4.25, "grad_norm": 1.4133832454681396, "learning_rate": 1.2883322080131046e-05, "loss": 0.0139, "step": 2234 }, { "epoch": 4.25, "grad_norm": 1.0253498554229736, "learning_rate": 1.2877425472369981e-05, "loss": 0.0112, "step": 2235 }, { "epoch": 4.25, "grad_norm": 1.5792607069015503, "learning_rate": 1.2871527773613549e-05, "loss": 0.018, "step": 2236 }, { "epoch": 4.25, "grad_norm": 1.409759759902954, "learning_rate": 1.2865628986097898e-05, "loss": 0.025, "step": 2237 }, { "epoch": 4.25, "grad_norm": 0.9281973242759705, "learning_rate": 1.2859729112059595e-05, "loss": 0.0121, "step": 2238 }, { "epoch": 4.26, "grad_norm": 1.0219347476959229, "learning_rate": 1.285382815373562e-05, "loss": 0.012, "step": 2239 }, { "epoch": 4.26, "grad_norm": 1.5901671648025513, "learning_rate": 1.284792611336336e-05, "loss": 0.0241, "step": 2240 }, { "epoch": 4.26, "grad_norm": 0.8678973317146301, "learning_rate": 1.2842022993180612e-05, "loss": 0.0116, "step": 2241 }, { "epoch": 4.26, "grad_norm": 1.8518239259719849, "learning_rate": 1.2836118795425584e-05, "loss": 0.0231, "step": 2242 }, { "epoch": 4.26, "grad_norm": 1.1886683702468872, "learning_rate": 1.2830213522336899e-05, "loss": 0.0088, "step": 2243 }, { "epoch": 4.27, "grad_norm": 0.603796660900116, "learning_rate": 1.282430717615357e-05, "loss": 0.0077, "step": 2244 }, { "epoch": 4.27, "grad_norm": 1.3703569173812866, "learning_rate": 1.2818399759115039e-05, "loss": 0.0097, "step": 2245 }, { "epoch": 4.27, "grad_norm": 1.80634343624115, "learning_rate": 1.2812491273461136e-05, "loss": 0.0189, "step": 2246 }, { "epoch": 4.27, "grad_norm": 1.1535483598709106, "learning_rate": 1.2806581721432108e-05, "loss": 0.0119, "step": 2247 }, { "epoch": 4.27, "grad_norm": 1.094956398010254, "learning_rate": 1.2800671105268599e-05, "loss": 0.0162, "step": 2248 }, { "epoch": 4.28, "grad_norm": 0.6538355350494385, "learning_rate": 1.2794759427211658e-05, "loss": 0.0087, "step": 2249 }, { "epoch": 4.28, "grad_norm": 1.8170435428619385, "learning_rate": 1.278884668950274e-05, "loss": 0.0215, "step": 2250 }, { "epoch": 4.28, "grad_norm": 1.0531749725341797, "learning_rate": 1.27829328943837e-05, "loss": 0.0227, "step": 2251 }, { "epoch": 4.28, "grad_norm": 1.1140577793121338, "learning_rate": 1.2777018044096792e-05, "loss": 0.0114, "step": 2252 }, { "epoch": 4.28, "grad_norm": 1.1306918859481812, "learning_rate": 1.2771102140884675e-05, "loss": 0.007, "step": 2253 }, { "epoch": 4.29, "grad_norm": 1.2661123275756836, "learning_rate": 1.2765185186990397e-05, "loss": 0.0135, "step": 2254 }, { "epoch": 4.29, "grad_norm": 1.1513035297393799, "learning_rate": 1.2759267184657416e-05, "loss": 0.0127, "step": 2255 }, { "epoch": 4.29, "grad_norm": 1.188882827758789, "learning_rate": 1.2753348136129584e-05, "loss": 0.0196, "step": 2256 }, { "epoch": 4.29, "grad_norm": 1.024736762046814, "learning_rate": 1.2747428043651144e-05, "loss": 0.0073, "step": 2257 }, { "epoch": 4.29, "grad_norm": 1.1915481090545654, "learning_rate": 1.2741506909466743e-05, "loss": 0.0147, "step": 2258 }, { "epoch": 4.29, "grad_norm": 1.0776464939117432, "learning_rate": 1.273558473582142e-05, "loss": 0.0108, "step": 2259 }, { "epoch": 4.3, "grad_norm": 1.1010030508041382, "learning_rate": 1.27296615249606e-05, "loss": 0.0229, "step": 2260 }, { "epoch": 4.3, "grad_norm": 1.1692205667495728, "learning_rate": 1.2723737279130115e-05, "loss": 0.0103, "step": 2261 }, { "epoch": 4.3, "grad_norm": 1.0209325551986694, "learning_rate": 1.2717812000576182e-05, "loss": 0.0077, "step": 2262 }, { "epoch": 4.3, "grad_norm": 1.9568567276000977, "learning_rate": 1.2711885691545411e-05, "loss": 0.0197, "step": 2263 }, { "epoch": 4.3, "grad_norm": 1.2962785959243774, "learning_rate": 1.2705958354284798e-05, "loss": 0.0183, "step": 2264 }, { "epoch": 4.31, "grad_norm": 0.5907105207443237, "learning_rate": 1.2700029991041738e-05, "loss": 0.0054, "step": 2265 }, { "epoch": 4.31, "grad_norm": 0.9418930411338806, "learning_rate": 1.269410060406401e-05, "loss": 0.0081, "step": 2266 }, { "epoch": 4.31, "grad_norm": 0.8157476186752319, "learning_rate": 1.2688170195599777e-05, "loss": 0.0195, "step": 2267 }, { "epoch": 4.31, "grad_norm": 1.5754177570343018, "learning_rate": 1.2682238767897597e-05, "loss": 0.0134, "step": 2268 }, { "epoch": 4.31, "grad_norm": 1.580366611480713, "learning_rate": 1.267630632320641e-05, "loss": 0.0248, "step": 2269 }, { "epoch": 4.32, "grad_norm": 1.6689666509628296, "learning_rate": 1.2670372863775544e-05, "loss": 0.039, "step": 2270 }, { "epoch": 4.32, "grad_norm": 0.9703996181488037, "learning_rate": 1.2664438391854708e-05, "loss": 0.0145, "step": 2271 }, { "epoch": 4.32, "grad_norm": 0.7858039140701294, "learning_rate": 1.2658502909694e-05, "loss": 0.0079, "step": 2272 }, { "epoch": 4.32, "grad_norm": 0.8039319515228271, "learning_rate": 1.2652566419543893e-05, "loss": 0.0117, "step": 2273 }, { "epoch": 4.32, "grad_norm": 1.2573941946029663, "learning_rate": 1.2646628923655253e-05, "loss": 0.0081, "step": 2274 }, { "epoch": 4.33, "grad_norm": 1.9370592832565308, "learning_rate": 1.264069042427932e-05, "loss": 0.0181, "step": 2275 }, { "epoch": 4.33, "grad_norm": 1.4250093698501587, "learning_rate": 1.2634750923667717e-05, "loss": 0.0106, "step": 2276 }, { "epoch": 4.33, "grad_norm": 2.6937313079833984, "learning_rate": 1.2628810424072441e-05, "loss": 0.0105, "step": 2277 }, { "epoch": 4.33, "grad_norm": 1.211914300918579, "learning_rate": 1.2622868927745881e-05, "loss": 0.0182, "step": 2278 }, { "epoch": 4.33, "grad_norm": 0.7229161262512207, "learning_rate": 1.2616926436940793e-05, "loss": 0.0111, "step": 2279 }, { "epoch": 4.33, "grad_norm": 1.7309857606887817, "learning_rate": 1.2610982953910308e-05, "loss": 0.0165, "step": 2280 }, { "epoch": 4.34, "grad_norm": 0.6578566431999207, "learning_rate": 1.2605038480907943e-05, "loss": 0.006, "step": 2281 }, { "epoch": 4.34, "grad_norm": 0.8749545812606812, "learning_rate": 1.2599093020187582e-05, "loss": 0.0083, "step": 2282 }, { "epoch": 4.34, "grad_norm": 1.2193398475646973, "learning_rate": 1.2593146574003487e-05, "loss": 0.012, "step": 2283 }, { "epoch": 4.34, "grad_norm": 1.291578769683838, "learning_rate": 1.2587199144610294e-05, "loss": 0.0184, "step": 2284 }, { "epoch": 4.34, "grad_norm": 1.1456817388534546, "learning_rate": 1.2581250734263013e-05, "loss": 0.0069, "step": 2285 }, { "epoch": 4.35, "grad_norm": 1.035452127456665, "learning_rate": 1.2575301345217023e-05, "loss": 0.01, "step": 2286 }, { "epoch": 4.35, "grad_norm": 0.5767019987106323, "learning_rate": 1.2569350979728072e-05, "loss": 0.0066, "step": 2287 }, { "epoch": 4.35, "grad_norm": 1.5913183689117432, "learning_rate": 1.2563399640052288e-05, "loss": 0.0231, "step": 2288 }, { "epoch": 4.35, "grad_norm": 0.8648266196250916, "learning_rate": 1.2557447328446154e-05, "loss": 0.0087, "step": 2289 }, { "epoch": 4.35, "grad_norm": 1.6418811082839966, "learning_rate": 1.2551494047166532e-05, "loss": 0.0265, "step": 2290 }, { "epoch": 4.36, "grad_norm": 1.208670735359192, "learning_rate": 1.2545539798470653e-05, "loss": 0.0158, "step": 2291 }, { "epoch": 4.36, "grad_norm": 1.3328157663345337, "learning_rate": 1.2539584584616108e-05, "loss": 0.0189, "step": 2292 }, { "epoch": 4.36, "grad_norm": 1.410677194595337, "learning_rate": 1.2533628407860858e-05, "loss": 0.0184, "step": 2293 }, { "epoch": 4.36, "grad_norm": 1.0210449695587158, "learning_rate": 1.2527671270463223e-05, "loss": 0.0113, "step": 2294 }, { "epoch": 4.36, "grad_norm": 1.0092321634292603, "learning_rate": 1.25217131746819e-05, "loss": 0.0201, "step": 2295 }, { "epoch": 4.37, "grad_norm": 1.8347806930541992, "learning_rate": 1.2515754122775932e-05, "loss": 0.02, "step": 2296 }, { "epoch": 4.37, "grad_norm": 0.9227219223976135, "learning_rate": 1.2509794117004739e-05, "loss": 0.0196, "step": 2297 }, { "epoch": 4.37, "grad_norm": 0.6493018865585327, "learning_rate": 1.25038331596281e-05, "loss": 0.0097, "step": 2298 }, { "epoch": 4.37, "grad_norm": 1.6889581680297852, "learning_rate": 1.2497871252906149e-05, "loss": 0.0105, "step": 2299 }, { "epoch": 4.37, "grad_norm": 1.3572280406951904, "learning_rate": 1.249190839909938e-05, "loss": 0.0188, "step": 2300 }, { "epoch": 4.37, "eval_blimp_filtered_avg": 0.7346268656716418, "eval_blimp_filtered_std": 0.00488277012244752, "step": 2300 }, { "epoch": 4.37, "eval_blimp_supplement_avg": 0.7931034482758621, "eval_blimp_supplement_std": 0.017646100160523173, "step": 2300 }, { "epoch": 4.37, "eval_vqa_filtered_avg": 0.37, "eval_vqa_filtered_std": 0.048523658709391, "step": 2300 }, { "epoch": 4.37, "eval_winoground_filtered_avg": 0.49, "eval_winoground_filtered_std": 0.05024183937956912, "step": 2300 }, { "epoch": 4.37, "grad_norm": 0.8938376307487488, "learning_rate": 1.2485944600468658e-05, "loss": 0.0142, "step": 2301 }, { "epoch": 4.38, "grad_norm": 0.9268003106117249, "learning_rate": 1.247997985927519e-05, "loss": 0.0124, "step": 2302 }, { "epoch": 4.38, "grad_norm": 0.9691134691238403, "learning_rate": 1.2474014177780553e-05, "loss": 0.01, "step": 2303 }, { "epoch": 4.38, "grad_norm": 1.2109153270721436, "learning_rate": 1.2468047558246674e-05, "loss": 0.013, "step": 2304 }, { "epoch": 4.38, "grad_norm": 0.5274924039840698, "learning_rate": 1.2462080002935837e-05, "loss": 0.0098, "step": 2305 }, { "epoch": 4.38, "grad_norm": 1.45637047290802, "learning_rate": 1.2456111514110679e-05, "loss": 0.03, "step": 2306 }, { "epoch": 4.39, "grad_norm": 1.066327691078186, "learning_rate": 1.2450142094034194e-05, "loss": 0.021, "step": 2307 }, { "epoch": 4.39, "grad_norm": 0.4031909108161926, "learning_rate": 1.2444171744969734e-05, "loss": 0.0047, "step": 2308 }, { "epoch": 4.39, "grad_norm": 1.076138973236084, "learning_rate": 1.2438200469180986e-05, "loss": 0.0163, "step": 2309 }, { "epoch": 4.39, "grad_norm": 0.6011402606964111, "learning_rate": 1.243222826893201e-05, "loss": 0.0066, "step": 2310 }, { "epoch": 4.39, "grad_norm": 1.1325591802597046, "learning_rate": 1.2426255146487201e-05, "loss": 0.0128, "step": 2311 }, { "epoch": 4.4, "grad_norm": 1.510998249053955, "learning_rate": 1.2420281104111312e-05, "loss": 0.0159, "step": 2312 }, { "epoch": 4.4, "grad_norm": 0.6622524857521057, "learning_rate": 1.2414306144069437e-05, "loss": 0.0116, "step": 2313 }, { "epoch": 4.4, "grad_norm": 1.5368036031723022, "learning_rate": 1.2408330268627028e-05, "loss": 0.0195, "step": 2314 }, { "epoch": 4.4, "grad_norm": 0.9089412689208984, "learning_rate": 1.2402353480049873e-05, "loss": 0.0082, "step": 2315 }, { "epoch": 4.4, "grad_norm": 0.9330781102180481, "learning_rate": 1.2396375780604118e-05, "loss": 0.0078, "step": 2316 }, { "epoch": 4.4, "grad_norm": 1.2989872694015503, "learning_rate": 1.2390397172556243e-05, "loss": 0.0216, "step": 2317 }, { "epoch": 4.41, "grad_norm": 1.6244730949401855, "learning_rate": 1.238441765817308e-05, "loss": 0.0415, "step": 2318 }, { "epoch": 4.41, "grad_norm": 0.9017387628555298, "learning_rate": 1.23784372397218e-05, "loss": 0.015, "step": 2319 }, { "epoch": 4.41, "grad_norm": 1.5028003454208374, "learning_rate": 1.2372455919469925e-05, "loss": 0.0293, "step": 2320 }, { "epoch": 4.41, "grad_norm": 1.135901927947998, "learning_rate": 1.2366473699685309e-05, "loss": 0.0131, "step": 2321 }, { "epoch": 4.41, "grad_norm": 0.32500532269477844, "learning_rate": 1.2360490582636148e-05, "loss": 0.0042, "step": 2322 }, { "epoch": 4.42, "grad_norm": 1.333357334136963, "learning_rate": 1.2354506570590992e-05, "loss": 0.0228, "step": 2323 }, { "epoch": 4.42, "grad_norm": 1.1968176364898682, "learning_rate": 1.234852166581871e-05, "loss": 0.0145, "step": 2324 }, { "epoch": 4.42, "grad_norm": 1.353279948234558, "learning_rate": 1.2342535870588522e-05, "loss": 0.0138, "step": 2325 }, { "epoch": 4.42, "grad_norm": 1.2213573455810547, "learning_rate": 1.2336549187169982e-05, "loss": 0.0367, "step": 2326 }, { "epoch": 4.42, "grad_norm": 1.2874090671539307, "learning_rate": 1.2330561617832984e-05, "loss": 0.0164, "step": 2327 }, { "epoch": 4.43, "grad_norm": 1.6808215379714966, "learning_rate": 1.2324573164847751e-05, "loss": 0.0194, "step": 2328 }, { "epoch": 4.43, "grad_norm": 1.819684386253357, "learning_rate": 1.2318583830484849e-05, "loss": 0.0231, "step": 2329 }, { "epoch": 4.43, "grad_norm": 1.0698074102401733, "learning_rate": 1.2312593617015176e-05, "loss": 0.0153, "step": 2330 }, { "epoch": 4.43, "grad_norm": 1.1849108934402466, "learning_rate": 1.2306602526709961e-05, "loss": 0.011, "step": 2331 }, { "epoch": 4.43, "grad_norm": 0.5983456373214722, "learning_rate": 1.2300610561840762e-05, "loss": 0.0116, "step": 2332 }, { "epoch": 4.44, "grad_norm": 1.106925129890442, "learning_rate": 1.229461772467948e-05, "loss": 0.0128, "step": 2333 }, { "epoch": 4.44, "grad_norm": 1.603007435798645, "learning_rate": 1.2288624017498335e-05, "loss": 0.0228, "step": 2334 }, { "epoch": 4.44, "grad_norm": 0.7140088677406311, "learning_rate": 1.2282629442569886e-05, "loss": 0.0108, "step": 2335 }, { "epoch": 4.44, "grad_norm": 1.4704846143722534, "learning_rate": 1.227663400216701e-05, "loss": 0.0298, "step": 2336 }, { "epoch": 4.44, "grad_norm": 0.823259174823761, "learning_rate": 1.2270637698562925e-05, "loss": 0.018, "step": 2337 }, { "epoch": 4.44, "grad_norm": 1.9890789985656738, "learning_rate": 1.2264640534031172e-05, "loss": 0.0228, "step": 2338 }, { "epoch": 4.45, "grad_norm": 1.7937606573104858, "learning_rate": 1.225864251084561e-05, "loss": 0.0173, "step": 2339 }, { "epoch": 4.45, "grad_norm": 1.4086416959762573, "learning_rate": 1.225264363128043e-05, "loss": 0.0109, "step": 2340 }, { "epoch": 4.45, "grad_norm": 0.8366307616233826, "learning_rate": 1.2246643897610153e-05, "loss": 0.0091, "step": 2341 }, { "epoch": 4.45, "grad_norm": 1.2884299755096436, "learning_rate": 1.2240643312109615e-05, "loss": 0.0238, "step": 2342 }, { "epoch": 4.45, "grad_norm": 0.9772794246673584, "learning_rate": 1.223464187705398e-05, "loss": 0.0203, "step": 2343 }, { "epoch": 4.46, "grad_norm": 1.3158230781555176, "learning_rate": 1.2228639594718734e-05, "loss": 0.0198, "step": 2344 }, { "epoch": 4.46, "grad_norm": 0.6672971844673157, "learning_rate": 1.222263646737968e-05, "loss": 0.0145, "step": 2345 }, { "epoch": 4.46, "grad_norm": 0.7992978096008301, "learning_rate": 1.2216632497312949e-05, "loss": 0.0152, "step": 2346 }, { "epoch": 4.46, "grad_norm": 0.8931558132171631, "learning_rate": 1.2210627686794983e-05, "loss": 0.0132, "step": 2347 }, { "epoch": 4.46, "grad_norm": 0.5343630313873291, "learning_rate": 1.2204622038102547e-05, "loss": 0.0105, "step": 2348 }, { "epoch": 4.47, "grad_norm": 0.7825436592102051, "learning_rate": 1.2198615553512726e-05, "loss": 0.0113, "step": 2349 }, { "epoch": 4.47, "grad_norm": 0.6692662239074707, "learning_rate": 1.2192608235302915e-05, "loss": 0.0089, "step": 2350 }, { "epoch": 4.47, "grad_norm": 1.2025500535964966, "learning_rate": 1.2186600085750833e-05, "loss": 0.0133, "step": 2351 }, { "epoch": 4.47, "grad_norm": 0.8077725172042847, "learning_rate": 1.2180591107134508e-05, "loss": 0.0152, "step": 2352 }, { "epoch": 4.47, "grad_norm": 1.2262694835662842, "learning_rate": 1.2174581301732289e-05, "loss": 0.017, "step": 2353 }, { "epoch": 4.48, "grad_norm": 1.0347033739089966, "learning_rate": 1.216857067182283e-05, "loss": 0.0111, "step": 2354 }, { "epoch": 4.48, "grad_norm": 1.0651127099990845, "learning_rate": 1.2162559219685104e-05, "loss": 0.0167, "step": 2355 }, { "epoch": 4.48, "grad_norm": 0.6443724036216736, "learning_rate": 1.2156546947598393e-05, "loss": 0.0068, "step": 2356 }, { "epoch": 4.48, "grad_norm": 1.2785027027130127, "learning_rate": 1.2150533857842294e-05, "loss": 0.0191, "step": 2357 }, { "epoch": 4.48, "grad_norm": 1.9944998025894165, "learning_rate": 1.2144519952696707e-05, "loss": 0.0264, "step": 2358 }, { "epoch": 4.48, "grad_norm": 2.014105796813965, "learning_rate": 1.2138505234441846e-05, "loss": 0.0209, "step": 2359 }, { "epoch": 4.49, "grad_norm": 1.7945935726165771, "learning_rate": 1.2132489705358234e-05, "loss": 0.023, "step": 2360 }, { "epoch": 4.49, "grad_norm": 0.6177988052368164, "learning_rate": 1.2126473367726697e-05, "loss": 0.0129, "step": 2361 }, { "epoch": 4.49, "grad_norm": 0.7657280564308167, "learning_rate": 1.2120456223828371e-05, "loss": 0.0107, "step": 2362 }, { "epoch": 4.49, "grad_norm": 1.1159803867340088, "learning_rate": 1.2114438275944697e-05, "loss": 0.0221, "step": 2363 }, { "epoch": 4.49, "grad_norm": 0.8661801218986511, "learning_rate": 1.2108419526357422e-05, "loss": 0.01, "step": 2364 }, { "epoch": 4.5, "grad_norm": 0.7897683382034302, "learning_rate": 1.2102399977348592e-05, "loss": 0.0112, "step": 2365 }, { "epoch": 4.5, "grad_norm": 0.9803110361099243, "learning_rate": 1.2096379631200562e-05, "loss": 0.0187, "step": 2366 }, { "epoch": 4.5, "grad_norm": 1.3702807426452637, "learning_rate": 1.209035849019599e-05, "loss": 0.0127, "step": 2367 }, { "epoch": 4.5, "grad_norm": 1.1137715578079224, "learning_rate": 1.2084336556617826e-05, "loss": 0.0116, "step": 2368 }, { "epoch": 4.5, "grad_norm": 0.5187825560569763, "learning_rate": 1.2078313832749336e-05, "loss": 0.0081, "step": 2369 }, { "epoch": 4.51, "grad_norm": 1.0253242254257202, "learning_rate": 1.2072290320874067e-05, "loss": 0.0113, "step": 2370 }, { "epoch": 4.51, "grad_norm": 1.973004937171936, "learning_rate": 1.2066266023275881e-05, "loss": 0.022, "step": 2371 }, { "epoch": 4.51, "grad_norm": 1.4304826259613037, "learning_rate": 1.2060240942238928e-05, "loss": 0.0158, "step": 2372 }, { "epoch": 4.51, "grad_norm": 1.6053792238235474, "learning_rate": 1.205421508004766e-05, "loss": 0.018, "step": 2373 }, { "epoch": 4.51, "grad_norm": 0.5130646824836731, "learning_rate": 1.204818843898682e-05, "loss": 0.0052, "step": 2374 }, { "epoch": 4.52, "grad_norm": 0.7907960414886475, "learning_rate": 1.2042161021341454e-05, "loss": 0.0089, "step": 2375 }, { "epoch": 4.52, "grad_norm": 0.763360321521759, "learning_rate": 1.2036132829396895e-05, "loss": 0.0149, "step": 2376 }, { "epoch": 4.52, "grad_norm": 0.978381335735321, "learning_rate": 1.2030103865438778e-05, "loss": 0.0081, "step": 2377 }, { "epoch": 4.52, "grad_norm": 1.5516126155853271, "learning_rate": 1.2024074131753018e-05, "loss": 0.0175, "step": 2378 }, { "epoch": 4.52, "grad_norm": 0.9485304355621338, "learning_rate": 1.2018043630625835e-05, "loss": 0.0214, "step": 2379 }, { "epoch": 4.52, "grad_norm": 0.970927894115448, "learning_rate": 1.2012012364343735e-05, "loss": 0.0114, "step": 2380 }, { "epoch": 4.53, "grad_norm": 1.1175932884216309, "learning_rate": 1.2005980335193507e-05, "loss": 0.0207, "step": 2381 }, { "epoch": 4.53, "grad_norm": 0.7195645570755005, "learning_rate": 1.1999947545462242e-05, "loss": 0.0122, "step": 2382 }, { "epoch": 4.53, "grad_norm": 0.9136951565742493, "learning_rate": 1.1993913997437314e-05, "loss": 0.009, "step": 2383 }, { "epoch": 4.53, "grad_norm": 1.1337944269180298, "learning_rate": 1.1987879693406378e-05, "loss": 0.0149, "step": 2384 }, { "epoch": 4.53, "grad_norm": 0.768735945224762, "learning_rate": 1.1981844635657388e-05, "loss": 0.0071, "step": 2385 }, { "epoch": 4.54, "grad_norm": 1.6612995862960815, "learning_rate": 1.1975808826478568e-05, "loss": 0.0194, "step": 2386 }, { "epoch": 4.54, "grad_norm": 0.29878246784210205, "learning_rate": 1.1969772268158444e-05, "loss": 0.0036, "step": 2387 }, { "epoch": 4.54, "grad_norm": 1.020808219909668, "learning_rate": 1.1963734962985812e-05, "loss": 0.0116, "step": 2388 }, { "epoch": 4.54, "grad_norm": 1.010118007659912, "learning_rate": 1.1957696913249761e-05, "loss": 0.0088, "step": 2389 }, { "epoch": 4.54, "grad_norm": 0.9575981497764587, "learning_rate": 1.195165812123966e-05, "loss": 0.013, "step": 2390 }, { "epoch": 4.55, "grad_norm": 1.4687076807022095, "learning_rate": 1.1945618589245152e-05, "loss": 0.0226, "step": 2391 }, { "epoch": 4.55, "grad_norm": 0.7090831398963928, "learning_rate": 1.1939578319556173e-05, "loss": 0.0086, "step": 2392 }, { "epoch": 4.55, "grad_norm": 1.2885676622390747, "learning_rate": 1.1933537314462928e-05, "loss": 0.0155, "step": 2393 }, { "epoch": 4.55, "grad_norm": 1.3992488384246826, "learning_rate": 1.1927495576255908e-05, "loss": 0.0264, "step": 2394 }, { "epoch": 4.55, "grad_norm": 0.9759151935577393, "learning_rate": 1.1921453107225878e-05, "loss": 0.0126, "step": 2395 }, { "epoch": 4.56, "grad_norm": 1.1386523246765137, "learning_rate": 1.1915409909663877e-05, "loss": 0.0195, "step": 2396 }, { "epoch": 4.56, "grad_norm": 0.6335881352424622, "learning_rate": 1.190936598586123e-05, "loss": 0.003, "step": 2397 }, { "epoch": 4.56, "grad_norm": 0.9257863759994507, "learning_rate": 1.1903321338109527e-05, "loss": 0.0086, "step": 2398 }, { "epoch": 4.56, "grad_norm": 0.8542953133583069, "learning_rate": 1.1897275968700642e-05, "loss": 0.0094, "step": 2399 }, { "epoch": 4.56, "grad_norm": 0.7049980759620667, "learning_rate": 1.1891229879926716e-05, "loss": 0.0084, "step": 2400 }, { "epoch": 4.56, "eval_blimp_filtered_avg": 0.7361194029850746, "eval_blimp_filtered_std": 0.00487935872453057, "step": 2400 }, { "epoch": 4.56, "eval_blimp_supplement_avg": 0.7866379310344828, "eval_blimp_supplement_std": 0.017761470046014704, "step": 2400 }, { "epoch": 4.56, "eval_vqa_filtered_avg": 0.35, "eval_vqa_filtered_std": 0.0479372485441102, "step": 2400 }, { "epoch": 4.56, "eval_winoground_filtered_avg": 0.47, "eval_winoground_filtered_std": 0.05016135580465919, "step": 2400 }, { "epoch": 4.56, "grad_norm": 0.9520881772041321, "learning_rate": 1.188518307408016e-05, "loss": 0.0202, "step": 2401 }, { "epoch": 4.57, "grad_norm": 1.4669376611709595, "learning_rate": 1.1879135553453667e-05, "loss": 0.0135, "step": 2402 }, { "epoch": 4.57, "grad_norm": 0.8741647601127625, "learning_rate": 1.1873087320340196e-05, "loss": 0.0209, "step": 2403 }, { "epoch": 4.57, "grad_norm": 1.5621390342712402, "learning_rate": 1.186703837703297e-05, "loss": 0.0141, "step": 2404 }, { "epoch": 4.57, "grad_norm": 0.4446950852870941, "learning_rate": 1.1860988725825491e-05, "loss": 0.0069, "step": 2405 }, { "epoch": 4.57, "grad_norm": 0.9403313994407654, "learning_rate": 1.1854938369011524e-05, "loss": 0.0123, "step": 2406 }, { "epoch": 4.58, "grad_norm": 1.399749994277954, "learning_rate": 1.1848887308885105e-05, "loss": 0.02, "step": 2407 }, { "epoch": 4.58, "grad_norm": 1.0567775964736938, "learning_rate": 1.1842835547740532e-05, "loss": 0.0122, "step": 2408 }, { "epoch": 4.58, "grad_norm": 1.2850054502487183, "learning_rate": 1.1836783087872372e-05, "loss": 0.0245, "step": 2409 }, { "epoch": 4.58, "grad_norm": 0.7780632376670837, "learning_rate": 1.1830729931575456e-05, "loss": 0.0144, "step": 2410 }, { "epoch": 4.58, "grad_norm": 0.7088937759399414, "learning_rate": 1.1824676081144877e-05, "loss": 0.0054, "step": 2411 }, { "epoch": 4.59, "grad_norm": 1.1329115629196167, "learning_rate": 1.1818621538875998e-05, "loss": 0.0169, "step": 2412 }, { "epoch": 4.59, "grad_norm": 1.039542555809021, "learning_rate": 1.1812566307064437e-05, "loss": 0.0102, "step": 2413 }, { "epoch": 4.59, "grad_norm": 0.7765316367149353, "learning_rate": 1.1806510388006074e-05, "loss": 0.009, "step": 2414 }, { "epoch": 4.59, "grad_norm": 0.7796841263771057, "learning_rate": 1.1800453783997057e-05, "loss": 0.0079, "step": 2415 }, { "epoch": 4.59, "grad_norm": 0.9588986039161682, "learning_rate": 1.1794396497333786e-05, "loss": 0.0111, "step": 2416 }, { "epoch": 4.6, "grad_norm": 1.0790221691131592, "learning_rate": 1.1788338530312921e-05, "loss": 0.0158, "step": 2417 }, { "epoch": 4.6, "grad_norm": 0.9277145266532898, "learning_rate": 1.1782279885231385e-05, "loss": 0.0174, "step": 2418 }, { "epoch": 4.6, "grad_norm": 1.8076763153076172, "learning_rate": 1.177622056438635e-05, "loss": 0.0223, "step": 2419 }, { "epoch": 4.6, "grad_norm": 1.127877950668335, "learning_rate": 1.1770160570075249e-05, "loss": 0.0134, "step": 2420 }, { "epoch": 4.6, "grad_norm": 0.6198025941848755, "learning_rate": 1.1764099904595772e-05, "loss": 0.0081, "step": 2421 }, { "epoch": 4.6, "grad_norm": 1.5980852842330933, "learning_rate": 1.175803857024586e-05, "loss": 0.0273, "step": 2422 }, { "epoch": 4.61, "grad_norm": 0.8625930547714233, "learning_rate": 1.175197656932371e-05, "loss": 0.0093, "step": 2423 }, { "epoch": 4.61, "grad_norm": 1.1489616632461548, "learning_rate": 1.174591390412777e-05, "loss": 0.0214, "step": 2424 }, { "epoch": 4.61, "grad_norm": 0.9209426641464233, "learning_rate": 1.1739850576956742e-05, "loss": 0.0123, "step": 2425 }, { "epoch": 4.61, "grad_norm": 1.017325758934021, "learning_rate": 1.1733786590109576e-05, "loss": 0.0103, "step": 2426 }, { "epoch": 4.61, "grad_norm": 0.9573784470558167, "learning_rate": 1.1727721945885474e-05, "loss": 0.012, "step": 2427 }, { "epoch": 4.62, "grad_norm": 0.902164876461029, "learning_rate": 1.1721656646583886e-05, "loss": 0.0155, "step": 2428 }, { "epoch": 4.62, "grad_norm": 1.0715874433517456, "learning_rate": 1.1715590694504515e-05, "loss": 0.0127, "step": 2429 }, { "epoch": 4.62, "grad_norm": 0.8542826175689697, "learning_rate": 1.1709524091947304e-05, "loss": 0.0085, "step": 2430 }, { "epoch": 4.62, "grad_norm": 0.7307683825492859, "learning_rate": 1.1703456841212449e-05, "loss": 0.0089, "step": 2431 }, { "epoch": 4.62, "grad_norm": 0.9842017889022827, "learning_rate": 1.1697388944600385e-05, "loss": 0.0113, "step": 2432 }, { "epoch": 4.63, "grad_norm": 1.447563886642456, "learning_rate": 1.16913204044118e-05, "loss": 0.0299, "step": 2433 }, { "epoch": 4.63, "grad_norm": 1.4897927045822144, "learning_rate": 1.1685251222947622e-05, "loss": 0.0132, "step": 2434 }, { "epoch": 4.63, "grad_norm": 1.2552454471588135, "learning_rate": 1.1679181402509023e-05, "loss": 0.0204, "step": 2435 }, { "epoch": 4.63, "grad_norm": 1.0673441886901855, "learning_rate": 1.1673110945397414e-05, "loss": 0.0104, "step": 2436 }, { "epoch": 4.63, "grad_norm": 1.0226056575775146, "learning_rate": 1.166703985391445e-05, "loss": 0.0108, "step": 2437 }, { "epoch": 4.63, "grad_norm": 1.5819756984710693, "learning_rate": 1.166096813036203e-05, "loss": 0.0182, "step": 2438 }, { "epoch": 4.64, "grad_norm": 0.8554696440696716, "learning_rate": 1.1654895777042285e-05, "loss": 0.0078, "step": 2439 }, { "epoch": 4.64, "grad_norm": 1.0414880514144897, "learning_rate": 1.164882279625759e-05, "loss": 0.0142, "step": 2440 }, { "epoch": 4.64, "grad_norm": 0.7936756610870361, "learning_rate": 1.164274919031056e-05, "loss": 0.0089, "step": 2441 }, { "epoch": 4.64, "grad_norm": 0.5108723640441895, "learning_rate": 1.163667496150404e-05, "loss": 0.0052, "step": 2442 }, { "epoch": 4.64, "grad_norm": 1.6410350799560547, "learning_rate": 1.1630600112141113e-05, "loss": 0.0114, "step": 2443 }, { "epoch": 4.65, "grad_norm": 0.6573932766914368, "learning_rate": 1.1624524644525107e-05, "loss": 0.0062, "step": 2444 }, { "epoch": 4.65, "grad_norm": 0.9852991700172424, "learning_rate": 1.1618448560959572e-05, "loss": 0.0146, "step": 2445 }, { "epoch": 4.65, "grad_norm": 0.515960156917572, "learning_rate": 1.1612371863748295e-05, "loss": 0.0052, "step": 2446 }, { "epoch": 4.65, "grad_norm": 0.6017852425575256, "learning_rate": 1.1606294555195297e-05, "loss": 0.006, "step": 2447 }, { "epoch": 4.65, "grad_norm": 0.7619339227676392, "learning_rate": 1.1600216637604835e-05, "loss": 0.0058, "step": 2448 }, { "epoch": 4.66, "grad_norm": 1.4468237161636353, "learning_rate": 1.159413811328139e-05, "loss": 0.0103, "step": 2449 }, { "epoch": 4.66, "grad_norm": 1.0745768547058105, "learning_rate": 1.1588058984529673e-05, "loss": 0.0086, "step": 2450 }, { "epoch": 4.66, "grad_norm": 1.517622470855713, "learning_rate": 1.1581979253654632e-05, "loss": 0.0232, "step": 2451 }, { "epoch": 4.66, "grad_norm": 1.3080182075500488, "learning_rate": 1.1575898922961435e-05, "loss": 0.0176, "step": 2452 }, { "epoch": 4.66, "grad_norm": 1.2387288808822632, "learning_rate": 1.1569817994755482e-05, "loss": 0.0187, "step": 2453 }, { "epoch": 4.67, "grad_norm": 1.232553243637085, "learning_rate": 1.1563736471342394e-05, "loss": 0.0133, "step": 2454 }, { "epoch": 4.67, "grad_norm": 2.984924793243408, "learning_rate": 1.155765435502803e-05, "loss": 0.0489, "step": 2455 }, { "epoch": 4.67, "grad_norm": 0.7284197211265564, "learning_rate": 1.1551571648118456e-05, "loss": 0.0091, "step": 2456 }, { "epoch": 4.67, "grad_norm": 0.8831286430358887, "learning_rate": 1.154548835291998e-05, "loss": 0.0259, "step": 2457 }, { "epoch": 4.67, "grad_norm": 0.6806700825691223, "learning_rate": 1.1539404471739119e-05, "loss": 0.0086, "step": 2458 }, { "epoch": 4.67, "grad_norm": 0.8711144328117371, "learning_rate": 1.1533320006882622e-05, "loss": 0.0166, "step": 2459 }, { "epoch": 4.68, "grad_norm": 0.6691437363624573, "learning_rate": 1.152723496065745e-05, "loss": 0.0035, "step": 2460 }, { "epoch": 4.68, "grad_norm": 1.14626944065094, "learning_rate": 1.1521149335370794e-05, "loss": 0.0158, "step": 2461 }, { "epoch": 4.68, "grad_norm": 0.8248893022537231, "learning_rate": 1.1515063133330058e-05, "loss": 0.0126, "step": 2462 }, { "epoch": 4.68, "grad_norm": 0.8851649761199951, "learning_rate": 1.1508976356842867e-05, "loss": 0.0115, "step": 2463 }, { "epoch": 4.68, "grad_norm": 2.9352810382843018, "learning_rate": 1.1502889008217064e-05, "loss": 0.0263, "step": 2464 }, { "epoch": 4.69, "grad_norm": 0.8945159316062927, "learning_rate": 1.1496801089760709e-05, "loss": 0.0096, "step": 2465 }, { "epoch": 4.69, "grad_norm": 1.2701833248138428, "learning_rate": 1.1490712603782072e-05, "loss": 0.0239, "step": 2466 }, { "epoch": 4.69, "grad_norm": 0.9095029234886169, "learning_rate": 1.1484623552589653e-05, "loss": 0.007, "step": 2467 }, { "epoch": 4.69, "grad_norm": 1.4001100063323975, "learning_rate": 1.1478533938492154e-05, "loss": 0.0322, "step": 2468 }, { "epoch": 4.69, "grad_norm": 0.46652281284332275, "learning_rate": 1.1472443763798487e-05, "loss": 0.0063, "step": 2469 }, { "epoch": 4.7, "grad_norm": 1.2373088598251343, "learning_rate": 1.1466353030817792e-05, "loss": 0.0208, "step": 2470 }, { "epoch": 4.7, "grad_norm": 1.4272174835205078, "learning_rate": 1.1460261741859407e-05, "loss": 0.0213, "step": 2471 }, { "epoch": 4.7, "grad_norm": 0.9121415019035339, "learning_rate": 1.1454169899232885e-05, "loss": 0.0072, "step": 2472 }, { "epoch": 4.7, "grad_norm": 0.8308503031730652, "learning_rate": 1.1448077505247989e-05, "loss": 0.0119, "step": 2473 }, { "epoch": 4.7, "grad_norm": 0.9290250539779663, "learning_rate": 1.1441984562214694e-05, "loss": 0.0131, "step": 2474 }, { "epoch": 4.71, "grad_norm": 1.0032551288604736, "learning_rate": 1.1435891072443181e-05, "loss": 0.0078, "step": 2475 }, { "epoch": 4.71, "grad_norm": 0.977259635925293, "learning_rate": 1.1429797038243838e-05, "loss": 0.0102, "step": 2476 }, { "epoch": 4.71, "grad_norm": 1.203691005706787, "learning_rate": 1.1423702461927255e-05, "loss": 0.0141, "step": 2477 }, { "epoch": 4.71, "grad_norm": 0.9489505290985107, "learning_rate": 1.1417607345804238e-05, "loss": 0.0073, "step": 2478 }, { "epoch": 4.71, "grad_norm": 1.434558391571045, "learning_rate": 1.1411511692185784e-05, "loss": 0.0242, "step": 2479 }, { "epoch": 4.71, "grad_norm": 0.7467774152755737, "learning_rate": 1.1405415503383109e-05, "loss": 0.0094, "step": 2480 }, { "epoch": 4.72, "grad_norm": 1.1541870832443237, "learning_rate": 1.139931878170762e-05, "loss": 0.01, "step": 2481 }, { "epoch": 4.72, "grad_norm": 0.5372596383094788, "learning_rate": 1.1393221529470931e-05, "loss": 0.0084, "step": 2482 }, { "epoch": 4.72, "grad_norm": 0.8411925435066223, "learning_rate": 1.1387123748984856e-05, "loss": 0.008, "step": 2483 }, { "epoch": 4.72, "grad_norm": 1.0870981216430664, "learning_rate": 1.1381025442561415e-05, "loss": 0.0202, "step": 2484 }, { "epoch": 4.72, "grad_norm": 0.5471855998039246, "learning_rate": 1.1374926612512815e-05, "loss": 0.0046, "step": 2485 }, { "epoch": 4.73, "grad_norm": 0.9199904799461365, "learning_rate": 1.1368827261151473e-05, "loss": 0.0113, "step": 2486 }, { "epoch": 4.73, "grad_norm": 1.0369184017181396, "learning_rate": 1.1362727390789998e-05, "loss": 0.0076, "step": 2487 }, { "epoch": 4.73, "grad_norm": 2.2338039875030518, "learning_rate": 1.1356627003741198e-05, "loss": 0.0453, "step": 2488 }, { "epoch": 4.73, "grad_norm": 0.8560524582862854, "learning_rate": 1.1350526102318072e-05, "loss": 0.0106, "step": 2489 }, { "epoch": 4.73, "grad_norm": 2.3237197399139404, "learning_rate": 1.1344424688833823e-05, "loss": 0.0206, "step": 2490 }, { "epoch": 4.74, "grad_norm": 0.5359383821487427, "learning_rate": 1.1338322765601846e-05, "loss": 0.0103, "step": 2491 }, { "epoch": 4.74, "grad_norm": 0.8362687230110168, "learning_rate": 1.1332220334935716e-05, "loss": 0.009, "step": 2492 }, { "epoch": 4.74, "grad_norm": 0.9607349634170532, "learning_rate": 1.1326117399149221e-05, "loss": 0.0117, "step": 2493 }, { "epoch": 4.74, "grad_norm": 1.7365549802780151, "learning_rate": 1.1320013960556327e-05, "loss": 0.0323, "step": 2494 }, { "epoch": 4.74, "grad_norm": 1.0343732833862305, "learning_rate": 1.1313910021471193e-05, "loss": 0.0139, "step": 2495 }, { "epoch": 4.75, "grad_norm": 0.492567777633667, "learning_rate": 1.1307805584208167e-05, "loss": 0.0052, "step": 2496 }, { "epoch": 4.75, "grad_norm": 0.48145633935928345, "learning_rate": 1.1301700651081794e-05, "loss": 0.0038, "step": 2497 }, { "epoch": 4.75, "grad_norm": 0.8804132342338562, "learning_rate": 1.1295595224406797e-05, "loss": 0.0083, "step": 2498 }, { "epoch": 4.75, "grad_norm": 0.5639855861663818, "learning_rate": 1.1289489306498092e-05, "loss": 0.0089, "step": 2499 }, { "epoch": 4.75, "grad_norm": 1.3099236488342285, "learning_rate": 1.1283382899670773e-05, "loss": 0.0177, "step": 2500 }, { "epoch": 4.75, "eval_blimp_filtered_avg": 0.7364179104477612, "eval_blimp_filtered_std": 0.004836652803238828, "step": 2500 }, { "epoch": 4.75, "eval_blimp_supplement_avg": 0.7823275862068966, "eval_blimp_supplement_std": 0.017806353984061274, "step": 2500 }, { "epoch": 4.75, "eval_vqa_filtered_avg": 0.38, "eval_vqa_filtered_std": 0.04878317312145633, "step": 2500 }, { "epoch": 4.75, "eval_winoground_filtered_avg": 0.5, "eval_winoground_filtered_std": 0.050251890762960605, "step": 2500 }, { "epoch": 4.75, "grad_norm": 1.0698009729385376, "learning_rate": 1.1277276006240135e-05, "loss": 0.0151, "step": 2501 }, { "epoch": 4.76, "grad_norm": 1.130595088005066, "learning_rate": 1.1271168628521636e-05, "loss": 0.0098, "step": 2502 }, { "epoch": 4.76, "grad_norm": 0.9644403457641602, "learning_rate": 1.1265060768830943e-05, "loss": 0.0113, "step": 2503 }, { "epoch": 4.76, "grad_norm": 0.940203070640564, "learning_rate": 1.1258952429483882e-05, "loss": 0.0069, "step": 2504 }, { "epoch": 4.76, "grad_norm": 1.0183829069137573, "learning_rate": 1.1252843612796476e-05, "loss": 0.0129, "step": 2505 }, { "epoch": 4.76, "grad_norm": 0.8281590938568115, "learning_rate": 1.1246734321084925e-05, "loss": 0.0071, "step": 2506 }, { "epoch": 4.77, "grad_norm": 1.438960075378418, "learning_rate": 1.1240624556665605e-05, "loss": 0.0156, "step": 2507 }, { "epoch": 4.77, "grad_norm": 1.8981847763061523, "learning_rate": 1.1234514321855078e-05, "loss": 0.0283, "step": 2508 }, { "epoch": 4.77, "grad_norm": 1.3760091066360474, "learning_rate": 1.1228403618970079e-05, "loss": 0.0086, "step": 2509 }, { "epoch": 4.77, "grad_norm": 0.7929789423942566, "learning_rate": 1.1222292450327524e-05, "loss": 0.0089, "step": 2510 }, { "epoch": 4.77, "grad_norm": 0.6908947825431824, "learning_rate": 1.12161808182445e-05, "loss": 0.0068, "step": 2511 }, { "epoch": 4.78, "grad_norm": 1.5067059993743896, "learning_rate": 1.1210068725038277e-05, "loss": 0.0219, "step": 2512 }, { "epoch": 4.78, "grad_norm": 0.9827744960784912, "learning_rate": 1.1203956173026298e-05, "loss": 0.0121, "step": 2513 }, { "epoch": 4.78, "grad_norm": 1.2070138454437256, "learning_rate": 1.1197843164526174e-05, "loss": 0.0219, "step": 2514 }, { "epoch": 4.78, "grad_norm": 2.1584227085113525, "learning_rate": 1.1191729701855696e-05, "loss": 0.0166, "step": 2515 }, { "epoch": 4.78, "grad_norm": 0.7772582173347473, "learning_rate": 1.1185615787332826e-05, "loss": 0.0056, "step": 2516 }, { "epoch": 4.79, "grad_norm": 1.1300486326217651, "learning_rate": 1.1179501423275697e-05, "loss": 0.007, "step": 2517 }, { "epoch": 4.79, "grad_norm": 1.4299899339675903, "learning_rate": 1.1173386612002606e-05, "loss": 0.02, "step": 2518 }, { "epoch": 4.79, "grad_norm": 1.2177149057388306, "learning_rate": 1.1167271355832032e-05, "loss": 0.0126, "step": 2519 }, { "epoch": 4.79, "grad_norm": 1.981110692024231, "learning_rate": 1.1161155657082611e-05, "loss": 0.0257, "step": 2520 }, { "epoch": 4.79, "grad_norm": 0.7783499956130981, "learning_rate": 1.1155039518073156e-05, "loss": 0.008, "step": 2521 }, { "epoch": 4.79, "grad_norm": 0.9505921006202698, "learning_rate": 1.1148922941122637e-05, "loss": 0.0071, "step": 2522 }, { "epoch": 4.8, "grad_norm": 1.6100314855575562, "learning_rate": 1.11428059285502e-05, "loss": 0.0178, "step": 2523 }, { "epoch": 4.8, "grad_norm": 1.101693034172058, "learning_rate": 1.113668848267515e-05, "loss": 0.0117, "step": 2524 }, { "epoch": 4.8, "grad_norm": 1.9672901630401611, "learning_rate": 1.1130570605816957e-05, "loss": 0.025, "step": 2525 }, { "epoch": 4.8, "grad_norm": 1.5686448812484741, "learning_rate": 1.1124452300295257e-05, "loss": 0.0214, "step": 2526 }, { "epoch": 4.8, "grad_norm": 0.5055027008056641, "learning_rate": 1.1118333568429849e-05, "loss": 0.0057, "step": 2527 }, { "epoch": 4.81, "grad_norm": 1.685437798500061, "learning_rate": 1.1112214412540685e-05, "loss": 0.0204, "step": 2528 }, { "epoch": 4.81, "grad_norm": 0.8063406348228455, "learning_rate": 1.1106094834947892e-05, "loss": 0.0059, "step": 2529 }, { "epoch": 4.81, "grad_norm": 0.6020455956459045, "learning_rate": 1.1099974837971746e-05, "loss": 0.0082, "step": 2530 }, { "epoch": 4.81, "grad_norm": 1.2323620319366455, "learning_rate": 1.1093854423932683e-05, "loss": 0.0258, "step": 2531 }, { "epoch": 4.81, "grad_norm": 0.9180220365524292, "learning_rate": 1.1087733595151306e-05, "loss": 0.0094, "step": 2532 }, { "epoch": 4.82, "grad_norm": 1.5568115711212158, "learning_rate": 1.1081612353948364e-05, "loss": 0.0157, "step": 2533 }, { "epoch": 4.82, "grad_norm": 1.360135793685913, "learning_rate": 1.1075490702644764e-05, "loss": 0.0085, "step": 2534 }, { "epoch": 4.82, "grad_norm": 1.1570833921432495, "learning_rate": 1.1069368643561578e-05, "loss": 0.0157, "step": 2535 }, { "epoch": 4.82, "grad_norm": 1.1029887199401855, "learning_rate": 1.1063246179020022e-05, "loss": 0.0123, "step": 2536 }, { "epoch": 4.82, "grad_norm": 1.3283939361572266, "learning_rate": 1.1057123311341474e-05, "loss": 0.0143, "step": 2537 }, { "epoch": 4.83, "grad_norm": 1.1190470457077026, "learning_rate": 1.1051000042847453e-05, "loss": 0.0122, "step": 2538 }, { "epoch": 4.83, "grad_norm": 1.2720216512680054, "learning_rate": 1.1044876375859648e-05, "loss": 0.0195, "step": 2539 }, { "epoch": 4.83, "grad_norm": 1.2524040937423706, "learning_rate": 1.1038752312699884e-05, "loss": 0.0153, "step": 2540 }, { "epoch": 4.83, "grad_norm": 0.904992401599884, "learning_rate": 1.1032627855690136e-05, "loss": 0.0138, "step": 2541 }, { "epoch": 4.83, "grad_norm": 0.8552100658416748, "learning_rate": 1.1026503007152543e-05, "loss": 0.0114, "step": 2542 }, { "epoch": 4.83, "grad_norm": 1.1291240453720093, "learning_rate": 1.1020377769409377e-05, "loss": 0.0212, "step": 2543 }, { "epoch": 4.84, "grad_norm": 1.4021514654159546, "learning_rate": 1.1014252144783061e-05, "loss": 0.0193, "step": 2544 }, { "epoch": 4.84, "grad_norm": 2.1059153079986572, "learning_rate": 1.1008126135596175e-05, "loss": 0.0358, "step": 2545 }, { "epoch": 4.84, "grad_norm": 0.6396322846412659, "learning_rate": 1.1001999744171432e-05, "loss": 0.0062, "step": 2546 }, { "epoch": 4.84, "grad_norm": 0.9502802491188049, "learning_rate": 1.0995872972831693e-05, "loss": 0.0152, "step": 2547 }, { "epoch": 4.84, "grad_norm": 0.9562377333641052, "learning_rate": 1.0989745823899968e-05, "loss": 0.0201, "step": 2548 }, { "epoch": 4.85, "grad_norm": 1.0996085405349731, "learning_rate": 1.0983618299699408e-05, "loss": 0.0118, "step": 2549 }, { "epoch": 4.85, "grad_norm": 1.0094801187515259, "learning_rate": 1.0977490402553301e-05, "loss": 0.0186, "step": 2550 }, { "epoch": 4.85, "grad_norm": 1.0446648597717285, "learning_rate": 1.0971362134785082e-05, "loss": 0.0166, "step": 2551 }, { "epoch": 4.85, "grad_norm": 0.7413753271102905, "learning_rate": 1.096523349871833e-05, "loss": 0.0076, "step": 2552 }, { "epoch": 4.85, "grad_norm": 0.9325522780418396, "learning_rate": 1.0959104496676753e-05, "loss": 0.0145, "step": 2553 }, { "epoch": 4.86, "grad_norm": 0.8828870058059692, "learning_rate": 1.095297513098421e-05, "loss": 0.0093, "step": 2554 }, { "epoch": 4.86, "grad_norm": 1.4980043172836304, "learning_rate": 1.0946845403964683e-05, "loss": 0.0177, "step": 2555 }, { "epoch": 4.86, "grad_norm": 0.9652317762374878, "learning_rate": 1.0940715317942307e-05, "loss": 0.0148, "step": 2556 }, { "epoch": 4.86, "grad_norm": 1.1144206523895264, "learning_rate": 1.0934584875241343e-05, "loss": 0.0168, "step": 2557 }, { "epoch": 4.86, "grad_norm": 1.6368426084518433, "learning_rate": 1.0928454078186186e-05, "loss": 0.0302, "step": 2558 }, { "epoch": 4.87, "grad_norm": 0.4328669011592865, "learning_rate": 1.0922322929101377e-05, "loss": 0.0071, "step": 2559 }, { "epoch": 4.87, "grad_norm": 1.0976009368896484, "learning_rate": 1.0916191430311576e-05, "loss": 0.0197, "step": 2560 }, { "epoch": 4.87, "grad_norm": 1.362871766090393, "learning_rate": 1.0910059584141587e-05, "loss": 0.0203, "step": 2561 }, { "epoch": 4.87, "grad_norm": 0.8734797239303589, "learning_rate": 1.0903927392916336e-05, "loss": 0.0175, "step": 2562 }, { "epoch": 4.87, "grad_norm": 1.0251175165176392, "learning_rate": 1.089779485896089e-05, "loss": 0.0098, "step": 2563 }, { "epoch": 4.87, "grad_norm": 0.9058516621589661, "learning_rate": 1.0891661984600437e-05, "loss": 0.0101, "step": 2564 }, { "epoch": 4.88, "grad_norm": 1.16253662109375, "learning_rate": 1.0885528772160303e-05, "loss": 0.0129, "step": 2565 }, { "epoch": 4.88, "grad_norm": 1.0329480171203613, "learning_rate": 1.0879395223965932e-05, "loss": 0.0123, "step": 2566 }, { "epoch": 4.88, "grad_norm": 1.4211900234222412, "learning_rate": 1.0873261342342902e-05, "loss": 0.0219, "step": 2567 }, { "epoch": 4.88, "grad_norm": 1.0445141792297363, "learning_rate": 1.0867127129616917e-05, "loss": 0.0171, "step": 2568 }, { "epoch": 4.88, "grad_norm": 1.2101994752883911, "learning_rate": 1.0860992588113803e-05, "loss": 0.0238, "step": 2569 }, { "epoch": 4.89, "grad_norm": 0.9544209837913513, "learning_rate": 1.0854857720159515e-05, "loss": 0.0152, "step": 2570 }, { "epoch": 4.89, "grad_norm": 0.8582392930984497, "learning_rate": 1.0848722528080125e-05, "loss": 0.0079, "step": 2571 }, { "epoch": 4.89, "grad_norm": 1.3767743110656738, "learning_rate": 1.084258701420184e-05, "loss": 0.0249, "step": 2572 }, { "epoch": 4.89, "grad_norm": 0.8016901016235352, "learning_rate": 1.0836451180850978e-05, "loss": 0.013, "step": 2573 }, { "epoch": 4.89, "grad_norm": 0.9638240337371826, "learning_rate": 1.083031503035398e-05, "loss": 0.0124, "step": 2574 }, { "epoch": 4.9, "grad_norm": 1.3311415910720825, "learning_rate": 1.0824178565037414e-05, "loss": 0.0154, "step": 2575 }, { "epoch": 4.9, "grad_norm": 1.3612053394317627, "learning_rate": 1.0818041787227958e-05, "loss": 0.0151, "step": 2576 }, { "epoch": 4.9, "grad_norm": 0.6357603073120117, "learning_rate": 1.0811904699252415e-05, "loss": 0.0092, "step": 2577 }, { "epoch": 4.9, "grad_norm": 0.9088477492332458, "learning_rate": 1.0805767303437702e-05, "loss": 0.0138, "step": 2578 }, { "epoch": 4.9, "grad_norm": 1.2040385007858276, "learning_rate": 1.0799629602110857e-05, "loss": 0.0157, "step": 2579 }, { "epoch": 4.9, "grad_norm": 1.173370122909546, "learning_rate": 1.0793491597599027e-05, "loss": 0.0104, "step": 2580 }, { "epoch": 4.91, "grad_norm": 1.1180951595306396, "learning_rate": 1.0787353292229478e-05, "loss": 0.0131, "step": 2581 }, { "epoch": 4.91, "grad_norm": 1.034949541091919, "learning_rate": 1.0781214688329598e-05, "loss": 0.0182, "step": 2582 }, { "epoch": 4.91, "grad_norm": 0.9150624871253967, "learning_rate": 1.0775075788226871e-05, "loss": 0.0162, "step": 2583 }, { "epoch": 4.91, "grad_norm": 0.6998805999755859, "learning_rate": 1.0768936594248905e-05, "loss": 0.0151, "step": 2584 }, { "epoch": 4.91, "grad_norm": 0.9785563945770264, "learning_rate": 1.076279710872342e-05, "loss": 0.0089, "step": 2585 }, { "epoch": 4.92, "grad_norm": 0.7643977403640747, "learning_rate": 1.0756657333978243e-05, "loss": 0.0117, "step": 2586 }, { "epoch": 4.92, "grad_norm": 1.2569513320922852, "learning_rate": 1.0750517272341306e-05, "loss": 0.0228, "step": 2587 }, { "epoch": 4.92, "grad_norm": 0.6600367426872253, "learning_rate": 1.074437692614066e-05, "loss": 0.0107, "step": 2588 }, { "epoch": 4.92, "grad_norm": 1.1647729873657227, "learning_rate": 1.073823629770446e-05, "loss": 0.0138, "step": 2589 }, { "epoch": 4.92, "grad_norm": 0.5510746240615845, "learning_rate": 1.073209538936096e-05, "loss": 0.0123, "step": 2590 }, { "epoch": 4.93, "grad_norm": 0.8254161477088928, "learning_rate": 1.0725954203438538e-05, "loss": 0.0146, "step": 2591 }, { "epoch": 4.93, "grad_norm": 1.1404006481170654, "learning_rate": 1.0719812742265656e-05, "loss": 0.0199, "step": 2592 }, { "epoch": 4.93, "grad_norm": 0.6641539335250854, "learning_rate": 1.0713671008170896e-05, "loss": 0.0071, "step": 2593 }, { "epoch": 4.93, "grad_norm": 0.9509573578834534, "learning_rate": 1.0707529003482933e-05, "loss": 0.0179, "step": 2594 }, { "epoch": 4.93, "grad_norm": 1.0888311862945557, "learning_rate": 1.0701386730530556e-05, "loss": 0.0182, "step": 2595 }, { "epoch": 4.94, "grad_norm": 1.0355093479156494, "learning_rate": 1.0695244191642649e-05, "loss": 0.0158, "step": 2596 }, { "epoch": 4.94, "grad_norm": 1.169411063194275, "learning_rate": 1.0689101389148188e-05, "loss": 0.0159, "step": 2597 }, { "epoch": 4.94, "grad_norm": 1.1880711317062378, "learning_rate": 1.0682958325376271e-05, "loss": 0.0142, "step": 2598 }, { "epoch": 4.94, "grad_norm": 1.1510080099105835, "learning_rate": 1.0676815002656075e-05, "loss": 0.0138, "step": 2599 }, { "epoch": 4.94, "grad_norm": 0.8059406876564026, "learning_rate": 1.0670671423316886e-05, "loss": 0.0117, "step": 2600 }, { "epoch": 4.94, "eval_blimp_filtered_avg": 0.7350746268656716, "eval_blimp_filtered_std": 0.004853859071509846, "step": 2600 }, { "epoch": 4.94, "eval_blimp_supplement_avg": 0.790948275862069, "eval_blimp_supplement_std": 0.01759947366308549, "step": 2600 }, { "epoch": 4.94, "eval_vqa_filtered_avg": 0.36, "eval_vqa_filtered_std": 0.04824181513244218, "step": 2600 }, { "epoch": 4.94, "eval_winoground_filtered_avg": 0.53, "eval_winoground_filtered_std": 0.0501613558046592, "step": 2600 }, { "epoch": 4.94, "grad_norm": 0.9847586154937744, "learning_rate": 1.0664527589688078e-05, "loss": 0.0109, "step": 2601 }, { "epoch": 4.95, "grad_norm": 1.0250589847564697, "learning_rate": 1.0658383504099134e-05, "loss": 0.0188, "step": 2602 }, { "epoch": 4.95, "grad_norm": 1.074036717414856, "learning_rate": 1.065223916887962e-05, "loss": 0.0134, "step": 2603 }, { "epoch": 4.95, "grad_norm": 1.296054482460022, "learning_rate": 1.0646094586359203e-05, "loss": 0.017, "step": 2604 }, { "epoch": 4.95, "grad_norm": 0.7558330297470093, "learning_rate": 1.063994975886765e-05, "loss": 0.0063, "step": 2605 }, { "epoch": 4.95, "grad_norm": 1.60554838180542, "learning_rate": 1.0633804688734806e-05, "loss": 0.019, "step": 2606 }, { "epoch": 4.96, "grad_norm": 0.9685240983963013, "learning_rate": 1.0627659378290618e-05, "loss": 0.0129, "step": 2607 }, { "epoch": 4.96, "grad_norm": 1.1556192636489868, "learning_rate": 1.0621513829865125e-05, "loss": 0.0198, "step": 2608 }, { "epoch": 4.96, "grad_norm": 1.3842321634292603, "learning_rate": 1.061536804578845e-05, "loss": 0.015, "step": 2609 }, { "epoch": 4.96, "grad_norm": 1.0664656162261963, "learning_rate": 1.0609222028390808e-05, "loss": 0.0086, "step": 2610 }, { "epoch": 4.96, "grad_norm": 1.045168161392212, "learning_rate": 1.0603075780002507e-05, "loss": 0.0073, "step": 2611 }, { "epoch": 4.97, "grad_norm": 1.8138666152954102, "learning_rate": 1.0596929302953939e-05, "loss": 0.0274, "step": 2612 }, { "epoch": 4.97, "grad_norm": 1.382598638534546, "learning_rate": 1.0590782599575578e-05, "loss": 0.0232, "step": 2613 }, { "epoch": 4.97, "grad_norm": 0.9088744521141052, "learning_rate": 1.0584635672197992e-05, "loss": 0.0144, "step": 2614 }, { "epoch": 4.97, "grad_norm": 1.0995208024978638, "learning_rate": 1.0578488523151829e-05, "loss": 0.0202, "step": 2615 }, { "epoch": 4.97, "grad_norm": 1.3078370094299316, "learning_rate": 1.0572341154767818e-05, "loss": 0.0147, "step": 2616 }, { "epoch": 4.98, "grad_norm": 0.7466704249382019, "learning_rate": 1.0566193569376782e-05, "loss": 0.0066, "step": 2617 }, { "epoch": 4.98, "grad_norm": 0.6277287006378174, "learning_rate": 1.0560045769309618e-05, "loss": 0.0052, "step": 2618 }, { "epoch": 4.98, "grad_norm": 1.1197216510772705, "learning_rate": 1.0553897756897304e-05, "loss": 0.0123, "step": 2619 }, { "epoch": 4.98, "grad_norm": 1.1685514450073242, "learning_rate": 1.05477495344709e-05, "loss": 0.0179, "step": 2620 }, { "epoch": 4.98, "grad_norm": 1.5213159322738647, "learning_rate": 1.0541601104361549e-05, "loss": 0.0137, "step": 2621 }, { "epoch": 4.98, "grad_norm": 0.8842750191688538, "learning_rate": 1.0535452468900472e-05, "loss": 0.0096, "step": 2622 }, { "epoch": 4.99, "grad_norm": 1.1625151634216309, "learning_rate": 1.052930363041896e-05, "loss": 0.0148, "step": 2623 }, { "epoch": 4.99, "grad_norm": 1.0249075889587402, "learning_rate": 1.0523154591248388e-05, "loss": 0.0082, "step": 2624 }, { "epoch": 4.99, "grad_norm": 0.8722827434539795, "learning_rate": 1.0517005353720208e-05, "loss": 0.0116, "step": 2625 }, { "epoch": 4.99, "grad_norm": 0.5814856886863708, "learning_rate": 1.0510855920165943e-05, "loss": 0.0045, "step": 2626 }, { "epoch": 4.99, "grad_norm": 0.8303266763687134, "learning_rate": 1.0504706292917198e-05, "loss": 0.0132, "step": 2627 }, { "epoch": 5.0, "grad_norm": 0.9731358885765076, "learning_rate": 1.049855647430564e-05, "loss": 0.0148, "step": 2628 }, { "epoch": 5.0, "grad_norm": 0.6852592825889587, "learning_rate": 1.0492406466663012e-05, "loss": 0.01, "step": 2629 }, { "epoch": 5.0, "grad_norm": 0.9717788696289062, "learning_rate": 1.0486256272321138e-05, "loss": 0.0073, "step": 2630 }, { "epoch": 5.0, "grad_norm": 0.31769198179244995, "learning_rate": 1.0480105893611902e-05, "loss": 0.0036, "step": 2631 }, { "epoch": 5.0, "grad_norm": 0.29207301139831543, "learning_rate": 1.0473955332867265e-05, "loss": 0.0037, "step": 2632 }, { "epoch": 5.01, "grad_norm": 0.6190810799598694, "learning_rate": 1.0467804592419249e-05, "loss": 0.006, "step": 2633 }, { "epoch": 5.01, "grad_norm": 0.9833703637123108, "learning_rate": 1.0461653674599951e-05, "loss": 0.0144, "step": 2634 }, { "epoch": 5.01, "grad_norm": 0.8266069889068604, "learning_rate": 1.0455502581741538e-05, "loss": 0.0101, "step": 2635 }, { "epoch": 5.01, "grad_norm": 0.5069021582603455, "learning_rate": 1.044935131617623e-05, "loss": 0.0077, "step": 2636 }, { "epoch": 5.01, "grad_norm": 0.5599841475486755, "learning_rate": 1.0443199880236326e-05, "loss": 0.0069, "step": 2637 }, { "epoch": 5.02, "grad_norm": 0.754288911819458, "learning_rate": 1.0437048276254185e-05, "loss": 0.0073, "step": 2638 }, { "epoch": 5.02, "grad_norm": 0.5206348896026611, "learning_rate": 1.0430896506562224e-05, "loss": 0.0032, "step": 2639 }, { "epoch": 5.02, "grad_norm": 1.2983758449554443, "learning_rate": 1.0424744573492938e-05, "loss": 0.0135, "step": 2640 }, { "epoch": 5.02, "grad_norm": 0.6004075407981873, "learning_rate": 1.0418592479378864e-05, "loss": 0.0054, "step": 2641 }, { "epoch": 5.02, "grad_norm": 1.2100485563278198, "learning_rate": 1.0412440226552617e-05, "loss": 0.0136, "step": 2642 }, { "epoch": 5.02, "grad_norm": 0.3318271338939667, "learning_rate": 1.040628781734686e-05, "loss": 0.0038, "step": 2643 }, { "epoch": 5.03, "grad_norm": 0.5302334427833557, "learning_rate": 1.0400135254094327e-05, "loss": 0.0039, "step": 2644 }, { "epoch": 5.03, "grad_norm": 0.49987542629241943, "learning_rate": 1.03939825391278e-05, "loss": 0.0099, "step": 2645 }, { "epoch": 5.03, "grad_norm": 0.8947672843933105, "learning_rate": 1.0387829674780124e-05, "loss": 0.0085, "step": 2646 }, { "epoch": 5.03, "grad_norm": 0.482349693775177, "learning_rate": 1.0381676663384197e-05, "loss": 0.0042, "step": 2647 }, { "epoch": 5.03, "grad_norm": 1.3311059474945068, "learning_rate": 1.0375523507272976e-05, "loss": 0.0131, "step": 2648 }, { "epoch": 5.04, "grad_norm": 0.8950629830360413, "learning_rate": 1.036937020877947e-05, "loss": 0.0115, "step": 2649 }, { "epoch": 5.04, "grad_norm": 0.6510523557662964, "learning_rate": 1.036321677023675e-05, "loss": 0.0138, "step": 2650 }, { "epoch": 5.04, "grad_norm": 0.9726238250732422, "learning_rate": 1.035706319397793e-05, "loss": 0.0105, "step": 2651 }, { "epoch": 5.04, "grad_norm": 1.2669039964675903, "learning_rate": 1.0350909482336175e-05, "loss": 0.0144, "step": 2652 }, { "epoch": 5.04, "grad_norm": 0.5612160563468933, "learning_rate": 1.0344755637644717e-05, "loss": 0.007, "step": 2653 }, { "epoch": 5.05, "grad_norm": 1.019360065460205, "learning_rate": 1.0338601662236822e-05, "loss": 0.021, "step": 2654 }, { "epoch": 5.05, "grad_norm": 1.1164476871490479, "learning_rate": 1.033244755844581e-05, "loss": 0.0157, "step": 2655 }, { "epoch": 5.05, "grad_norm": 0.8829558491706848, "learning_rate": 1.0326293328605052e-05, "loss": 0.0074, "step": 2656 }, { "epoch": 5.05, "grad_norm": 1.05487060546875, "learning_rate": 1.0320138975047971e-05, "loss": 0.0101, "step": 2657 }, { "epoch": 5.05, "grad_norm": 0.32994696497917175, "learning_rate": 1.0313984500108025e-05, "loss": 0.0029, "step": 2658 }, { "epoch": 5.06, "grad_norm": 1.3625861406326294, "learning_rate": 1.030782990611873e-05, "loss": 0.008, "step": 2659 }, { "epoch": 5.06, "grad_norm": 0.909106433391571, "learning_rate": 1.030167519541364e-05, "loss": 0.0174, "step": 2660 }, { "epoch": 5.06, "grad_norm": 0.7279454469680786, "learning_rate": 1.0295520370326355e-05, "loss": 0.0041, "step": 2661 }, { "epoch": 5.06, "grad_norm": 0.672481119632721, "learning_rate": 1.0289365433190514e-05, "loss": 0.0068, "step": 2662 }, { "epoch": 5.06, "grad_norm": 0.5814129710197449, "learning_rate": 1.0283210386339812e-05, "loss": 0.0037, "step": 2663 }, { "epoch": 5.06, "grad_norm": 0.3487740457057953, "learning_rate": 1.0277055232107975e-05, "loss": 0.0043, "step": 2664 }, { "epoch": 5.07, "grad_norm": 0.6543669700622559, "learning_rate": 1.0270899972828766e-05, "loss": 0.0067, "step": 2665 }, { "epoch": 5.07, "grad_norm": 0.7041730284690857, "learning_rate": 1.0264744610835994e-05, "loss": 0.0076, "step": 2666 }, { "epoch": 5.07, "grad_norm": 0.990839421749115, "learning_rate": 1.0258589148463514e-05, "loss": 0.0118, "step": 2667 }, { "epoch": 5.07, "grad_norm": 0.776813805103302, "learning_rate": 1.0252433588045203e-05, "loss": 0.007, "step": 2668 }, { "epoch": 5.07, "grad_norm": 1.092699408531189, "learning_rate": 1.0246277931914987e-05, "loss": 0.0124, "step": 2669 }, { "epoch": 5.08, "grad_norm": 0.4810187518596649, "learning_rate": 1.0240122182406825e-05, "loss": 0.0062, "step": 2670 }, { "epoch": 5.08, "grad_norm": 1.3452767133712769, "learning_rate": 1.0233966341854709e-05, "loss": 0.005, "step": 2671 }, { "epoch": 5.08, "grad_norm": 3.055159091949463, "learning_rate": 1.0227810412592667e-05, "loss": 0.0109, "step": 2672 }, { "epoch": 5.08, "grad_norm": 1.130212664604187, "learning_rate": 1.0221654396954765e-05, "loss": 0.0151, "step": 2673 }, { "epoch": 5.08, "grad_norm": 1.4275736808776855, "learning_rate": 1.0215498297275095e-05, "loss": 0.0166, "step": 2674 }, { "epoch": 5.09, "grad_norm": 0.9879239797592163, "learning_rate": 1.0209342115887786e-05, "loss": 0.0083, "step": 2675 }, { "epoch": 5.09, "grad_norm": 0.49458053708076477, "learning_rate": 1.0203185855126996e-05, "loss": 0.0039, "step": 2676 }, { "epoch": 5.09, "grad_norm": 0.7537707090377808, "learning_rate": 1.0197029517326911e-05, "loss": 0.0117, "step": 2677 }, { "epoch": 5.09, "grad_norm": 0.7667996287345886, "learning_rate": 1.0190873104821747e-05, "loss": 0.0056, "step": 2678 }, { "epoch": 5.09, "grad_norm": 0.7595605850219727, "learning_rate": 1.0184716619945753e-05, "loss": 0.0083, "step": 2679 }, { "epoch": 5.1, "grad_norm": 0.7509440183639526, "learning_rate": 1.0178560065033203e-05, "loss": 0.0054, "step": 2680 }, { "epoch": 5.1, "grad_norm": 0.3913893401622772, "learning_rate": 1.0172403442418394e-05, "loss": 0.0028, "step": 2681 }, { "epoch": 5.1, "grad_norm": 0.9685683250427246, "learning_rate": 1.0166246754435651e-05, "loss": 0.0058, "step": 2682 }, { "epoch": 5.1, "grad_norm": 0.8154886960983276, "learning_rate": 1.0160090003419325e-05, "loss": 0.0143, "step": 2683 }, { "epoch": 5.1, "grad_norm": 0.7820063829421997, "learning_rate": 1.0153933191703788e-05, "loss": 0.0094, "step": 2684 }, { "epoch": 5.1, "grad_norm": 1.2096391916275024, "learning_rate": 1.0147776321623437e-05, "loss": 0.0114, "step": 2685 }, { "epoch": 5.11, "grad_norm": 0.8933798670768738, "learning_rate": 1.0141619395512694e-05, "loss": 0.009, "step": 2686 }, { "epoch": 5.11, "grad_norm": 0.5873507261276245, "learning_rate": 1.0135462415705997e-05, "loss": 0.0037, "step": 2687 }, { "epoch": 5.11, "grad_norm": 0.7295310497283936, "learning_rate": 1.0129305384537805e-05, "loss": 0.007, "step": 2688 }, { "epoch": 5.11, "grad_norm": 0.9669502973556519, "learning_rate": 1.01231483043426e-05, "loss": 0.0118, "step": 2689 }, { "epoch": 5.11, "grad_norm": 0.8444024324417114, "learning_rate": 1.0116991177454885e-05, "loss": 0.0063, "step": 2690 }, { "epoch": 5.12, "grad_norm": 0.5263653993606567, "learning_rate": 1.011083400620917e-05, "loss": 0.006, "step": 2691 }, { "epoch": 5.12, "grad_norm": 0.8379173874855042, "learning_rate": 1.0104676792939991e-05, "loss": 0.0042, "step": 2692 }, { "epoch": 5.12, "grad_norm": 1.2927318811416626, "learning_rate": 1.0098519539981895e-05, "loss": 0.0371, "step": 2693 }, { "epoch": 5.12, "grad_norm": 1.0873409509658813, "learning_rate": 1.0092362249669449e-05, "loss": 0.0058, "step": 2694 }, { "epoch": 5.12, "grad_norm": 0.5121276378631592, "learning_rate": 1.0086204924337229e-05, "loss": 0.0072, "step": 2695 }, { "epoch": 5.13, "grad_norm": 0.6629571914672852, "learning_rate": 1.0080047566319829e-05, "loss": 0.0059, "step": 2696 }, { "epoch": 5.13, "grad_norm": 0.6250250935554504, "learning_rate": 1.007389017795185e-05, "loss": 0.0041, "step": 2697 }, { "epoch": 5.13, "grad_norm": 0.6457865238189697, "learning_rate": 1.006773276156791e-05, "loss": 0.0079, "step": 2698 }, { "epoch": 5.13, "grad_norm": 0.7030337452888489, "learning_rate": 1.0061575319502634e-05, "loss": 0.0065, "step": 2699 }, { "epoch": 5.13, "grad_norm": 1.265428066253662, "learning_rate": 1.0055417854090661e-05, "loss": 0.0143, "step": 2700 }, { "epoch": 5.13, "eval_blimp_filtered_avg": 0.7401492537313433, "eval_blimp_filtered_std": 0.004796874659920703, "step": 2700 }, { "epoch": 5.13, "eval_blimp_supplement_avg": 0.7974137931034483, "eval_blimp_supplement_std": 0.01759280264712939, "step": 2700 }, { "epoch": 5.13, "eval_vqa_filtered_avg": 0.35, "eval_vqa_filtered_std": 0.047937248544110196, "step": 2700 }, { "epoch": 5.13, "eval_winoground_filtered_avg": 0.5, "eval_winoground_filtered_std": 0.050251890762960605, "step": 2700 }, { "epoch": 5.13, "grad_norm": 0.9961838126182556, "learning_rate": 1.0049260367666629e-05, "loss": 0.0079, "step": 2701 }, { "epoch": 5.14, "grad_norm": 0.7426487803459167, "learning_rate": 1.0043102862565198e-05, "loss": 0.0117, "step": 2702 }, { "epoch": 5.14, "grad_norm": 1.4380080699920654, "learning_rate": 1.0036945341121022e-05, "loss": 0.0169, "step": 2703 }, { "epoch": 5.14, "grad_norm": 1.0621839761734009, "learning_rate": 1.0030787805668771e-05, "loss": 0.0139, "step": 2704 }, { "epoch": 5.14, "grad_norm": 0.8427950143814087, "learning_rate": 1.0024630258543115e-05, "loss": 0.0073, "step": 2705 }, { "epoch": 5.14, "grad_norm": 0.3341779112815857, "learning_rate": 1.0018472702078733e-05, "loss": 0.0067, "step": 2706 }, { "epoch": 5.15, "grad_norm": 1.2191026210784912, "learning_rate": 1.0012315138610296e-05, "loss": 0.0068, "step": 2707 }, { "epoch": 5.15, "grad_norm": 0.5478968024253845, "learning_rate": 1.000615757047249e-05, "loss": 0.004, "step": 2708 }, { "epoch": 5.15, "grad_norm": 1.1841620206832886, "learning_rate": 1e-05, "loss": 0.0103, "step": 2709 }, { "epoch": 5.15, "grad_norm": 0.6114522814750671, "learning_rate": 9.99384242952751e-06, "loss": 0.004, "step": 2710 }, { "epoch": 5.15, "grad_norm": 0.4559280276298523, "learning_rate": 9.987684861389709e-06, "loss": 0.0045, "step": 2711 }, { "epoch": 5.16, "grad_norm": 1.3414071798324585, "learning_rate": 9.981527297921272e-06, "loss": 0.0154, "step": 2712 }, { "epoch": 5.16, "grad_norm": 0.9023210406303406, "learning_rate": 9.975369741456887e-06, "loss": 0.0119, "step": 2713 }, { "epoch": 5.16, "grad_norm": 1.3286763429641724, "learning_rate": 9.96921219433123e-06, "loss": 0.0127, "step": 2714 }, { "epoch": 5.16, "grad_norm": 1.1417698860168457, "learning_rate": 9.96305465887898e-06, "loss": 0.0091, "step": 2715 }, { "epoch": 5.16, "grad_norm": 1.1354831457138062, "learning_rate": 9.956897137434804e-06, "loss": 0.0116, "step": 2716 }, { "epoch": 5.17, "grad_norm": 1.0087381601333618, "learning_rate": 9.950739632333374e-06, "loss": 0.0134, "step": 2717 }, { "epoch": 5.17, "grad_norm": 1.234533667564392, "learning_rate": 9.944582145909344e-06, "loss": 0.0197, "step": 2718 }, { "epoch": 5.17, "grad_norm": 1.0098137855529785, "learning_rate": 9.938424680497366e-06, "loss": 0.0129, "step": 2719 }, { "epoch": 5.17, "grad_norm": 0.900110125541687, "learning_rate": 9.932267238432093e-06, "loss": 0.0234, "step": 2720 }, { "epoch": 5.17, "grad_norm": 0.7072737216949463, "learning_rate": 9.926109822048153e-06, "loss": 0.0117, "step": 2721 }, { "epoch": 5.17, "grad_norm": 1.0280436277389526, "learning_rate": 9.919952433680176e-06, "loss": 0.0095, "step": 2722 }, { "epoch": 5.18, "grad_norm": 0.7399468421936035, "learning_rate": 9.913795075662774e-06, "loss": 0.0035, "step": 2723 }, { "epoch": 5.18, "grad_norm": 0.7819843888282776, "learning_rate": 9.907637750330553e-06, "loss": 0.0055, "step": 2724 }, { "epoch": 5.18, "grad_norm": 0.6925773620605469, "learning_rate": 9.901480460018109e-06, "loss": 0.0033, "step": 2725 }, { "epoch": 5.18, "grad_norm": 0.7109422087669373, "learning_rate": 9.895323207060012e-06, "loss": 0.0094, "step": 2726 }, { "epoch": 5.18, "grad_norm": 0.9094730615615845, "learning_rate": 9.889165993790834e-06, "loss": 0.0087, "step": 2727 }, { "epoch": 5.19, "grad_norm": 0.8169094324111938, "learning_rate": 9.883008822545118e-06, "loss": 0.0029, "step": 2728 }, { "epoch": 5.19, "grad_norm": 1.3500930070877075, "learning_rate": 9.876851695657402e-06, "loss": 0.0129, "step": 2729 }, { "epoch": 5.19, "grad_norm": 0.9244000911712646, "learning_rate": 9.870694615462197e-06, "loss": 0.0065, "step": 2730 }, { "epoch": 5.19, "grad_norm": 0.39528918266296387, "learning_rate": 9.86453758429401e-06, "loss": 0.0081, "step": 2731 }, { "epoch": 5.19, "grad_norm": 1.2135567665100098, "learning_rate": 9.85838060448731e-06, "loss": 0.0095, "step": 2732 }, { "epoch": 5.2, "grad_norm": 0.48261260986328125, "learning_rate": 9.852223678376565e-06, "loss": 0.0041, "step": 2733 }, { "epoch": 5.2, "grad_norm": 0.7397365570068359, "learning_rate": 9.846066808296217e-06, "loss": 0.0044, "step": 2734 }, { "epoch": 5.2, "grad_norm": 0.6328552961349487, "learning_rate": 9.839909996580678e-06, "loss": 0.0119, "step": 2735 }, { "epoch": 5.2, "grad_norm": 0.7186647653579712, "learning_rate": 9.833753245564352e-06, "loss": 0.0097, "step": 2736 }, { "epoch": 5.2, "grad_norm": 0.7614459991455078, "learning_rate": 9.827596557581609e-06, "loss": 0.014, "step": 2737 }, { "epoch": 5.21, "grad_norm": 0.4123227000236511, "learning_rate": 9.821439934966799e-06, "loss": 0.0084, "step": 2738 }, { "epoch": 5.21, "grad_norm": 1.3240638971328735, "learning_rate": 9.815283380054245e-06, "loss": 0.012, "step": 2739 }, { "epoch": 5.21, "grad_norm": 1.0346254110336304, "learning_rate": 9.809126895178256e-06, "loss": 0.0095, "step": 2740 }, { "epoch": 5.21, "grad_norm": 0.8740264773368835, "learning_rate": 9.802970482673092e-06, "loss": 0.0061, "step": 2741 }, { "epoch": 5.21, "grad_norm": 0.6357085108757019, "learning_rate": 9.796814144873006e-06, "loss": 0.0059, "step": 2742 }, { "epoch": 5.21, "grad_norm": 0.8260998129844666, "learning_rate": 9.790657884112218e-06, "loss": 0.0085, "step": 2743 }, { "epoch": 5.22, "grad_norm": 0.8793674111366272, "learning_rate": 9.784501702724907e-06, "loss": 0.0128, "step": 2744 }, { "epoch": 5.22, "grad_norm": 0.8145027160644531, "learning_rate": 9.778345603045236e-06, "loss": 0.0103, "step": 2745 }, { "epoch": 5.22, "grad_norm": 0.40415138006210327, "learning_rate": 9.772189587407336e-06, "loss": 0.007, "step": 2746 }, { "epoch": 5.22, "grad_norm": 0.731745719909668, "learning_rate": 9.766033658145294e-06, "loss": 0.0062, "step": 2747 }, { "epoch": 5.22, "grad_norm": 0.7942315936088562, "learning_rate": 9.75987781759318e-06, "loss": 0.0047, "step": 2748 }, { "epoch": 5.23, "grad_norm": 1.1686955690383911, "learning_rate": 9.753722068085016e-06, "loss": 0.0178, "step": 2749 }, { "epoch": 5.23, "grad_norm": 0.4726708233356476, "learning_rate": 9.747566411954799e-06, "loss": 0.0058, "step": 2750 }, { "epoch": 5.23, "grad_norm": 0.9288876056671143, "learning_rate": 9.74141085153649e-06, "loss": 0.0129, "step": 2751 }, { "epoch": 5.23, "grad_norm": 0.47259706258773804, "learning_rate": 9.735255389164007e-06, "loss": 0.0033, "step": 2752 }, { "epoch": 5.23, "grad_norm": 0.9448795914649963, "learning_rate": 9.729100027171236e-06, "loss": 0.0122, "step": 2753 }, { "epoch": 5.24, "grad_norm": 1.44041907787323, "learning_rate": 9.722944767892032e-06, "loss": 0.0127, "step": 2754 }, { "epoch": 5.24, "grad_norm": 1.0374212265014648, "learning_rate": 9.71678961366019e-06, "loss": 0.0084, "step": 2755 }, { "epoch": 5.24, "grad_norm": 0.9793757200241089, "learning_rate": 9.710634566809484e-06, "loss": 0.0127, "step": 2756 }, { "epoch": 5.24, "grad_norm": 1.0885908603668213, "learning_rate": 9.704479629673652e-06, "loss": 0.0127, "step": 2757 }, { "epoch": 5.24, "grad_norm": 0.3297397494316101, "learning_rate": 9.698324804586363e-06, "loss": 0.0034, "step": 2758 }, { "epoch": 5.25, "grad_norm": 0.7030024528503418, "learning_rate": 9.692170093881273e-06, "loss": 0.0051, "step": 2759 }, { "epoch": 5.25, "grad_norm": 0.9775123596191406, "learning_rate": 9.686015499891977e-06, "loss": 0.0062, "step": 2760 }, { "epoch": 5.25, "grad_norm": 0.47190043330192566, "learning_rate": 9.679861024952032e-06, "loss": 0.0027, "step": 2761 }, { "epoch": 5.25, "grad_norm": 0.9764920473098755, "learning_rate": 9.673706671394948e-06, "loss": 0.0105, "step": 2762 }, { "epoch": 5.25, "grad_norm": 0.5478046536445618, "learning_rate": 9.667552441554195e-06, "loss": 0.0046, "step": 2763 }, { "epoch": 5.25, "grad_norm": 2.3590545654296875, "learning_rate": 9.661398337763182e-06, "loss": 0.0437, "step": 2764 }, { "epoch": 5.26, "grad_norm": 0.6231750249862671, "learning_rate": 9.655244362355285e-06, "loss": 0.0063, "step": 2765 }, { "epoch": 5.26, "grad_norm": 0.944160521030426, "learning_rate": 9.649090517663826e-06, "loss": 0.0042, "step": 2766 }, { "epoch": 5.26, "grad_norm": 1.3603475093841553, "learning_rate": 9.642936806022074e-06, "loss": 0.0116, "step": 2767 }, { "epoch": 5.26, "grad_norm": 1.651041865348816, "learning_rate": 9.63678322976325e-06, "loss": 0.0108, "step": 2768 }, { "epoch": 5.26, "grad_norm": 1.3626219034194946, "learning_rate": 9.630629791220532e-06, "loss": 0.0081, "step": 2769 }, { "epoch": 5.27, "grad_norm": 0.3176211714744568, "learning_rate": 9.624476492727026e-06, "loss": 0.0021, "step": 2770 }, { "epoch": 5.27, "grad_norm": 0.8193247318267822, "learning_rate": 9.61832333661581e-06, "loss": 0.0032, "step": 2771 }, { "epoch": 5.27, "grad_norm": 0.5903834104537964, "learning_rate": 9.61217032521988e-06, "loss": 0.0055, "step": 2772 }, { "epoch": 5.27, "grad_norm": 0.596726655960083, "learning_rate": 9.606017460872203e-06, "loss": 0.0095, "step": 2773 }, { "epoch": 5.27, "grad_norm": 0.9054610133171082, "learning_rate": 9.599864745905676e-06, "loss": 0.0079, "step": 2774 }, { "epoch": 5.28, "grad_norm": 0.578330934047699, "learning_rate": 9.593712182653142e-06, "loss": 0.0042, "step": 2775 }, { "epoch": 5.28, "grad_norm": 0.738797664642334, "learning_rate": 9.587559773447386e-06, "loss": 0.0076, "step": 2776 }, { "epoch": 5.28, "grad_norm": 0.7348605394363403, "learning_rate": 9.581407520621139e-06, "loss": 0.0109, "step": 2777 }, { "epoch": 5.28, "grad_norm": 1.287216067314148, "learning_rate": 9.575255426507066e-06, "loss": 0.0185, "step": 2778 }, { "epoch": 5.28, "grad_norm": 0.6637338399887085, "learning_rate": 9.569103493437776e-06, "loss": 0.0041, "step": 2779 }, { "epoch": 5.29, "grad_norm": 0.6306352019309998, "learning_rate": 9.56295172374582e-06, "loss": 0.0182, "step": 2780 }, { "epoch": 5.29, "grad_norm": 1.1775792837142944, "learning_rate": 9.556800119763675e-06, "loss": 0.0096, "step": 2781 }, { "epoch": 5.29, "grad_norm": 0.661463737487793, "learning_rate": 9.550648683823774e-06, "loss": 0.005, "step": 2782 }, { "epoch": 5.29, "grad_norm": 0.6423759460449219, "learning_rate": 9.544497418258467e-06, "loss": 0.0113, "step": 2783 }, { "epoch": 5.29, "grad_norm": 1.5248243808746338, "learning_rate": 9.53834632540005e-06, "loss": 0.0035, "step": 2784 }, { "epoch": 5.29, "grad_norm": 0.7123475670814514, "learning_rate": 9.532195407580753e-06, "loss": 0.0129, "step": 2785 }, { "epoch": 5.3, "grad_norm": 1.2912342548370361, "learning_rate": 9.52604466713274e-06, "loss": 0.0196, "step": 2786 }, { "epoch": 5.3, "grad_norm": 0.920992910861969, "learning_rate": 9.519894106388101e-06, "loss": 0.0072, "step": 2787 }, { "epoch": 5.3, "grad_norm": 0.595790445804596, "learning_rate": 9.513743727678862e-06, "loss": 0.0063, "step": 2788 }, { "epoch": 5.3, "grad_norm": 0.7139922380447388, "learning_rate": 9.507593533336992e-06, "loss": 0.0057, "step": 2789 }, { "epoch": 5.3, "grad_norm": 0.879625141620636, "learning_rate": 9.501443525694364e-06, "loss": 0.0049, "step": 2790 }, { "epoch": 5.31, "grad_norm": 0.3033978044986725, "learning_rate": 9.495293707082804e-06, "loss": 0.0027, "step": 2791 }, { "epoch": 5.31, "grad_norm": 1.6135318279266357, "learning_rate": 9.489144079834059e-06, "loss": 0.0129, "step": 2792 }, { "epoch": 5.31, "grad_norm": 0.8616555333137512, "learning_rate": 9.482994646279794e-06, "loss": 0.0049, "step": 2793 }, { "epoch": 5.31, "grad_norm": 0.47900426387786865, "learning_rate": 9.476845408751614e-06, "loss": 0.0041, "step": 2794 }, { "epoch": 5.31, "grad_norm": 0.8925184011459351, "learning_rate": 9.470696369581044e-06, "loss": 0.0081, "step": 2795 }, { "epoch": 5.32, "grad_norm": 1.6408107280731201, "learning_rate": 9.464547531099532e-06, "loss": 0.0145, "step": 2796 }, { "epoch": 5.32, "grad_norm": 1.3082157373428345, "learning_rate": 9.458398895638453e-06, "loss": 0.0076, "step": 2797 }, { "epoch": 5.32, "grad_norm": 0.4799700975418091, "learning_rate": 9.452250465529102e-06, "loss": 0.0084, "step": 2798 }, { "epoch": 5.32, "grad_norm": 0.8146674036979675, "learning_rate": 9.446102243102698e-06, "loss": 0.0095, "step": 2799 }, { "epoch": 5.32, "grad_norm": 0.7753108143806458, "learning_rate": 9.439954230690387e-06, "loss": 0.0088, "step": 2800 }, { "epoch": 5.32, "eval_blimp_filtered_avg": 0.7332835820895522, "eval_blimp_filtered_std": 0.004862288972234911, "step": 2800 }, { "epoch": 5.32, "eval_blimp_supplement_avg": 0.7931034482758621, "eval_blimp_supplement_std": 0.017575696611345212, "step": 2800 }, { "epoch": 5.32, "eval_vqa_filtered_avg": 0.33, "eval_vqa_filtered_std": 0.04725815626252604, "step": 2800 }, { "epoch": 5.32, "eval_winoground_filtered_avg": 0.51, "eval_winoground_filtered_std": 0.05024183937956911, "step": 2800 }, { "epoch": 5.33, "grad_norm": 0.5243006348609924, "learning_rate": 9.43380643062322e-06, "loss": 0.0099, "step": 2801 }, { "epoch": 5.33, "grad_norm": 0.6419429779052734, "learning_rate": 9.427658845232182e-06, "loss": 0.0091, "step": 2802 }, { "epoch": 5.33, "grad_norm": 0.6939083933830261, "learning_rate": 9.421511476848178e-06, "loss": 0.0034, "step": 2803 }, { "epoch": 5.33, "grad_norm": 0.5936410427093506, "learning_rate": 9.415364327802011e-06, "loss": 0.0035, "step": 2804 }, { "epoch": 5.33, "grad_norm": 0.5917549729347229, "learning_rate": 9.409217400424425e-06, "loss": 0.0082, "step": 2805 }, { "epoch": 5.33, "grad_norm": 0.5054365396499634, "learning_rate": 9.403070697046065e-06, "loss": 0.0111, "step": 2806 }, { "epoch": 5.34, "grad_norm": 1.5605640411376953, "learning_rate": 9.396924219997494e-06, "loss": 0.0048, "step": 2807 }, { "epoch": 5.34, "grad_norm": 0.9877591729164124, "learning_rate": 9.390777971609193e-06, "loss": 0.0137, "step": 2808 }, { "epoch": 5.34, "grad_norm": 1.4774881601333618, "learning_rate": 9.384631954211556e-06, "loss": 0.0121, "step": 2809 }, { "epoch": 5.34, "grad_norm": 1.1676702499389648, "learning_rate": 9.378486170134879e-06, "loss": 0.0101, "step": 2810 }, { "epoch": 5.34, "grad_norm": 0.5817092061042786, "learning_rate": 9.372340621709384e-06, "loss": 0.0036, "step": 2811 }, { "epoch": 5.35, "grad_norm": 0.6720291376113892, "learning_rate": 9.366195311265199e-06, "loss": 0.0061, "step": 2812 }, { "epoch": 5.35, "grad_norm": 0.8584147095680237, "learning_rate": 9.360050241132354e-06, "loss": 0.0081, "step": 2813 }, { "epoch": 5.35, "grad_norm": 0.9383136630058289, "learning_rate": 9.353905413640795e-06, "loss": 0.0088, "step": 2814 }, { "epoch": 5.35, "grad_norm": 0.7501009702682495, "learning_rate": 9.347760831120384e-06, "loss": 0.0117, "step": 2815 }, { "epoch": 5.35, "grad_norm": 1.4118932485580444, "learning_rate": 9.34161649590087e-06, "loss": 0.0149, "step": 2816 }, { "epoch": 5.36, "grad_norm": 1.2690788507461548, "learning_rate": 9.335472410311924e-06, "loss": 0.0111, "step": 2817 }, { "epoch": 5.36, "grad_norm": 1.0960172414779663, "learning_rate": 9.329328576683118e-06, "loss": 0.011, "step": 2818 }, { "epoch": 5.36, "grad_norm": 1.1107579469680786, "learning_rate": 9.323184997343926e-06, "loss": 0.0137, "step": 2819 }, { "epoch": 5.36, "grad_norm": 0.9062906503677368, "learning_rate": 9.31704167462373e-06, "loss": 0.0118, "step": 2820 }, { "epoch": 5.36, "grad_norm": 0.46481001377105713, "learning_rate": 9.310898610851814e-06, "loss": 0.0044, "step": 2821 }, { "epoch": 5.37, "grad_norm": 0.43385711312294006, "learning_rate": 9.304755808357355e-06, "loss": 0.0055, "step": 2822 }, { "epoch": 5.37, "grad_norm": 0.6207031607627869, "learning_rate": 9.29861326946945e-06, "loss": 0.0035, "step": 2823 }, { "epoch": 5.37, "grad_norm": 0.7404689788818359, "learning_rate": 9.292470996517069e-06, "loss": 0.0063, "step": 2824 }, { "epoch": 5.37, "grad_norm": 0.37913385033607483, "learning_rate": 9.286328991829107e-06, "loss": 0.0047, "step": 2825 }, { "epoch": 5.37, "grad_norm": 0.5681896209716797, "learning_rate": 9.280187257734349e-06, "loss": 0.0047, "step": 2826 }, { "epoch": 5.37, "grad_norm": 1.3171000480651855, "learning_rate": 9.274045796561465e-06, "loss": 0.0103, "step": 2827 }, { "epoch": 5.38, "grad_norm": 0.9931049942970276, "learning_rate": 9.267904610639041e-06, "loss": 0.0081, "step": 2828 }, { "epoch": 5.38, "grad_norm": 0.9705025553703308, "learning_rate": 9.261763702295544e-06, "loss": 0.0105, "step": 2829 }, { "epoch": 5.38, "grad_norm": 0.9136056303977966, "learning_rate": 9.255623073859343e-06, "loss": 0.01, "step": 2830 }, { "epoch": 5.38, "grad_norm": 0.8734104037284851, "learning_rate": 9.249482727658696e-06, "loss": 0.0187, "step": 2831 }, { "epoch": 5.38, "grad_norm": 0.4831297993659973, "learning_rate": 9.243342666021764e-06, "loss": 0.004, "step": 2832 }, { "epoch": 5.39, "grad_norm": 0.9606015682220459, "learning_rate": 9.237202891276584e-06, "loss": 0.0181, "step": 2833 }, { "epoch": 5.39, "grad_norm": 0.4558781087398529, "learning_rate": 9.231063405751097e-06, "loss": 0.0048, "step": 2834 }, { "epoch": 5.39, "grad_norm": 0.9498893618583679, "learning_rate": 9.224924211773134e-06, "loss": 0.0087, "step": 2835 }, { "epoch": 5.39, "grad_norm": 0.5434033274650574, "learning_rate": 9.218785311670406e-06, "loss": 0.0048, "step": 2836 }, { "epoch": 5.39, "grad_norm": 0.9440822601318359, "learning_rate": 9.21264670777052e-06, "loss": 0.021, "step": 2837 }, { "epoch": 5.4, "grad_norm": 0.5796136856079102, "learning_rate": 9.206508402400978e-06, "loss": 0.0047, "step": 2838 }, { "epoch": 5.4, "grad_norm": 1.4690492153167725, "learning_rate": 9.200370397889145e-06, "loss": 0.0089, "step": 2839 }, { "epoch": 5.4, "grad_norm": 1.1516879796981812, "learning_rate": 9.1942326965623e-06, "loss": 0.019, "step": 2840 }, { "epoch": 5.4, "grad_norm": 0.7079547047615051, "learning_rate": 9.188095300747588e-06, "loss": 0.0112, "step": 2841 }, { "epoch": 5.4, "grad_norm": 1.0133174657821655, "learning_rate": 9.181958212772045e-06, "loss": 0.0069, "step": 2842 }, { "epoch": 5.4, "grad_norm": 0.6787105202674866, "learning_rate": 9.175821434962588e-06, "loss": 0.0053, "step": 2843 }, { "epoch": 5.41, "grad_norm": 0.5936436057090759, "learning_rate": 9.169684969646022e-06, "loss": 0.0103, "step": 2844 }, { "epoch": 5.41, "grad_norm": 0.6616678237915039, "learning_rate": 9.163548819149023e-06, "loss": 0.0048, "step": 2845 }, { "epoch": 5.41, "grad_norm": 1.0158860683441162, "learning_rate": 9.157412985798165e-06, "loss": 0.0117, "step": 2846 }, { "epoch": 5.41, "grad_norm": 1.339171051979065, "learning_rate": 9.151277471919877e-06, "loss": 0.0104, "step": 2847 }, { "epoch": 5.41, "grad_norm": 0.8183921575546265, "learning_rate": 9.145142279840488e-06, "loss": 0.007, "step": 2848 }, { "epoch": 5.42, "grad_norm": 1.1264934539794922, "learning_rate": 9.139007411886202e-06, "loss": 0.0048, "step": 2849 }, { "epoch": 5.42, "grad_norm": 0.8395534157752991, "learning_rate": 9.132872870383086e-06, "loss": 0.0058, "step": 2850 }, { "epoch": 5.42, "grad_norm": 1.3353909254074097, "learning_rate": 9.126738657657101e-06, "loss": 0.0102, "step": 2851 }, { "epoch": 5.42, "grad_norm": 0.6169289350509644, "learning_rate": 9.120604776034071e-06, "loss": 0.0044, "step": 2852 }, { "epoch": 5.42, "grad_norm": 0.5314533114433289, "learning_rate": 9.1144712278397e-06, "loss": 0.005, "step": 2853 }, { "epoch": 5.43, "grad_norm": 0.37534889578819275, "learning_rate": 9.108338015399563e-06, "loss": 0.0054, "step": 2854 }, { "epoch": 5.43, "grad_norm": 0.8692418932914734, "learning_rate": 9.102205141039115e-06, "loss": 0.0095, "step": 2855 }, { "epoch": 5.43, "grad_norm": 0.7542033195495605, "learning_rate": 9.096072607083668e-06, "loss": 0.0078, "step": 2856 }, { "epoch": 5.43, "grad_norm": 1.0600666999816895, "learning_rate": 9.089940415858415e-06, "loss": 0.0187, "step": 2857 }, { "epoch": 5.43, "grad_norm": 0.7204827070236206, "learning_rate": 9.083808569688429e-06, "loss": 0.0066, "step": 2858 }, { "epoch": 5.44, "grad_norm": 0.7334670424461365, "learning_rate": 9.077677070898628e-06, "loss": 0.0056, "step": 2859 }, { "epoch": 5.44, "grad_norm": 1.1790317296981812, "learning_rate": 9.071545921813814e-06, "loss": 0.0121, "step": 2860 }, { "epoch": 5.44, "grad_norm": 0.5313602089881897, "learning_rate": 9.065415124758664e-06, "loss": 0.0073, "step": 2861 }, { "epoch": 5.44, "grad_norm": 0.5966873168945312, "learning_rate": 9.059284682057695e-06, "loss": 0.0053, "step": 2862 }, { "epoch": 5.44, "grad_norm": 1.25361168384552, "learning_rate": 9.053154596035319e-06, "loss": 0.0203, "step": 2863 }, { "epoch": 5.44, "grad_norm": 0.36362606287002563, "learning_rate": 9.047024869015794e-06, "loss": 0.003, "step": 2864 }, { "epoch": 5.45, "grad_norm": 1.4799773693084717, "learning_rate": 9.040895503323249e-06, "loss": 0.0136, "step": 2865 }, { "epoch": 5.45, "grad_norm": 0.6341240406036377, "learning_rate": 9.034766501281671e-06, "loss": 0.0099, "step": 2866 }, { "epoch": 5.45, "grad_norm": 0.7267678380012512, "learning_rate": 9.02863786521492e-06, "loss": 0.0101, "step": 2867 }, { "epoch": 5.45, "grad_norm": 1.399789571762085, "learning_rate": 9.022509597446702e-06, "loss": 0.0141, "step": 2868 }, { "epoch": 5.45, "grad_norm": 0.5077410340309143, "learning_rate": 9.016381700300598e-06, "loss": 0.0115, "step": 2869 }, { "epoch": 5.46, "grad_norm": 1.4204094409942627, "learning_rate": 9.010254176100035e-06, "loss": 0.0158, "step": 2870 }, { "epoch": 5.46, "grad_norm": 0.7041140198707581, "learning_rate": 9.004127027168309e-06, "loss": 0.0124, "step": 2871 }, { "epoch": 5.46, "grad_norm": 0.7171865701675415, "learning_rate": 8.998000255828573e-06, "loss": 0.0067, "step": 2872 }, { "epoch": 5.46, "grad_norm": 0.5524980425834656, "learning_rate": 8.991873864403827e-06, "loss": 0.0106, "step": 2873 }, { "epoch": 5.46, "grad_norm": 0.6481577754020691, "learning_rate": 8.98574785521694e-06, "loss": 0.0051, "step": 2874 }, { "epoch": 5.47, "grad_norm": 0.6084297299385071, "learning_rate": 8.979622230590628e-06, "loss": 0.0052, "step": 2875 }, { "epoch": 5.47, "grad_norm": 0.5839225649833679, "learning_rate": 8.97349699284746e-06, "loss": 0.0079, "step": 2876 }, { "epoch": 5.47, "grad_norm": 1.314699411392212, "learning_rate": 8.967372144309865e-06, "loss": 0.0135, "step": 2877 }, { "epoch": 5.47, "grad_norm": 0.4989725649356842, "learning_rate": 8.961247687300121e-06, "loss": 0.0031, "step": 2878 }, { "epoch": 5.47, "grad_norm": 0.6313132643699646, "learning_rate": 8.955123624140355e-06, "loss": 0.0054, "step": 2879 }, { "epoch": 5.48, "grad_norm": 0.7262178659439087, "learning_rate": 8.948999957152547e-06, "loss": 0.0051, "step": 2880 }, { "epoch": 5.48, "grad_norm": 1.0252774953842163, "learning_rate": 8.942876688658531e-06, "loss": 0.0104, "step": 2881 }, { "epoch": 5.48, "grad_norm": 1.5696474313735962, "learning_rate": 8.936753820979981e-06, "loss": 0.0097, "step": 2882 }, { "epoch": 5.48, "grad_norm": 0.31917932629585266, "learning_rate": 8.930631356438424e-06, "loss": 0.0022, "step": 2883 }, { "epoch": 5.48, "grad_norm": 0.6020964980125427, "learning_rate": 8.92450929735524e-06, "loss": 0.004, "step": 2884 }, { "epoch": 5.48, "grad_norm": 0.422773540019989, "learning_rate": 8.918387646051641e-06, "loss": 0.0047, "step": 2885 }, { "epoch": 5.49, "grad_norm": 0.5856144428253174, "learning_rate": 8.912266404848697e-06, "loss": 0.003, "step": 2886 }, { "epoch": 5.49, "grad_norm": 0.972169816493988, "learning_rate": 8.906145576067318e-06, "loss": 0.0056, "step": 2887 }, { "epoch": 5.49, "grad_norm": 1.0200867652893066, "learning_rate": 8.900025162028258e-06, "loss": 0.0113, "step": 2888 }, { "epoch": 5.49, "grad_norm": 0.8143047094345093, "learning_rate": 8.893905165052108e-06, "loss": 0.0084, "step": 2889 }, { "epoch": 5.49, "grad_norm": 0.6570079922676086, "learning_rate": 8.887785587459318e-06, "loss": 0.0075, "step": 2890 }, { "epoch": 5.5, "grad_norm": 0.8363119959831238, "learning_rate": 8.881666431570155e-06, "loss": 0.0037, "step": 2891 }, { "epoch": 5.5, "grad_norm": 0.5997493267059326, "learning_rate": 8.875547699704743e-06, "loss": 0.0047, "step": 2892 }, { "epoch": 5.5, "grad_norm": 0.7209751009941101, "learning_rate": 8.869429394183046e-06, "loss": 0.0158, "step": 2893 }, { "epoch": 5.5, "grad_norm": 1.5799803733825684, "learning_rate": 8.863311517324851e-06, "loss": 0.0165, "step": 2894 }, { "epoch": 5.5, "grad_norm": 1.048028826713562, "learning_rate": 8.857194071449803e-06, "loss": 0.0091, "step": 2895 }, { "epoch": 5.51, "grad_norm": 0.5320727825164795, "learning_rate": 8.851077058877365e-06, "loss": 0.0058, "step": 2896 }, { "epoch": 5.51, "grad_norm": 0.49479788541793823, "learning_rate": 8.844960481926847e-06, "loss": 0.0032, "step": 2897 }, { "epoch": 5.51, "grad_norm": 0.5072466731071472, "learning_rate": 8.83884434291739e-06, "loss": 0.0038, "step": 2898 }, { "epoch": 5.51, "grad_norm": 1.0262993574142456, "learning_rate": 8.832728644167971e-06, "loss": 0.0056, "step": 2899 }, { "epoch": 5.51, "grad_norm": 0.8321154117584229, "learning_rate": 8.826613387997394e-06, "loss": 0.0102, "step": 2900 }, { "epoch": 5.51, "eval_blimp_filtered_avg": 0.734776119402985, "eval_blimp_filtered_std": 0.00484690032972839, "step": 2900 }, { "epoch": 5.51, "eval_blimp_supplement_avg": 0.7974137931034483, "eval_blimp_supplement_std": 0.017517913709363928, "step": 2900 }, { "epoch": 5.51, "eval_vqa_filtered_avg": 0.34, "eval_vqa_filtered_std": 0.04760952285695236, "step": 2900 }, { "epoch": 5.51, "eval_winoground_filtered_avg": 0.53, "eval_winoground_filtered_std": 0.05016135580465919, "step": 2900 }, { "epoch": 5.52, "grad_norm": 0.4522905945777893, "learning_rate": 8.820498576724308e-06, "loss": 0.0022, "step": 2901 }, { "epoch": 5.52, "grad_norm": 1.0879539251327515, "learning_rate": 8.814384212667175e-06, "loss": 0.0159, "step": 2902 }, { "epoch": 5.52, "grad_norm": 0.8944164514541626, "learning_rate": 8.808270298144304e-06, "loss": 0.0083, "step": 2903 }, { "epoch": 5.52, "grad_norm": 0.88984614610672, "learning_rate": 8.80215683547383e-06, "loss": 0.0074, "step": 2904 }, { "epoch": 5.52, "grad_norm": 1.4346444606781006, "learning_rate": 8.796043826973706e-06, "loss": 0.0105, "step": 2905 }, { "epoch": 5.52, "grad_norm": 1.1789182424545288, "learning_rate": 8.789931274961725e-06, "loss": 0.0048, "step": 2906 }, { "epoch": 5.53, "grad_norm": 0.2844679057598114, "learning_rate": 8.783819181755504e-06, "loss": 0.0028, "step": 2907 }, { "epoch": 5.53, "grad_norm": 1.0186409950256348, "learning_rate": 8.77770754967248e-06, "loss": 0.0082, "step": 2908 }, { "epoch": 5.53, "grad_norm": 0.8175330758094788, "learning_rate": 8.771596381029923e-06, "loss": 0.0048, "step": 2909 }, { "epoch": 5.53, "grad_norm": 0.939536452293396, "learning_rate": 8.765485678144925e-06, "loss": 0.0078, "step": 2910 }, { "epoch": 5.53, "grad_norm": 0.8893749713897705, "learning_rate": 8.759375443334396e-06, "loss": 0.0084, "step": 2911 }, { "epoch": 5.54, "grad_norm": 0.6033971309661865, "learning_rate": 8.753265678915077e-06, "loss": 0.0048, "step": 2912 }, { "epoch": 5.54, "grad_norm": 0.8443581461906433, "learning_rate": 8.747156387203529e-06, "loss": 0.0045, "step": 2913 }, { "epoch": 5.54, "grad_norm": 1.0759599208831787, "learning_rate": 8.74104757051612e-06, "loss": 0.0055, "step": 2914 }, { "epoch": 5.54, "grad_norm": 0.5323286056518555, "learning_rate": 8.734939231169059e-06, "loss": 0.003, "step": 2915 }, { "epoch": 5.54, "grad_norm": 0.4673987329006195, "learning_rate": 8.728831371478366e-06, "loss": 0.0067, "step": 2916 }, { "epoch": 5.55, "grad_norm": 0.668768048286438, "learning_rate": 8.72272399375987e-06, "loss": 0.0069, "step": 2917 }, { "epoch": 5.55, "grad_norm": 0.5230073928833008, "learning_rate": 8.716617100329232e-06, "loss": 0.0044, "step": 2918 }, { "epoch": 5.55, "grad_norm": 0.3373948335647583, "learning_rate": 8.710510693501913e-06, "loss": 0.0027, "step": 2919 }, { "epoch": 5.55, "grad_norm": 0.5427036881446838, "learning_rate": 8.704404775593204e-06, "loss": 0.005, "step": 2920 }, { "epoch": 5.55, "grad_norm": 1.9478414058685303, "learning_rate": 8.698299348918209e-06, "loss": 0.0167, "step": 2921 }, { "epoch": 5.56, "grad_norm": 0.5038382411003113, "learning_rate": 8.692194415791834e-06, "loss": 0.0024, "step": 2922 }, { "epoch": 5.56, "grad_norm": 0.5960957407951355, "learning_rate": 8.68608997852881e-06, "loss": 0.0072, "step": 2923 }, { "epoch": 5.56, "grad_norm": 0.768137514591217, "learning_rate": 8.679986039443678e-06, "loss": 0.0094, "step": 2924 }, { "epoch": 5.56, "grad_norm": 0.6990363001823425, "learning_rate": 8.673882600850782e-06, "loss": 0.0063, "step": 2925 }, { "epoch": 5.56, "grad_norm": 1.1474758386611938, "learning_rate": 8.667779665064284e-06, "loss": 0.0062, "step": 2926 }, { "epoch": 5.56, "grad_norm": 1.024536371231079, "learning_rate": 8.66167723439816e-06, "loss": 0.0057, "step": 2927 }, { "epoch": 5.57, "grad_norm": 0.44723016023635864, "learning_rate": 8.655575311166178e-06, "loss": 0.0036, "step": 2928 }, { "epoch": 5.57, "grad_norm": 0.22883862257003784, "learning_rate": 8.649473897681928e-06, "loss": 0.0028, "step": 2929 }, { "epoch": 5.57, "grad_norm": 1.0351108312606812, "learning_rate": 8.643372996258808e-06, "loss": 0.0074, "step": 2930 }, { "epoch": 5.57, "grad_norm": 1.1787837743759155, "learning_rate": 8.637272609210005e-06, "loss": 0.0111, "step": 2931 }, { "epoch": 5.57, "grad_norm": 1.4843478202819824, "learning_rate": 8.63117273884853e-06, "loss": 0.0138, "step": 2932 }, { "epoch": 5.58, "grad_norm": 0.7261542677879333, "learning_rate": 8.625073387487188e-06, "loss": 0.004, "step": 2933 }, { "epoch": 5.58, "grad_norm": 0.9987730383872986, "learning_rate": 8.618974557438588e-06, "loss": 0.0113, "step": 2934 }, { "epoch": 5.58, "grad_norm": 1.0791114568710327, "learning_rate": 8.612876251015144e-06, "loss": 0.0126, "step": 2935 }, { "epoch": 5.58, "grad_norm": 0.83797287940979, "learning_rate": 8.606778470529072e-06, "loss": 0.0161, "step": 2936 }, { "epoch": 5.58, "grad_norm": 1.0164716243743896, "learning_rate": 8.600681218292383e-06, "loss": 0.0059, "step": 2937 }, { "epoch": 5.59, "grad_norm": 0.5821194648742676, "learning_rate": 8.594584496616893e-06, "loss": 0.0039, "step": 2938 }, { "epoch": 5.59, "grad_norm": 0.8136178851127625, "learning_rate": 8.588488307814219e-06, "loss": 0.0076, "step": 2939 }, { "epoch": 5.59, "grad_norm": 0.4572877287864685, "learning_rate": 8.582392654195765e-06, "loss": 0.0048, "step": 2940 }, { "epoch": 5.59, "grad_norm": 0.6890029907226562, "learning_rate": 8.576297538072745e-06, "loss": 0.0066, "step": 2941 }, { "epoch": 5.59, "grad_norm": 0.8079728484153748, "learning_rate": 8.570202961756166e-06, "loss": 0.0076, "step": 2942 }, { "epoch": 5.6, "grad_norm": 0.7597805261611938, "learning_rate": 8.56410892755682e-06, "loss": 0.0076, "step": 2943 }, { "epoch": 5.6, "grad_norm": 0.591293454170227, "learning_rate": 8.558015437785307e-06, "loss": 0.0136, "step": 2944 }, { "epoch": 5.6, "grad_norm": 1.0032306909561157, "learning_rate": 8.551922494752014e-06, "loss": 0.0061, "step": 2945 }, { "epoch": 5.6, "grad_norm": 0.9124030470848083, "learning_rate": 8.545830100767118e-06, "loss": 0.0085, "step": 2946 }, { "epoch": 5.6, "grad_norm": 0.5016440153121948, "learning_rate": 8.5397382581406e-06, "loss": 0.0043, "step": 2947 }, { "epoch": 5.6, "grad_norm": 1.5140125751495361, "learning_rate": 8.533646969182212e-06, "loss": 0.0211, "step": 2948 }, { "epoch": 5.61, "grad_norm": 1.424112319946289, "learning_rate": 8.527556236201513e-06, "loss": 0.0109, "step": 2949 }, { "epoch": 5.61, "grad_norm": 0.6592426300048828, "learning_rate": 8.521466061507851e-06, "loss": 0.008, "step": 2950 }, { "epoch": 5.61, "grad_norm": 0.6598025560379028, "learning_rate": 8.515376447410349e-06, "loss": 0.0035, "step": 2951 }, { "epoch": 5.61, "grad_norm": 1.0740962028503418, "learning_rate": 8.509287396217926e-06, "loss": 0.011, "step": 2952 }, { "epoch": 5.61, "grad_norm": 0.3622305393218994, "learning_rate": 8.503198910239296e-06, "loss": 0.0043, "step": 2953 }, { "epoch": 5.62, "grad_norm": 0.403953492641449, "learning_rate": 8.497110991782939e-06, "loss": 0.0045, "step": 2954 }, { "epoch": 5.62, "grad_norm": 0.28661271929740906, "learning_rate": 8.491023643157136e-06, "loss": 0.0031, "step": 2955 }, { "epoch": 5.62, "grad_norm": 0.708873450756073, "learning_rate": 8.484936866669945e-06, "loss": 0.0073, "step": 2956 }, { "epoch": 5.62, "grad_norm": 0.7887547612190247, "learning_rate": 8.47885066462921e-06, "loss": 0.0065, "step": 2957 }, { "epoch": 5.62, "grad_norm": 0.19265826046466827, "learning_rate": 8.472765039342551e-06, "loss": 0.002, "step": 2958 }, { "epoch": 5.63, "grad_norm": 0.4046173393726349, "learning_rate": 8.466679993117383e-06, "loss": 0.0028, "step": 2959 }, { "epoch": 5.63, "grad_norm": 0.7501336336135864, "learning_rate": 8.460595528260883e-06, "loss": 0.0046, "step": 2960 }, { "epoch": 5.63, "grad_norm": 0.3825424611568451, "learning_rate": 8.45451164708002e-06, "loss": 0.0073, "step": 2961 }, { "epoch": 5.63, "grad_norm": 0.5944058299064636, "learning_rate": 8.448428351881546e-06, "loss": 0.0035, "step": 2962 }, { "epoch": 5.63, "grad_norm": 0.948167622089386, "learning_rate": 8.442345644971972e-06, "loss": 0.0067, "step": 2963 }, { "epoch": 5.63, "grad_norm": 0.9050383567810059, "learning_rate": 8.436263528657604e-06, "loss": 0.016, "step": 2964 }, { "epoch": 5.64, "grad_norm": 1.4030303955078125, "learning_rate": 8.430182005244522e-06, "loss": 0.0063, "step": 2965 }, { "epoch": 5.64, "grad_norm": 0.5765962600708008, "learning_rate": 8.424101077038567e-06, "loss": 0.0033, "step": 2966 }, { "epoch": 5.64, "grad_norm": 0.9967532157897949, "learning_rate": 8.418020746345371e-06, "loss": 0.0114, "step": 2967 }, { "epoch": 5.64, "grad_norm": 1.4299652576446533, "learning_rate": 8.41194101547033e-06, "loss": 0.0137, "step": 2968 }, { "epoch": 5.64, "grad_norm": 1.1223411560058594, "learning_rate": 8.405861886718613e-06, "loss": 0.008, "step": 2969 }, { "epoch": 5.65, "grad_norm": 1.040266752243042, "learning_rate": 8.39978336239517e-06, "loss": 0.0036, "step": 2970 }, { "epoch": 5.65, "grad_norm": 0.555033802986145, "learning_rate": 8.393705444804705e-06, "loss": 0.0048, "step": 2971 }, { "epoch": 5.65, "grad_norm": 1.0872474908828735, "learning_rate": 8.387628136251708e-06, "loss": 0.0066, "step": 2972 }, { "epoch": 5.65, "grad_norm": 0.3700757622718811, "learning_rate": 8.381551439040433e-06, "loss": 0.0057, "step": 2973 }, { "epoch": 5.65, "grad_norm": 1.021499752998352, "learning_rate": 8.375475355474896e-06, "loss": 0.0131, "step": 2974 }, { "epoch": 5.66, "grad_norm": 0.5422534346580505, "learning_rate": 8.369399887858887e-06, "loss": 0.0058, "step": 2975 }, { "epoch": 5.66, "grad_norm": 0.6793419718742371, "learning_rate": 8.363325038495965e-06, "loss": 0.0049, "step": 2976 }, { "epoch": 5.66, "grad_norm": 1.0820586681365967, "learning_rate": 8.357250809689444e-06, "loss": 0.0223, "step": 2977 }, { "epoch": 5.66, "grad_norm": 1.4393885135650635, "learning_rate": 8.351177203742413e-06, "loss": 0.0238, "step": 2978 }, { "epoch": 5.66, "grad_norm": 0.47903093695640564, "learning_rate": 8.345104222957718e-06, "loss": 0.0038, "step": 2979 }, { "epoch": 5.67, "grad_norm": 1.2142705917358398, "learning_rate": 8.339031869637974e-06, "loss": 0.0098, "step": 2980 }, { "epoch": 5.67, "grad_norm": 1.2928980588912964, "learning_rate": 8.332960146085552e-06, "loss": 0.0065, "step": 2981 }, { "epoch": 5.67, "grad_norm": 0.7214081287384033, "learning_rate": 8.326889054602591e-06, "loss": 0.0089, "step": 2982 }, { "epoch": 5.67, "grad_norm": 1.1877713203430176, "learning_rate": 8.320818597490979e-06, "loss": 0.0198, "step": 2983 }, { "epoch": 5.67, "grad_norm": 1.01655113697052, "learning_rate": 8.314748777052378e-06, "loss": 0.0108, "step": 2984 }, { "epoch": 5.67, "grad_norm": 0.8976530432701111, "learning_rate": 8.308679595588203e-06, "loss": 0.0077, "step": 2985 }, { "epoch": 5.68, "grad_norm": 0.6029825210571289, "learning_rate": 8.302611055399616e-06, "loss": 0.006, "step": 2986 }, { "epoch": 5.68, "grad_norm": 0.5889975428581238, "learning_rate": 8.296543158787553e-06, "loss": 0.0077, "step": 2987 }, { "epoch": 5.68, "grad_norm": 1.1212551593780518, "learning_rate": 8.290475908052699e-06, "loss": 0.0133, "step": 2988 }, { "epoch": 5.68, "grad_norm": 0.8074066638946533, "learning_rate": 8.284409305495488e-06, "loss": 0.0109, "step": 2989 }, { "epoch": 5.68, "grad_norm": 0.694240391254425, "learning_rate": 8.278343353416114e-06, "loss": 0.005, "step": 2990 }, { "epoch": 5.69, "grad_norm": 1.085967779159546, "learning_rate": 8.272278054114531e-06, "loss": 0.0075, "step": 2991 }, { "epoch": 5.69, "grad_norm": 0.9180393815040588, "learning_rate": 8.266213409890427e-06, "loss": 0.0082, "step": 2992 }, { "epoch": 5.69, "grad_norm": 0.6341254711151123, "learning_rate": 8.260149423043263e-06, "loss": 0.0072, "step": 2993 }, { "epoch": 5.69, "grad_norm": 0.32262948155403137, "learning_rate": 8.254086095872232e-06, "loss": 0.0027, "step": 2994 }, { "epoch": 5.69, "grad_norm": 0.6841192245483398, "learning_rate": 8.248023430676292e-06, "loss": 0.0149, "step": 2995 }, { "epoch": 5.7, "grad_norm": 0.8991929292678833, "learning_rate": 8.241961429754145e-06, "loss": 0.0078, "step": 2996 }, { "epoch": 5.7, "grad_norm": 0.572518527507782, "learning_rate": 8.235900095404231e-06, "loss": 0.0153, "step": 2997 }, { "epoch": 5.7, "grad_norm": 0.6778451204299927, "learning_rate": 8.229839429924753e-06, "loss": 0.006, "step": 2998 }, { "epoch": 5.7, "grad_norm": 0.34471070766448975, "learning_rate": 8.223779435613654e-06, "loss": 0.0037, "step": 2999 }, { "epoch": 5.7, "grad_norm": 1.05422043800354, "learning_rate": 8.217720114768618e-06, "loss": 0.0065, "step": 3000 }, { "epoch": 5.7, "eval_blimp_filtered_avg": 0.7337313432835821, "eval_blimp_filtered_std": 0.004844369511162009, "step": 3000 }, { "epoch": 5.7, "eval_blimp_supplement_avg": 0.7952586206896551, "eval_blimp_supplement_std": 0.017538004602551293, "step": 3000 }, { "epoch": 5.7, "eval_vqa_filtered_avg": 0.37, "eval_vqa_filtered_std": 0.048523658709391, "step": 3000 }, { "epoch": 5.7, "eval_winoground_filtered_avg": 0.51, "eval_winoground_filtered_std": 0.05024183937956911, "step": 3000 }, { "epoch": 5.71, "grad_norm": 0.6797051429748535, "learning_rate": 8.21166146968708e-06, "loss": 0.0093, "step": 3001 }, { "epoch": 5.71, "grad_norm": 0.8463801145553589, "learning_rate": 8.205603502666217e-06, "loss": 0.0094, "step": 3002 }, { "epoch": 5.71, "grad_norm": 0.5222827792167664, "learning_rate": 8.199546216002945e-06, "loss": 0.002, "step": 3003 }, { "epoch": 5.71, "grad_norm": 0.9044995903968811, "learning_rate": 8.193489611993927e-06, "loss": 0.0081, "step": 3004 }, { "epoch": 5.71, "grad_norm": 0.6418446898460388, "learning_rate": 8.18743369293557e-06, "loss": 0.0062, "step": 3005 }, { "epoch": 5.71, "grad_norm": 0.5336630344390869, "learning_rate": 8.181378461124005e-06, "loss": 0.0034, "step": 3006 }, { "epoch": 5.72, "grad_norm": 0.5504428744316101, "learning_rate": 8.175323918855125e-06, "loss": 0.0092, "step": 3007 }, { "epoch": 5.72, "grad_norm": 1.0920084714889526, "learning_rate": 8.169270068424549e-06, "loss": 0.0255, "step": 3008 }, { "epoch": 5.72, "grad_norm": 0.2115766555070877, "learning_rate": 8.163216912127631e-06, "loss": 0.0021, "step": 3009 }, { "epoch": 5.72, "grad_norm": 0.8597580194473267, "learning_rate": 8.15716445225947e-06, "loss": 0.0118, "step": 3010 }, { "epoch": 5.72, "grad_norm": 1.4923555850982666, "learning_rate": 8.151112691114898e-06, "loss": 0.0068, "step": 3011 }, { "epoch": 5.73, "grad_norm": 0.8211161494255066, "learning_rate": 8.145061630988479e-06, "loss": 0.0061, "step": 3012 }, { "epoch": 5.73, "grad_norm": 0.5532383918762207, "learning_rate": 8.13901127417451e-06, "loss": 0.0047, "step": 3013 }, { "epoch": 5.73, "grad_norm": 0.6300356388092041, "learning_rate": 8.132961622967035e-06, "loss": 0.0085, "step": 3014 }, { "epoch": 5.73, "grad_norm": 0.43873628973960876, "learning_rate": 8.126912679659809e-06, "loss": 0.0032, "step": 3015 }, { "epoch": 5.73, "grad_norm": 0.8327696323394775, "learning_rate": 8.120864446546338e-06, "loss": 0.0066, "step": 3016 }, { "epoch": 5.74, "grad_norm": 1.3220216035842896, "learning_rate": 8.114816925919844e-06, "loss": 0.009, "step": 3017 }, { "epoch": 5.74, "grad_norm": 0.6519470810890198, "learning_rate": 8.108770120073289e-06, "loss": 0.0073, "step": 3018 }, { "epoch": 5.74, "grad_norm": 0.33919501304626465, "learning_rate": 8.102724031299363e-06, "loss": 0.0064, "step": 3019 }, { "epoch": 5.74, "grad_norm": 0.449057936668396, "learning_rate": 8.096678661890476e-06, "loss": 0.0044, "step": 3020 }, { "epoch": 5.74, "grad_norm": 0.6129758954048157, "learning_rate": 8.090634014138772e-06, "loss": 0.0055, "step": 3021 }, { "epoch": 5.75, "grad_norm": 0.9111959934234619, "learning_rate": 8.084590090336128e-06, "loss": 0.0181, "step": 3022 }, { "epoch": 5.75, "grad_norm": 0.4519435167312622, "learning_rate": 8.078546892774126e-06, "loss": 0.0032, "step": 3023 }, { "epoch": 5.75, "grad_norm": 1.0628105401992798, "learning_rate": 8.072504423744094e-06, "loss": 0.0129, "step": 3024 }, { "epoch": 5.75, "grad_norm": 1.3507657051086426, "learning_rate": 8.066462685537074e-06, "loss": 0.0179, "step": 3025 }, { "epoch": 5.75, "grad_norm": 0.3922567665576935, "learning_rate": 8.06042168044383e-06, "loss": 0.0035, "step": 3026 }, { "epoch": 5.75, "grad_norm": 0.7971218228340149, "learning_rate": 8.054381410754848e-06, "loss": 0.0084, "step": 3027 }, { "epoch": 5.76, "grad_norm": 1.0001416206359863, "learning_rate": 8.048341878760344e-06, "loss": 0.0112, "step": 3028 }, { "epoch": 5.76, "grad_norm": 0.4591410756111145, "learning_rate": 8.042303086750242e-06, "loss": 0.0041, "step": 3029 }, { "epoch": 5.76, "grad_norm": 0.9389916062355042, "learning_rate": 8.03626503701419e-06, "loss": 0.0153, "step": 3030 }, { "epoch": 5.76, "grad_norm": 0.12685033679008484, "learning_rate": 8.030227731841563e-06, "loss": 0.0012, "step": 3031 }, { "epoch": 5.76, "grad_norm": 0.6067968010902405, "learning_rate": 8.024191173521435e-06, "loss": 0.0059, "step": 3032 }, { "epoch": 5.77, "grad_norm": 0.6420630812644958, "learning_rate": 8.018155364342614e-06, "loss": 0.0075, "step": 3033 }, { "epoch": 5.77, "grad_norm": 0.8981173038482666, "learning_rate": 8.012120306593623e-06, "loss": 0.0071, "step": 3034 }, { "epoch": 5.77, "grad_norm": 0.9249387383460999, "learning_rate": 8.00608600256269e-06, "loss": 0.0053, "step": 3035 }, { "epoch": 5.77, "grad_norm": 0.7265663743019104, "learning_rate": 8.000052454537756e-06, "loss": 0.0061, "step": 3036 }, { "epoch": 5.77, "grad_norm": 0.6487223505973816, "learning_rate": 7.994019664806494e-06, "loss": 0.0076, "step": 3037 }, { "epoch": 5.78, "grad_norm": 0.6549334526062012, "learning_rate": 7.987987635656268e-06, "loss": 0.0046, "step": 3038 }, { "epoch": 5.78, "grad_norm": 0.4932333528995514, "learning_rate": 7.981956369374164e-06, "loss": 0.0042, "step": 3039 }, { "epoch": 5.78, "grad_norm": 0.46984729170799255, "learning_rate": 7.975925868246985e-06, "loss": 0.004, "step": 3040 }, { "epoch": 5.78, "grad_norm": 1.0883030891418457, "learning_rate": 7.969896134561227e-06, "loss": 0.0052, "step": 3041 }, { "epoch": 5.78, "grad_norm": 0.8454027771949768, "learning_rate": 7.96386717060311e-06, "loss": 0.009, "step": 3042 }, { "epoch": 5.79, "grad_norm": 0.617936909198761, "learning_rate": 7.95783897865855e-06, "loss": 0.0068, "step": 3043 }, { "epoch": 5.79, "grad_norm": 0.861457884311676, "learning_rate": 7.951811561013181e-06, "loss": 0.01, "step": 3044 }, { "epoch": 5.79, "grad_norm": 0.49779820442199707, "learning_rate": 7.945784919952346e-06, "loss": 0.0053, "step": 3045 }, { "epoch": 5.79, "grad_norm": 0.5415629744529724, "learning_rate": 7.939759057761075e-06, "loss": 0.0027, "step": 3046 }, { "epoch": 5.79, "grad_norm": 0.7718970775604248, "learning_rate": 7.933733976724122e-06, "loss": 0.004, "step": 3047 }, { "epoch": 5.79, "grad_norm": 0.8107953667640686, "learning_rate": 7.927709679125936e-06, "loss": 0.0069, "step": 3048 }, { "epoch": 5.8, "grad_norm": 0.7031359672546387, "learning_rate": 7.921686167250668e-06, "loss": 0.0032, "step": 3049 }, { "epoch": 5.8, "grad_norm": 0.25715333223342896, "learning_rate": 7.915663443382173e-06, "loss": 0.003, "step": 3050 }, { "epoch": 5.8, "grad_norm": 0.7476741671562195, "learning_rate": 7.909641509804015e-06, "loss": 0.0189, "step": 3051 }, { "epoch": 5.8, "grad_norm": 0.47001755237579346, "learning_rate": 7.90362036879944e-06, "loss": 0.0098, "step": 3052 }, { "epoch": 5.8, "grad_norm": 0.5560104250907898, "learning_rate": 7.89760002265141e-06, "loss": 0.0038, "step": 3053 }, { "epoch": 5.81, "grad_norm": 0.834393322467804, "learning_rate": 7.891580473642584e-06, "loss": 0.0033, "step": 3054 }, { "epoch": 5.81, "grad_norm": 0.99601811170578, "learning_rate": 7.885561724055304e-06, "loss": 0.0102, "step": 3055 }, { "epoch": 5.81, "grad_norm": 0.7538038492202759, "learning_rate": 7.87954377617163e-06, "loss": 0.0099, "step": 3056 }, { "epoch": 5.81, "grad_norm": 1.2428215742111206, "learning_rate": 7.873526632273305e-06, "loss": 0.0132, "step": 3057 }, { "epoch": 5.81, "grad_norm": 1.1312519311904907, "learning_rate": 7.867510294641769e-06, "loss": 0.0117, "step": 3058 }, { "epoch": 5.82, "grad_norm": 0.7685214281082153, "learning_rate": 7.861494765558154e-06, "loss": 0.0078, "step": 3059 }, { "epoch": 5.82, "grad_norm": 0.463198721408844, "learning_rate": 7.855480047303297e-06, "loss": 0.0042, "step": 3060 }, { "epoch": 5.82, "grad_norm": 0.8277850151062012, "learning_rate": 7.849466142157708e-06, "loss": 0.0103, "step": 3061 }, { "epoch": 5.82, "grad_norm": 0.565680742263794, "learning_rate": 7.843453052401605e-06, "loss": 0.0054, "step": 3062 }, { "epoch": 5.82, "grad_norm": 0.3592250645160675, "learning_rate": 7.8374407803149e-06, "loss": 0.003, "step": 3063 }, { "epoch": 5.83, "grad_norm": 0.747665524482727, "learning_rate": 7.831429328177173e-06, "loss": 0.0053, "step": 3064 }, { "epoch": 5.83, "grad_norm": 0.26165321469306946, "learning_rate": 7.825418698267718e-06, "loss": 0.0032, "step": 3065 }, { "epoch": 5.83, "grad_norm": 0.7390199899673462, "learning_rate": 7.819408892865495e-06, "loss": 0.0083, "step": 3066 }, { "epoch": 5.83, "grad_norm": 1.3302205801010132, "learning_rate": 7.81339991424917e-06, "loss": 0.0107, "step": 3067 }, { "epoch": 5.83, "grad_norm": 0.4715717136859894, "learning_rate": 7.80739176469709e-06, "loss": 0.0034, "step": 3068 }, { "epoch": 5.83, "grad_norm": 0.6387840509414673, "learning_rate": 7.80138444648728e-06, "loss": 0.0044, "step": 3069 }, { "epoch": 5.84, "grad_norm": 0.4748729169368744, "learning_rate": 7.795377961897458e-06, "loss": 0.0029, "step": 3070 }, { "epoch": 5.84, "grad_norm": 0.37944379448890686, "learning_rate": 7.789372313205022e-06, "loss": 0.003, "step": 3071 }, { "epoch": 5.84, "grad_norm": 0.788965106010437, "learning_rate": 7.783367502687056e-06, "loss": 0.0046, "step": 3072 }, { "epoch": 5.84, "grad_norm": 0.929844319820404, "learning_rate": 7.777363532620321e-06, "loss": 0.0158, "step": 3073 }, { "epoch": 5.84, "grad_norm": 0.5607960820198059, "learning_rate": 7.771360405281271e-06, "loss": 0.0055, "step": 3074 }, { "epoch": 5.85, "grad_norm": 0.6325613856315613, "learning_rate": 7.765358122946023e-06, "loss": 0.0037, "step": 3075 }, { "epoch": 5.85, "grad_norm": 0.5117875337600708, "learning_rate": 7.759356687890387e-06, "loss": 0.0048, "step": 3076 }, { "epoch": 5.85, "grad_norm": 1.1377332210540771, "learning_rate": 7.753356102389852e-06, "loss": 0.0088, "step": 3077 }, { "epoch": 5.85, "grad_norm": 0.644646406173706, "learning_rate": 7.747356368719574e-06, "loss": 0.0068, "step": 3078 }, { "epoch": 5.85, "grad_norm": 0.7551855444908142, "learning_rate": 7.741357489154392e-06, "loss": 0.0131, "step": 3079 }, { "epoch": 5.86, "grad_norm": 0.3654152452945709, "learning_rate": 7.735359465968833e-06, "loss": 0.0014, "step": 3080 }, { "epoch": 5.86, "grad_norm": 0.7270171046257019, "learning_rate": 7.729362301437076e-06, "loss": 0.0033, "step": 3081 }, { "epoch": 5.86, "grad_norm": 1.0426105260849, "learning_rate": 7.72336599783299e-06, "loss": 0.0088, "step": 3082 }, { "epoch": 5.86, "grad_norm": 0.8715769648551941, "learning_rate": 7.717370557430119e-06, "loss": 0.0029, "step": 3083 }, { "epoch": 5.86, "grad_norm": 1.151613712310791, "learning_rate": 7.711375982501666e-06, "loss": 0.0102, "step": 3084 }, { "epoch": 5.87, "grad_norm": 0.27304890751838684, "learning_rate": 7.70538227532052e-06, "loss": 0.0019, "step": 3085 }, { "epoch": 5.87, "grad_norm": 0.8416791558265686, "learning_rate": 7.69938943815924e-06, "loss": 0.0035, "step": 3086 }, { "epoch": 5.87, "grad_norm": 0.5330768823623657, "learning_rate": 7.693397473290042e-06, "loss": 0.0078, "step": 3087 }, { "epoch": 5.87, "grad_norm": 0.34454116225242615, "learning_rate": 7.687406382984824e-06, "loss": 0.0037, "step": 3088 }, { "epoch": 5.87, "grad_norm": 0.36105650663375854, "learning_rate": 7.681416169515153e-06, "loss": 0.0039, "step": 3089 }, { "epoch": 5.87, "grad_norm": 0.8451157808303833, "learning_rate": 7.67542683515225e-06, "loss": 0.0061, "step": 3090 }, { "epoch": 5.88, "grad_norm": 0.3343273103237152, "learning_rate": 7.669438382167023e-06, "loss": 0.0019, "step": 3091 }, { "epoch": 5.88, "grad_norm": 0.7033060193061829, "learning_rate": 7.663450812830021e-06, "loss": 0.0057, "step": 3092 }, { "epoch": 5.88, "grad_norm": 0.7945072650909424, "learning_rate": 7.657464129411483e-06, "loss": 0.0042, "step": 3093 }, { "epoch": 5.88, "grad_norm": 0.7038367986679077, "learning_rate": 7.651478334181295e-06, "loss": 0.0066, "step": 3094 }, { "epoch": 5.88, "grad_norm": 0.5180373191833496, "learning_rate": 7.645493429409012e-06, "loss": 0.0052, "step": 3095 }, { "epoch": 5.89, "grad_norm": 1.3223347663879395, "learning_rate": 7.63950941736385e-06, "loss": 0.0102, "step": 3096 }, { "epoch": 5.89, "grad_norm": 0.7993056178092957, "learning_rate": 7.633526300314695e-06, "loss": 0.01, "step": 3097 }, { "epoch": 5.89, "grad_norm": 1.1631295680999756, "learning_rate": 7.627544080530078e-06, "loss": 0.0127, "step": 3098 }, { "epoch": 5.89, "grad_norm": 0.8879690170288086, "learning_rate": 7.621562760278201e-06, "loss": 0.0103, "step": 3099 }, { "epoch": 5.89, "grad_norm": 0.6835720539093018, "learning_rate": 7.615582341826925e-06, "loss": 0.0055, "step": 3100 }, { "epoch": 5.89, "eval_blimp_filtered_avg": 0.7344776119402985, "eval_blimp_filtered_std": 0.004855852071704358, "step": 3100 }, { "epoch": 5.89, "eval_blimp_supplement_avg": 0.7931034482758621, "eval_blimp_supplement_std": 0.017555398111543406, "step": 3100 }, { "epoch": 5.89, "eval_vqa_filtered_avg": 0.34, "eval_vqa_filtered_std": 0.04760952285695235, "step": 3100 }, { "epoch": 5.89, "eval_winoground_filtered_avg": 0.52, "eval_winoground_filtered_std": 0.05021167315686779, "step": 3100 }, { "epoch": 5.9, "grad_norm": 0.7470795512199402, "learning_rate": 7.609602827443761e-06, "loss": 0.0083, "step": 3101 }, { "epoch": 5.9, "grad_norm": 0.4163796901702881, "learning_rate": 7.6036242193958865e-06, "loss": 0.0063, "step": 3102 }, { "epoch": 5.9, "grad_norm": 0.9648752808570862, "learning_rate": 7.59764651995013e-06, "loss": 0.0061, "step": 3103 }, { "epoch": 5.9, "grad_norm": 0.7537296414375305, "learning_rate": 7.591669731372977e-06, "loss": 0.0155, "step": 3104 }, { "epoch": 5.9, "grad_norm": 0.5503124594688416, "learning_rate": 7.585693855930565e-06, "loss": 0.0032, "step": 3105 }, { "epoch": 5.9, "grad_norm": 0.542940616607666, "learning_rate": 7.5797188958886935e-06, "loss": 0.0062, "step": 3106 }, { "epoch": 5.91, "grad_norm": 1.5847084522247314, "learning_rate": 7.573744853512801e-06, "loss": 0.0125, "step": 3107 }, { "epoch": 5.91, "grad_norm": 0.3670158088207245, "learning_rate": 7.567771731067991e-06, "loss": 0.0031, "step": 3108 }, { "epoch": 5.91, "grad_norm": 1.152847170829773, "learning_rate": 7.5617995308190164e-06, "loss": 0.0069, "step": 3109 }, { "epoch": 5.91, "grad_norm": 0.5219510197639465, "learning_rate": 7.55582825503027e-06, "loss": 0.0052, "step": 3110 }, { "epoch": 5.91, "grad_norm": 0.5246201753616333, "learning_rate": 7.549857905965805e-06, "loss": 0.0076, "step": 3111 }, { "epoch": 5.92, "grad_norm": 1.0785002708435059, "learning_rate": 7.543888485889325e-06, "loss": 0.0047, "step": 3112 }, { "epoch": 5.92, "grad_norm": 0.7492117881774902, "learning_rate": 7.537919997064166e-06, "loss": 0.0095, "step": 3113 }, { "epoch": 5.92, "grad_norm": 0.920526921749115, "learning_rate": 7.5319524417533306e-06, "loss": 0.0103, "step": 3114 }, { "epoch": 5.92, "grad_norm": 1.151892066001892, "learning_rate": 7.525985822219449e-06, "loss": 0.0119, "step": 3115 }, { "epoch": 5.92, "grad_norm": 0.6006476879119873, "learning_rate": 7.520020140724812e-06, "loss": 0.0027, "step": 3116 }, { "epoch": 5.93, "grad_norm": 0.8769104480743408, "learning_rate": 7.514055399531346e-06, "loss": 0.0059, "step": 3117 }, { "epoch": 5.93, "grad_norm": 0.51897794008255, "learning_rate": 7.508091600900622e-06, "loss": 0.0036, "step": 3118 }, { "epoch": 5.93, "grad_norm": 1.2290927171707153, "learning_rate": 7.502128747093855e-06, "loss": 0.017, "step": 3119 }, { "epoch": 5.93, "grad_norm": 0.767949104309082, "learning_rate": 7.496166840371905e-06, "loss": 0.0055, "step": 3120 }, { "epoch": 5.93, "grad_norm": 2.0354063510894775, "learning_rate": 7.490205882995263e-06, "loss": 0.0042, "step": 3121 }, { "epoch": 5.94, "grad_norm": 0.2829437851905823, "learning_rate": 7.4842458772240704e-06, "loss": 0.0022, "step": 3122 }, { "epoch": 5.94, "grad_norm": 0.8037561774253845, "learning_rate": 7.478286825318107e-06, "loss": 0.0058, "step": 3123 }, { "epoch": 5.94, "grad_norm": 0.9408490061759949, "learning_rate": 7.472328729536777e-06, "loss": 0.0091, "step": 3124 }, { "epoch": 5.94, "grad_norm": 0.6673359274864197, "learning_rate": 7.466371592139144e-06, "loss": 0.0031, "step": 3125 }, { "epoch": 5.94, "grad_norm": 0.6346657276153564, "learning_rate": 7.4604154153838926e-06, "loss": 0.0039, "step": 3126 }, { "epoch": 5.94, "grad_norm": 1.0313618183135986, "learning_rate": 7.454460201529348e-06, "loss": 0.0059, "step": 3127 }, { "epoch": 5.95, "grad_norm": 1.1979306936264038, "learning_rate": 7.448505952833467e-06, "loss": 0.0202, "step": 3128 }, { "epoch": 5.95, "grad_norm": 0.8682224154472351, "learning_rate": 7.442552671553851e-06, "loss": 0.0098, "step": 3129 }, { "epoch": 5.95, "grad_norm": 0.9332149624824524, "learning_rate": 7.436600359947717e-06, "loss": 0.0087, "step": 3130 }, { "epoch": 5.95, "grad_norm": 0.5240464210510254, "learning_rate": 7.430649020271928e-06, "loss": 0.0062, "step": 3131 }, { "epoch": 5.95, "grad_norm": 0.6132489442825317, "learning_rate": 7.424698654782983e-06, "loss": 0.0099, "step": 3132 }, { "epoch": 5.96, "grad_norm": 0.6921342611312866, "learning_rate": 7.418749265736988e-06, "loss": 0.0063, "step": 3133 }, { "epoch": 5.96, "grad_norm": 0.5971888303756714, "learning_rate": 7.412800855389706e-06, "loss": 0.0092, "step": 3134 }, { "epoch": 5.96, "grad_norm": 0.9010554552078247, "learning_rate": 7.406853425996517e-06, "loss": 0.0076, "step": 3135 }, { "epoch": 5.96, "grad_norm": 0.42928147315979004, "learning_rate": 7.40090697981242e-06, "loss": 0.0029, "step": 3136 }, { "epoch": 5.96, "grad_norm": 0.4487017095088959, "learning_rate": 7.394961519092059e-06, "loss": 0.0038, "step": 3137 }, { "epoch": 5.97, "grad_norm": 0.7924085259437561, "learning_rate": 7.389017046089694e-06, "loss": 0.017, "step": 3138 }, { "epoch": 5.97, "grad_norm": 0.3647805452346802, "learning_rate": 7.38307356305921e-06, "loss": 0.0031, "step": 3139 }, { "epoch": 5.97, "grad_norm": 1.0253535509109497, "learning_rate": 7.37713107225412e-06, "loss": 0.0144, "step": 3140 }, { "epoch": 5.97, "grad_norm": 0.7886066436767578, "learning_rate": 7.37118957592756e-06, "loss": 0.0057, "step": 3141 }, { "epoch": 5.97, "grad_norm": 1.5593057870864868, "learning_rate": 7.365249076332286e-06, "loss": 0.0045, "step": 3142 }, { "epoch": 5.98, "grad_norm": 0.46946409344673157, "learning_rate": 7.359309575720684e-06, "loss": 0.0059, "step": 3143 }, { "epoch": 5.98, "grad_norm": 1.5821799039840698, "learning_rate": 7.353371076344751e-06, "loss": 0.0097, "step": 3144 }, { "epoch": 5.98, "grad_norm": 1.0830352306365967, "learning_rate": 7.347433580456109e-06, "loss": 0.0087, "step": 3145 }, { "epoch": 5.98, "grad_norm": 0.7060543894767761, "learning_rate": 7.341497090306007e-06, "loss": 0.005, "step": 3146 }, { "epoch": 5.98, "grad_norm": 1.1333385705947876, "learning_rate": 7.335561608145295e-06, "loss": 0.0071, "step": 3147 }, { "epoch": 5.98, "grad_norm": 1.0445750951766968, "learning_rate": 7.32962713622446e-06, "loss": 0.009, "step": 3148 }, { "epoch": 5.99, "grad_norm": 0.5765860676765442, "learning_rate": 7.323693676793593e-06, "loss": 0.004, "step": 3149 }, { "epoch": 5.99, "grad_norm": 1.2417584657669067, "learning_rate": 7.317761232102408e-06, "loss": 0.0121, "step": 3150 }, { "epoch": 5.99, "grad_norm": 0.8023884892463684, "learning_rate": 7.311829804400226e-06, "loss": 0.0101, "step": 3151 }, { "epoch": 5.99, "grad_norm": 0.47061967849731445, "learning_rate": 7.305899395935996e-06, "loss": 0.0027, "step": 3152 }, { "epoch": 5.99, "grad_norm": 1.1256605386734009, "learning_rate": 7.299970008958264e-06, "loss": 0.0102, "step": 3153 }, { "epoch": 6.0, "grad_norm": 0.8763269186019897, "learning_rate": 7.2940416457152034e-06, "loss": 0.0051, "step": 3154 }, { "epoch": 6.0, "grad_norm": 0.7952509522438049, "learning_rate": 7.2881143084545945e-06, "loss": 0.0145, "step": 3155 }, { "epoch": 6.0, "grad_norm": 0.14507077634334564, "learning_rate": 7.282187999423819e-06, "loss": 0.0014, "step": 3156 }, { "epoch": 6.0, "grad_norm": 0.8913110494613647, "learning_rate": 7.2762627208698855e-06, "loss": 0.0057, "step": 3157 }, { "epoch": 6.0, "grad_norm": 0.7758547067642212, "learning_rate": 7.2703384750394035e-06, "loss": 0.0033, "step": 3158 }, { "epoch": 6.01, "grad_norm": 0.5844603776931763, "learning_rate": 7.264415264178584e-06, "loss": 0.0095, "step": 3159 }, { "epoch": 6.01, "grad_norm": 0.8660953640937805, "learning_rate": 7.258493090533258e-06, "loss": 0.0044, "step": 3160 }, { "epoch": 6.01, "grad_norm": 0.7499370574951172, "learning_rate": 7.2525719563488574e-06, "loss": 0.0094, "step": 3161 }, { "epoch": 6.01, "grad_norm": 0.361605703830719, "learning_rate": 7.2466518638704195e-06, "loss": 0.0029, "step": 3162 }, { "epoch": 6.01, "grad_norm": 0.7033261060714722, "learning_rate": 7.240732815342586e-06, "loss": 0.0037, "step": 3163 }, { "epoch": 6.02, "grad_norm": 0.542722225189209, "learning_rate": 7.234814813009607e-06, "loss": 0.0015, "step": 3164 }, { "epoch": 6.02, "grad_norm": 0.5977662205696106, "learning_rate": 7.228897859115329e-06, "loss": 0.0068, "step": 3165 }, { "epoch": 6.02, "grad_norm": 1.0372097492218018, "learning_rate": 7.222981955903212e-06, "loss": 0.0033, "step": 3166 }, { "epoch": 6.02, "grad_norm": 0.4871741831302643, "learning_rate": 7.217067105616303e-06, "loss": 0.0083, "step": 3167 }, { "epoch": 6.02, "grad_norm": 1.21262526512146, "learning_rate": 7.211153310497261e-06, "loss": 0.0087, "step": 3168 }, { "epoch": 6.02, "grad_norm": 0.5506594181060791, "learning_rate": 7.2052405727883465e-06, "loss": 0.0028, "step": 3169 }, { "epoch": 6.03, "grad_norm": 0.3137950599193573, "learning_rate": 7.199328894731405e-06, "loss": 0.0041, "step": 3170 }, { "epoch": 6.03, "grad_norm": 0.5221046209335327, "learning_rate": 7.193418278567897e-06, "loss": 0.0047, "step": 3171 }, { "epoch": 6.03, "grad_norm": 0.8224542737007141, "learning_rate": 7.187508726538868e-06, "loss": 0.009, "step": 3172 }, { "epoch": 6.03, "grad_norm": 0.5343019962310791, "learning_rate": 7.181600240884965e-06, "loss": 0.0035, "step": 3173 }, { "epoch": 6.03, "grad_norm": 0.5198337435722351, "learning_rate": 7.17569282384643e-06, "loss": 0.003, "step": 3174 }, { "epoch": 6.04, "grad_norm": 0.5791093111038208, "learning_rate": 7.169786477663108e-06, "loss": 0.0053, "step": 3175 }, { "epoch": 6.04, "grad_norm": 0.3605826497077942, "learning_rate": 7.163881204574416e-06, "loss": 0.0011, "step": 3176 }, { "epoch": 6.04, "grad_norm": 1.037620186805725, "learning_rate": 7.15797700681939e-06, "loss": 0.0082, "step": 3177 }, { "epoch": 6.04, "grad_norm": 0.3025871217250824, "learning_rate": 7.152073886636644e-06, "loss": 0.0025, "step": 3178 }, { "epoch": 6.04, "grad_norm": 0.3360064923763275, "learning_rate": 7.146171846264383e-06, "loss": 0.0022, "step": 3179 }, { "epoch": 6.05, "grad_norm": 0.4801334738731384, "learning_rate": 7.140270887940406e-06, "loss": 0.0039, "step": 3180 }, { "epoch": 6.05, "grad_norm": 0.6673445701599121, "learning_rate": 7.134371013902106e-06, "loss": 0.0036, "step": 3181 }, { "epoch": 6.05, "grad_norm": 0.2897481918334961, "learning_rate": 7.128472226386455e-06, "loss": 0.0021, "step": 3182 }, { "epoch": 6.05, "grad_norm": 0.21410073339939117, "learning_rate": 7.12257452763002e-06, "loss": 0.0014, "step": 3183 }, { "epoch": 6.05, "grad_norm": 0.3363342881202698, "learning_rate": 7.116677919868955e-06, "loss": 0.0017, "step": 3184 }, { "epoch": 6.06, "grad_norm": 0.6853935122489929, "learning_rate": 7.110782405338998e-06, "loss": 0.0043, "step": 3185 }, { "epoch": 6.06, "grad_norm": 0.521338939666748, "learning_rate": 7.10488798627547e-06, "loss": 0.006, "step": 3186 }, { "epoch": 6.06, "grad_norm": 0.528075098991394, "learning_rate": 7.0989946649132864e-06, "loss": 0.003, "step": 3187 }, { "epoch": 6.06, "grad_norm": 0.9496704339981079, "learning_rate": 7.0931024434869325e-06, "loss": 0.0233, "step": 3188 }, { "epoch": 6.06, "grad_norm": 1.3903065919876099, "learning_rate": 7.087211324230492e-06, "loss": 0.0087, "step": 3189 }, { "epoch": 6.06, "grad_norm": 0.9544508457183838, "learning_rate": 7.0813213093776145e-06, "loss": 0.0054, "step": 3190 }, { "epoch": 6.07, "grad_norm": 0.8110653162002563, "learning_rate": 7.075432401161541e-06, "loss": 0.0067, "step": 3191 }, { "epoch": 6.07, "grad_norm": 0.7609295845031738, "learning_rate": 7.069544601815099e-06, "loss": 0.0113, "step": 3192 }, { "epoch": 6.07, "grad_norm": 0.6286706924438477, "learning_rate": 7.063657913570678e-06, "loss": 0.0031, "step": 3193 }, { "epoch": 6.07, "grad_norm": 0.46832790970802307, "learning_rate": 7.05777233866026e-06, "loss": 0.0066, "step": 3194 }, { "epoch": 6.07, "grad_norm": 1.2711615562438965, "learning_rate": 7.0518878793154e-06, "loss": 0.0074, "step": 3195 }, { "epoch": 6.08, "grad_norm": 0.6953765153884888, "learning_rate": 7.04600453776723e-06, "loss": 0.005, "step": 3196 }, { "epoch": 6.08, "grad_norm": 0.781500518321991, "learning_rate": 7.0401223162464584e-06, "loss": 0.0025, "step": 3197 }, { "epoch": 6.08, "grad_norm": 0.9398418664932251, "learning_rate": 7.034241216983373e-06, "loss": 0.0081, "step": 3198 }, { "epoch": 6.08, "grad_norm": 0.36833953857421875, "learning_rate": 7.0283612422078264e-06, "loss": 0.0016, "step": 3199 }, { "epoch": 6.08, "grad_norm": 0.7052181959152222, "learning_rate": 7.022482394149253e-06, "loss": 0.0044, "step": 3200 }, { "epoch": 6.08, "eval_blimp_filtered_avg": 0.7319402985074627, "eval_blimp_filtered_std": 0.004865180486485855, "step": 3200 }, { "epoch": 6.08, "eval_blimp_supplement_avg": 0.8017241379310345, "eval_blimp_supplement_std": 0.01733901004783174, "step": 3200 }, { "epoch": 6.08, "eval_vqa_filtered_avg": 0.34, "eval_vqa_filtered_std": 0.04760952285695236, "step": 3200 }, { "epoch": 6.08, "eval_winoground_filtered_avg": 0.5, "eval_winoground_filtered_std": 0.050251890762960605, "step": 3200 }, { "epoch": 6.09, "grad_norm": 0.5271984934806824, "learning_rate": 7.016604675036665e-06, "loss": 0.0037, "step": 3201 }, { "epoch": 6.09, "grad_norm": 1.070693016052246, "learning_rate": 7.010728087098627e-06, "loss": 0.0048, "step": 3202 }, { "epoch": 6.09, "grad_norm": 0.7874064445495605, "learning_rate": 7.0048526325632944e-06, "loss": 0.0111, "step": 3203 }, { "epoch": 6.09, "grad_norm": 0.23603397607803345, "learning_rate": 6.998978313658391e-06, "loss": 0.0019, "step": 3204 }, { "epoch": 6.09, "grad_norm": 0.23850268125534058, "learning_rate": 6.993105132611192e-06, "loss": 0.0022, "step": 3205 }, { "epoch": 6.1, "grad_norm": 0.7210130095481873, "learning_rate": 6.9872330916485635e-06, "loss": 0.0041, "step": 3206 }, { "epoch": 6.1, "grad_norm": 0.9449563026428223, "learning_rate": 6.9813621929969254e-06, "loss": 0.0032, "step": 3207 }, { "epoch": 6.1, "grad_norm": 1.117221713066101, "learning_rate": 6.97549243888227e-06, "loss": 0.0058, "step": 3208 }, { "epoch": 6.1, "grad_norm": 0.9108210802078247, "learning_rate": 6.969623831530153e-06, "loss": 0.0087, "step": 3209 }, { "epoch": 6.1, "grad_norm": 0.7961142659187317, "learning_rate": 6.9637563731657e-06, "loss": 0.0096, "step": 3210 }, { "epoch": 6.1, "grad_norm": 1.294947862625122, "learning_rate": 6.9578900660135916e-06, "loss": 0.0068, "step": 3211 }, { "epoch": 6.11, "grad_norm": 0.6410894989967346, "learning_rate": 6.952024912298086e-06, "loss": 0.0128, "step": 3212 }, { "epoch": 6.11, "grad_norm": 0.3232981562614441, "learning_rate": 6.9461609142429875e-06, "loss": 0.002, "step": 3213 }, { "epoch": 6.11, "grad_norm": 0.5209748148918152, "learning_rate": 6.940298074071675e-06, "loss": 0.0018, "step": 3214 }, { "epoch": 6.11, "grad_norm": 0.5952796339988708, "learning_rate": 6.934436394007088e-06, "loss": 0.0096, "step": 3215 }, { "epoch": 6.11, "grad_norm": 0.4597105383872986, "learning_rate": 6.928575876271714e-06, "loss": 0.0032, "step": 3216 }, { "epoch": 6.12, "grad_norm": 0.6348119378089905, "learning_rate": 6.922716523087613e-06, "loss": 0.0069, "step": 3217 }, { "epoch": 6.12, "grad_norm": 1.099178671836853, "learning_rate": 6.9168583366764e-06, "loss": 0.0073, "step": 3218 }, { "epoch": 6.12, "grad_norm": 0.7787715792655945, "learning_rate": 6.911001319259241e-06, "loss": 0.0078, "step": 3219 }, { "epoch": 6.12, "grad_norm": 0.5945113897323608, "learning_rate": 6.905145473056866e-06, "loss": 0.0045, "step": 3220 }, { "epoch": 6.12, "grad_norm": 0.508833646774292, "learning_rate": 6.899290800289562e-06, "loss": 0.0042, "step": 3221 }, { "epoch": 6.13, "grad_norm": 0.48504704236984253, "learning_rate": 6.893437303177163e-06, "loss": 0.0028, "step": 3222 }, { "epoch": 6.13, "grad_norm": 0.4830033481121063, "learning_rate": 6.887584983939063e-06, "loss": 0.003, "step": 3223 }, { "epoch": 6.13, "grad_norm": 0.9626103639602661, "learning_rate": 6.881733844794214e-06, "loss": 0.0061, "step": 3224 }, { "epoch": 6.13, "grad_norm": 1.016807198524475, "learning_rate": 6.875883887961106e-06, "loss": 0.0112, "step": 3225 }, { "epoch": 6.13, "grad_norm": 0.2027806043624878, "learning_rate": 6.870035115657796e-06, "loss": 0.0032, "step": 3226 }, { "epoch": 6.13, "grad_norm": 0.3546462953090668, "learning_rate": 6.864187530101887e-06, "loss": 0.0021, "step": 3227 }, { "epoch": 6.14, "grad_norm": 0.4282340705394745, "learning_rate": 6.858341133510525e-06, "loss": 0.0021, "step": 3228 }, { "epoch": 6.14, "grad_norm": 0.5709601044654846, "learning_rate": 6.852495928100415e-06, "loss": 0.0041, "step": 3229 }, { "epoch": 6.14, "grad_norm": 0.8997888565063477, "learning_rate": 6.846651916087806e-06, "loss": 0.0057, "step": 3230 }, { "epoch": 6.14, "grad_norm": 0.5830583572387695, "learning_rate": 6.840809099688492e-06, "loss": 0.0063, "step": 3231 }, { "epoch": 6.14, "grad_norm": 0.6774285435676575, "learning_rate": 6.834967481117818e-06, "loss": 0.0058, "step": 3232 }, { "epoch": 6.15, "grad_norm": 0.421989768743515, "learning_rate": 6.829127062590677e-06, "loss": 0.0045, "step": 3233 }, { "epoch": 6.15, "grad_norm": 0.41865524649620056, "learning_rate": 6.823287846321496e-06, "loss": 0.0022, "step": 3234 }, { "epoch": 6.15, "grad_norm": 0.42181557416915894, "learning_rate": 6.817449834524256e-06, "loss": 0.0065, "step": 3235 }, { "epoch": 6.15, "grad_norm": 0.4883459806442261, "learning_rate": 6.811613029412485e-06, "loss": 0.0044, "step": 3236 }, { "epoch": 6.15, "grad_norm": 0.6429097652435303, "learning_rate": 6.805777433199237e-06, "loss": 0.0037, "step": 3237 }, { "epoch": 6.16, "grad_norm": 1.2976914644241333, "learning_rate": 6.799943048097129e-06, "loss": 0.0067, "step": 3238 }, { "epoch": 6.16, "grad_norm": 0.37791022658348083, "learning_rate": 6.794109876318297e-06, "loss": 0.0013, "step": 3239 }, { "epoch": 6.16, "grad_norm": 0.8990184664726257, "learning_rate": 6.788277920074433e-06, "loss": 0.0064, "step": 3240 }, { "epoch": 6.16, "grad_norm": 0.5203665494918823, "learning_rate": 6.782447181576764e-06, "loss": 0.0033, "step": 3241 }, { "epoch": 6.16, "grad_norm": 0.32252073287963867, "learning_rate": 6.7766176630360504e-06, "loss": 0.0031, "step": 3242 }, { "epoch": 6.17, "grad_norm": 0.6596770882606506, "learning_rate": 6.7707893666625934e-06, "loss": 0.0048, "step": 3243 }, { "epoch": 6.17, "grad_norm": 0.16444234549999237, "learning_rate": 6.764962294666237e-06, "loss": 0.0009, "step": 3244 }, { "epoch": 6.17, "grad_norm": 0.9420425295829773, "learning_rate": 6.759136449256348e-06, "loss": 0.0126, "step": 3245 }, { "epoch": 6.17, "grad_norm": 1.307861566543579, "learning_rate": 6.7533118326418375e-06, "loss": 0.009, "step": 3246 }, { "epoch": 6.17, "grad_norm": 0.8453923463821411, "learning_rate": 6.747488447031154e-06, "loss": 0.0058, "step": 3247 }, { "epoch": 6.17, "grad_norm": 0.7734345197677612, "learning_rate": 6.741666294632263e-06, "loss": 0.0082, "step": 3248 }, { "epoch": 6.18, "grad_norm": 0.520740807056427, "learning_rate": 6.735845377652679e-06, "loss": 0.0036, "step": 3249 }, { "epoch": 6.18, "grad_norm": 0.2230827510356903, "learning_rate": 6.730025698299447e-06, "loss": 0.0017, "step": 3250 }, { "epoch": 6.18, "grad_norm": 1.0240176916122437, "learning_rate": 6.724207258779128e-06, "loss": 0.0114, "step": 3251 }, { "epoch": 6.18, "grad_norm": 0.7838350534439087, "learning_rate": 6.718390061297829e-06, "loss": 0.0041, "step": 3252 }, { "epoch": 6.18, "grad_norm": 0.271128386259079, "learning_rate": 6.712574108061178e-06, "loss": 0.0018, "step": 3253 }, { "epoch": 6.19, "grad_norm": 0.5101606249809265, "learning_rate": 6.706759401274334e-06, "loss": 0.0061, "step": 3254 }, { "epoch": 6.19, "grad_norm": 0.8591563105583191, "learning_rate": 6.700945943141981e-06, "loss": 0.0071, "step": 3255 }, { "epoch": 6.19, "grad_norm": 0.4737778604030609, "learning_rate": 6.6951337358683356e-06, "loss": 0.0027, "step": 3256 }, { "epoch": 6.19, "grad_norm": 0.8167467713356018, "learning_rate": 6.689322781657126e-06, "loss": 0.0051, "step": 3257 }, { "epoch": 6.19, "grad_norm": 0.6879462003707886, "learning_rate": 6.683513082711622e-06, "loss": 0.0096, "step": 3258 }, { "epoch": 6.2, "grad_norm": 0.25046834349632263, "learning_rate": 6.6777046412346135e-06, "loss": 0.0018, "step": 3259 }, { "epoch": 6.2, "grad_norm": 0.7003083229064941, "learning_rate": 6.671897459428403e-06, "loss": 0.0029, "step": 3260 }, { "epoch": 6.2, "grad_norm": 0.8384617567062378, "learning_rate": 6.666091539494828e-06, "loss": 0.004, "step": 3261 }, { "epoch": 6.2, "grad_norm": 0.3872980773448944, "learning_rate": 6.660286883635236e-06, "loss": 0.0026, "step": 3262 }, { "epoch": 6.2, "grad_norm": 0.09376481175422668, "learning_rate": 6.6544834940505095e-06, "loss": 0.0007, "step": 3263 }, { "epoch": 6.21, "grad_norm": 0.9434680342674255, "learning_rate": 6.6486813729410385e-06, "loss": 0.0137, "step": 3264 }, { "epoch": 6.21, "grad_norm": 1.1336722373962402, "learning_rate": 6.642880522506737e-06, "loss": 0.0093, "step": 3265 }, { "epoch": 6.21, "grad_norm": 0.280961275100708, "learning_rate": 6.637080944947036e-06, "loss": 0.0034, "step": 3266 }, { "epoch": 6.21, "grad_norm": 0.43590980768203735, "learning_rate": 6.63128264246089e-06, "loss": 0.0049, "step": 3267 }, { "epoch": 6.21, "grad_norm": 0.5017274022102356, "learning_rate": 6.625485617246756e-06, "loss": 0.0066, "step": 3268 }, { "epoch": 6.21, "grad_norm": 0.7454277873039246, "learning_rate": 6.619689871502619e-06, "loss": 0.0045, "step": 3269 }, { "epoch": 6.22, "grad_norm": 1.2962037324905396, "learning_rate": 6.613895407425982e-06, "loss": 0.0042, "step": 3270 }, { "epoch": 6.22, "grad_norm": 0.3235902488231659, "learning_rate": 6.608102227213844e-06, "loss": 0.001, "step": 3271 }, { "epoch": 6.22, "grad_norm": 0.6383742094039917, "learning_rate": 6.602310333062736e-06, "loss": 0.0035, "step": 3272 }, { "epoch": 6.22, "grad_norm": 0.4341609477996826, "learning_rate": 6.596519727168693e-06, "loss": 0.0025, "step": 3273 }, { "epoch": 6.22, "grad_norm": 0.7930417656898499, "learning_rate": 6.59073041172726e-06, "loss": 0.0094, "step": 3274 }, { "epoch": 6.23, "grad_norm": 0.7220954895019531, "learning_rate": 6.5849423889334975e-06, "loss": 0.0151, "step": 3275 }, { "epoch": 6.23, "grad_norm": 0.3093663156032562, "learning_rate": 6.579155660981973e-06, "loss": 0.0035, "step": 3276 }, { "epoch": 6.23, "grad_norm": 0.5116583704948425, "learning_rate": 6.573370230066763e-06, "loss": 0.0044, "step": 3277 }, { "epoch": 6.23, "grad_norm": 0.6527223587036133, "learning_rate": 6.567586098381451e-06, "loss": 0.017, "step": 3278 }, { "epoch": 6.23, "grad_norm": 0.6700079441070557, "learning_rate": 6.561803268119137e-06, "loss": 0.0052, "step": 3279 }, { "epoch": 6.24, "grad_norm": 0.5898664593696594, "learning_rate": 6.55602174147241e-06, "loss": 0.0033, "step": 3280 }, { "epoch": 6.24, "grad_norm": 0.23147790133953094, "learning_rate": 6.55024152063338e-06, "loss": 0.0011, "step": 3281 }, { "epoch": 6.24, "grad_norm": 0.3522895872592926, "learning_rate": 6.5444626077936624e-06, "loss": 0.0028, "step": 3282 }, { "epoch": 6.24, "grad_norm": 0.7811453342437744, "learning_rate": 6.538685005144361e-06, "loss": 0.004, "step": 3283 }, { "epoch": 6.24, "grad_norm": 0.42742660641670227, "learning_rate": 6.532908714876099e-06, "loss": 0.0021, "step": 3284 }, { "epoch": 6.25, "grad_norm": 0.3927798271179199, "learning_rate": 6.527133739178997e-06, "loss": 0.0038, "step": 3285 }, { "epoch": 6.25, "grad_norm": 0.31797322630882263, "learning_rate": 6.521360080242672e-06, "loss": 0.0015, "step": 3286 }, { "epoch": 6.25, "grad_norm": 1.3531923294067383, "learning_rate": 6.515587740256249e-06, "loss": 0.0045, "step": 3287 }, { "epoch": 6.25, "grad_norm": 0.45768141746520996, "learning_rate": 6.5098167214083486e-06, "loss": 0.0047, "step": 3288 }, { "epoch": 6.25, "grad_norm": 0.39065051078796387, "learning_rate": 6.5040470258870905e-06, "loss": 0.0026, "step": 3289 }, { "epoch": 6.25, "grad_norm": 0.2550913989543915, "learning_rate": 6.498278655880098e-06, "loss": 0.0014, "step": 3290 }, { "epoch": 6.26, "grad_norm": 0.23469240963459015, "learning_rate": 6.492511613574481e-06, "loss": 0.002, "step": 3291 }, { "epoch": 6.26, "grad_norm": 0.2839074730873108, "learning_rate": 6.4867459011568566e-06, "loss": 0.0015, "step": 3292 }, { "epoch": 6.26, "grad_norm": 0.6605426669120789, "learning_rate": 6.480981520813339e-06, "loss": 0.0021, "step": 3293 }, { "epoch": 6.26, "grad_norm": 0.9431490302085876, "learning_rate": 6.475218474729521e-06, "loss": 0.0066, "step": 3294 }, { "epoch": 6.26, "grad_norm": 0.60699063539505, "learning_rate": 6.469456765090507e-06, "loss": 0.005, "step": 3295 }, { "epoch": 6.27, "grad_norm": 0.5263561606407166, "learning_rate": 6.463696394080892e-06, "loss": 0.003, "step": 3296 }, { "epoch": 6.27, "grad_norm": 0.31413111090660095, "learning_rate": 6.457937363884752e-06, "loss": 0.0014, "step": 3297 }, { "epoch": 6.27, "grad_norm": 0.37383705377578735, "learning_rate": 6.452179676685666e-06, "loss": 0.0057, "step": 3298 }, { "epoch": 6.27, "grad_norm": 0.34922704100608826, "learning_rate": 6.446423334666701e-06, "loss": 0.0035, "step": 3299 }, { "epoch": 6.27, "grad_norm": 1.4939842224121094, "learning_rate": 6.440668340010412e-06, "loss": 0.0193, "step": 3300 }, { "epoch": 6.27, "eval_blimp_filtered_avg": 0.7328358208955223, "eval_blimp_filtered_std": 0.004853928608547513, "step": 3300 }, { "epoch": 6.27, "eval_blimp_supplement_avg": 0.7974137931034483, "eval_blimp_supplement_std": 0.017425449861332327, "step": 3300 }, { "epoch": 6.27, "eval_vqa_filtered_avg": 0.37, "eval_vqa_filtered_std": 0.048523658709391, "step": 3300 }, { "epoch": 6.27, "eval_winoground_filtered_avg": 0.55, "eval_winoground_filtered_std": 0.05, "step": 3300 }, { "epoch": 6.28, "grad_norm": 0.220005065202713, "learning_rate": 6.434914694898842e-06, "loss": 0.0024, "step": 3301 }, { "epoch": 6.28, "grad_norm": 1.3146708011627197, "learning_rate": 6.42916240151353e-06, "loss": 0.0066, "step": 3302 }, { "epoch": 6.28, "grad_norm": 0.30849045515060425, "learning_rate": 6.4234114620354895e-06, "loss": 0.0021, "step": 3303 }, { "epoch": 6.28, "grad_norm": 2.0074832439422607, "learning_rate": 6.417661878645234e-06, "loss": 0.0104, "step": 3304 }, { "epoch": 6.28, "grad_norm": 0.5770412087440491, "learning_rate": 6.411913653522756e-06, "loss": 0.0035, "step": 3305 }, { "epoch": 6.29, "grad_norm": 0.4483131766319275, "learning_rate": 6.406166788847532e-06, "loss": 0.0048, "step": 3306 }, { "epoch": 6.29, "grad_norm": 0.9197181463241577, "learning_rate": 6.400421286798526e-06, "loss": 0.0044, "step": 3307 }, { "epoch": 6.29, "grad_norm": 0.8624308705329895, "learning_rate": 6.394677149554188e-06, "loss": 0.0063, "step": 3308 }, { "epoch": 6.29, "grad_norm": 1.0219485759735107, "learning_rate": 6.38893437929244e-06, "loss": 0.0029, "step": 3309 }, { "epoch": 6.29, "grad_norm": 0.6195585131645203, "learning_rate": 6.383192978190696e-06, "loss": 0.0032, "step": 3310 }, { "epoch": 6.29, "grad_norm": 0.5224176645278931, "learning_rate": 6.3774529484258445e-06, "loss": 0.005, "step": 3311 }, { "epoch": 6.3, "grad_norm": 0.3973945379257202, "learning_rate": 6.371714292174257e-06, "loss": 0.0038, "step": 3312 }, { "epoch": 6.3, "grad_norm": 0.5294157862663269, "learning_rate": 6.36597701161179e-06, "loss": 0.004, "step": 3313 }, { "epoch": 6.3, "grad_norm": 0.17699307203292847, "learning_rate": 6.360241108913763e-06, "loss": 0.0012, "step": 3314 }, { "epoch": 6.3, "grad_norm": 0.62530517578125, "learning_rate": 6.354506586254985e-06, "loss": 0.0118, "step": 3315 }, { "epoch": 6.3, "grad_norm": 0.45826396346092224, "learning_rate": 6.348773445809747e-06, "loss": 0.0051, "step": 3316 }, { "epoch": 6.31, "grad_norm": 0.3284226357936859, "learning_rate": 6.343041689751798e-06, "loss": 0.0017, "step": 3317 }, { "epoch": 6.31, "grad_norm": 1.1893086433410645, "learning_rate": 6.337311320254376e-06, "loss": 0.0038, "step": 3318 }, { "epoch": 6.31, "grad_norm": 0.3832707405090332, "learning_rate": 6.3315823394901924e-06, "loss": 0.0026, "step": 3319 }, { "epoch": 6.31, "grad_norm": 0.22655059397220612, "learning_rate": 6.325854749631424e-06, "loss": 0.0016, "step": 3320 }, { "epoch": 6.31, "grad_norm": 0.5284161567687988, "learning_rate": 6.320128552849728e-06, "loss": 0.0046, "step": 3321 }, { "epoch": 6.32, "grad_norm": 0.43661558628082275, "learning_rate": 6.314403751316232e-06, "loss": 0.0037, "step": 3322 }, { "epoch": 6.32, "grad_norm": 0.5180267691612244, "learning_rate": 6.30868034720153e-06, "loss": 0.0066, "step": 3323 }, { "epoch": 6.32, "grad_norm": 0.5642206072807312, "learning_rate": 6.302958342675689e-06, "loss": 0.0059, "step": 3324 }, { "epoch": 6.32, "grad_norm": 0.7281386852264404, "learning_rate": 6.297237739908253e-06, "loss": 0.0043, "step": 3325 }, { "epoch": 6.32, "grad_norm": 1.3194490671157837, "learning_rate": 6.2915185410682175e-06, "loss": 0.0167, "step": 3326 }, { "epoch": 6.33, "grad_norm": 1.2378895282745361, "learning_rate": 6.285800748324061e-06, "loss": 0.0091, "step": 3327 }, { "epoch": 6.33, "grad_norm": 1.2544723749160767, "learning_rate": 6.280084363843726e-06, "loss": 0.0108, "step": 3328 }, { "epoch": 6.33, "grad_norm": 0.2614884674549103, "learning_rate": 6.274369389794612e-06, "loss": 0.0023, "step": 3329 }, { "epoch": 6.33, "grad_norm": 0.5202069878578186, "learning_rate": 6.268655828343591e-06, "loss": 0.0039, "step": 3330 }, { "epoch": 6.33, "grad_norm": 0.16651323437690735, "learning_rate": 6.262943681657006e-06, "loss": 0.0015, "step": 3331 }, { "epoch": 6.33, "grad_norm": 0.7302066087722778, "learning_rate": 6.257232951900649e-06, "loss": 0.0078, "step": 3332 }, { "epoch": 6.34, "grad_norm": 0.6394038200378418, "learning_rate": 6.251523641239781e-06, "loss": 0.0034, "step": 3333 }, { "epoch": 6.34, "grad_norm": 0.10211703926324844, "learning_rate": 6.245815751839134e-06, "loss": 0.0006, "step": 3334 }, { "epoch": 6.34, "grad_norm": 0.28105610609054565, "learning_rate": 6.240109285862882e-06, "loss": 0.0021, "step": 3335 }, { "epoch": 6.34, "grad_norm": 0.3842293620109558, "learning_rate": 6.23440424547468e-06, "loss": 0.0032, "step": 3336 }, { "epoch": 6.34, "grad_norm": 1.2689810991287231, "learning_rate": 6.228700632837624e-06, "loss": 0.01, "step": 3337 }, { "epoch": 6.35, "grad_norm": 0.505107581615448, "learning_rate": 6.222998450114283e-06, "loss": 0.0026, "step": 3338 }, { "epoch": 6.35, "grad_norm": 0.3614455461502075, "learning_rate": 6.21729769946668e-06, "loss": 0.0018, "step": 3339 }, { "epoch": 6.35, "grad_norm": 1.0001804828643799, "learning_rate": 6.211598383056288e-06, "loss": 0.0094, "step": 3340 }, { "epoch": 6.35, "grad_norm": 0.8546343445777893, "learning_rate": 6.2059005030440415e-06, "loss": 0.0141, "step": 3341 }, { "epoch": 6.35, "grad_norm": 0.44139695167541504, "learning_rate": 6.200204061590337e-06, "loss": 0.0044, "step": 3342 }, { "epoch": 6.36, "grad_norm": 1.0915659666061401, "learning_rate": 6.19450906085501e-06, "loss": 0.0057, "step": 3343 }, { "epoch": 6.36, "grad_norm": 0.415785014629364, "learning_rate": 6.188815502997367e-06, "loss": 0.0017, "step": 3344 }, { "epoch": 6.36, "grad_norm": 0.8399573564529419, "learning_rate": 6.183123390176154e-06, "loss": 0.0062, "step": 3345 }, { "epoch": 6.36, "grad_norm": 1.1203078031539917, "learning_rate": 6.1774327245495745e-06, "loss": 0.0042, "step": 3346 }, { "epoch": 6.36, "grad_norm": 0.7238085865974426, "learning_rate": 6.171743508275283e-06, "loss": 0.0022, "step": 3347 }, { "epoch": 6.37, "grad_norm": 0.9114409685134888, "learning_rate": 6.166055743510388e-06, "loss": 0.009, "step": 3348 }, { "epoch": 6.37, "grad_norm": 0.7505013346672058, "learning_rate": 6.160369432411438e-06, "loss": 0.006, "step": 3349 }, { "epoch": 6.37, "grad_norm": 0.3875947594642639, "learning_rate": 6.15468457713444e-06, "loss": 0.0019, "step": 3350 }, { "epoch": 6.37, "grad_norm": 0.6880106925964355, "learning_rate": 6.1490011798348484e-06, "loss": 0.0071, "step": 3351 }, { "epoch": 6.37, "grad_norm": 0.7879195809364319, "learning_rate": 6.143319242667554e-06, "loss": 0.0073, "step": 3352 }, { "epoch": 6.37, "grad_norm": 0.36364808678627014, "learning_rate": 6.137638767786906e-06, "loss": 0.0021, "step": 3353 }, { "epoch": 6.38, "grad_norm": 0.6069850325584412, "learning_rate": 6.131959757346699e-06, "loss": 0.0087, "step": 3354 }, { "epoch": 6.38, "grad_norm": 0.14508424699306488, "learning_rate": 6.1262822135001635e-06, "loss": 0.0019, "step": 3355 }, { "epoch": 6.38, "grad_norm": 0.6462864279747009, "learning_rate": 6.120606138399977e-06, "loss": 0.0035, "step": 3356 }, { "epoch": 6.38, "grad_norm": 0.5149251818656921, "learning_rate": 6.114931534198268e-06, "loss": 0.0044, "step": 3357 }, { "epoch": 6.38, "grad_norm": 0.21136066317558289, "learning_rate": 6.109258403046594e-06, "loss": 0.0023, "step": 3358 }, { "epoch": 6.39, "grad_norm": 0.5575568675994873, "learning_rate": 6.103586747095965e-06, "loss": 0.0026, "step": 3359 }, { "epoch": 6.39, "grad_norm": 0.8316485285758972, "learning_rate": 6.097916568496831e-06, "loss": 0.0048, "step": 3360 }, { "epoch": 6.39, "grad_norm": 0.6549404263496399, "learning_rate": 6.0922478693990726e-06, "loss": 0.004, "step": 3361 }, { "epoch": 6.39, "grad_norm": 0.4045282304286957, "learning_rate": 6.086580651952021e-06, "loss": 0.005, "step": 3362 }, { "epoch": 6.39, "grad_norm": 0.7397811412811279, "learning_rate": 6.080914918304433e-06, "loss": 0.0058, "step": 3363 }, { "epoch": 6.4, "grad_norm": 0.24607554078102112, "learning_rate": 6.075250670604513e-06, "loss": 0.0031, "step": 3364 }, { "epoch": 6.4, "grad_norm": 1.1355741024017334, "learning_rate": 6.069587910999905e-06, "loss": 0.0096, "step": 3365 }, { "epoch": 6.4, "grad_norm": 0.7466241121292114, "learning_rate": 6.063926641637674e-06, "loss": 0.0042, "step": 3366 }, { "epoch": 6.4, "grad_norm": 0.6461073756217957, "learning_rate": 6.058266864664335e-06, "loss": 0.0066, "step": 3367 }, { "epoch": 6.4, "grad_norm": 0.8816578984260559, "learning_rate": 6.052608582225828e-06, "loss": 0.0023, "step": 3368 }, { "epoch": 6.4, "grad_norm": 0.47599801421165466, "learning_rate": 6.046951796467527e-06, "loss": 0.0028, "step": 3369 }, { "epoch": 6.41, "grad_norm": 0.5763357281684875, "learning_rate": 6.041296509534242e-06, "loss": 0.01, "step": 3370 }, { "epoch": 6.41, "grad_norm": 0.2781674563884735, "learning_rate": 6.035642723570219e-06, "loss": 0.001, "step": 3371 }, { "epoch": 6.41, "grad_norm": 0.11664582788944244, "learning_rate": 6.02999044071912e-06, "loss": 0.0011, "step": 3372 }, { "epoch": 6.41, "grad_norm": 0.6108739376068115, "learning_rate": 6.02433966312405e-06, "loss": 0.004, "step": 3373 }, { "epoch": 6.41, "grad_norm": 0.8571146130561829, "learning_rate": 6.018690392927546e-06, "loss": 0.0106, "step": 3374 }, { "epoch": 6.42, "grad_norm": 0.5626128315925598, "learning_rate": 6.0130426322715566e-06, "loss": 0.003, "step": 3375 }, { "epoch": 6.42, "grad_norm": 0.7364801168441772, "learning_rate": 6.007396383297473e-06, "loss": 0.0039, "step": 3376 }, { "epoch": 6.42, "grad_norm": 0.15239077806472778, "learning_rate": 6.001751648146115e-06, "loss": 0.0014, "step": 3377 }, { "epoch": 6.42, "grad_norm": 0.29629600048065186, "learning_rate": 5.996108428957713e-06, "loss": 0.0016, "step": 3378 }, { "epoch": 6.42, "grad_norm": 0.47156503796577454, "learning_rate": 5.990466727871933e-06, "loss": 0.0046, "step": 3379 }, { "epoch": 6.43, "grad_norm": 0.07441487908363342, "learning_rate": 5.984826547027871e-06, "loss": 0.0005, "step": 3380 }, { "epoch": 6.43, "grad_norm": 0.6310750246047974, "learning_rate": 5.979187888564032e-06, "loss": 0.0055, "step": 3381 }, { "epoch": 6.43, "grad_norm": 0.8223636150360107, "learning_rate": 5.973550754618353e-06, "loss": 0.0032, "step": 3382 }, { "epoch": 6.43, "grad_norm": 0.14533443748950958, "learning_rate": 5.967915147328201e-06, "loss": 0.0014, "step": 3383 }, { "epoch": 6.43, "grad_norm": 0.4316496253013611, "learning_rate": 5.9622810688303405e-06, "loss": 0.0016, "step": 3384 }, { "epoch": 6.44, "grad_norm": 0.6907292008399963, "learning_rate": 5.956648521260979e-06, "loss": 0.0107, "step": 3385 }, { "epoch": 6.44, "grad_norm": 0.6452047824859619, "learning_rate": 5.951017506755732e-06, "loss": 0.0039, "step": 3386 }, { "epoch": 6.44, "grad_norm": 0.5913615226745605, "learning_rate": 5.945388027449637e-06, "loss": 0.0024, "step": 3387 }, { "epoch": 6.44, "grad_norm": 0.3298959732055664, "learning_rate": 5.939760085477155e-06, "loss": 0.0012, "step": 3388 }, { "epoch": 6.44, "grad_norm": 0.40552306175231934, "learning_rate": 5.9341336829721495e-06, "loss": 0.0019, "step": 3389 }, { "epoch": 6.44, "grad_norm": 0.9217585921287537, "learning_rate": 5.928508822067914e-06, "loss": 0.0031, "step": 3390 }, { "epoch": 6.45, "grad_norm": 0.3993256092071533, "learning_rate": 5.922885504897153e-06, "loss": 0.0023, "step": 3391 }, { "epoch": 6.45, "grad_norm": 1.579289197921753, "learning_rate": 5.917263733591983e-06, "loss": 0.0106, "step": 3392 }, { "epoch": 6.45, "grad_norm": 0.46546947956085205, "learning_rate": 5.911643510283937e-06, "loss": 0.0034, "step": 3393 }, { "epoch": 6.45, "grad_norm": 0.5204914212226868, "learning_rate": 5.906024837103965e-06, "loss": 0.0047, "step": 3394 }, { "epoch": 6.45, "grad_norm": 0.4715994894504547, "learning_rate": 5.900407716182418e-06, "loss": 0.0023, "step": 3395 }, { "epoch": 6.46, "grad_norm": 0.1998923271894455, "learning_rate": 5.894792149649069e-06, "loss": 0.0017, "step": 3396 }, { "epoch": 6.46, "grad_norm": 1.037575602531433, "learning_rate": 5.889178139633102e-06, "loss": 0.0052, "step": 3397 }, { "epoch": 6.46, "grad_norm": 0.3515752851963043, "learning_rate": 5.8835656882630985e-06, "loss": 0.0026, "step": 3398 }, { "epoch": 6.46, "grad_norm": 0.6171612739562988, "learning_rate": 5.877954797667063e-06, "loss": 0.0058, "step": 3399 }, { "epoch": 6.46, "grad_norm": 0.3798573613166809, "learning_rate": 5.872345469972405e-06, "loss": 0.001, "step": 3400 }, { "epoch": 6.46, "eval_blimp_filtered_avg": 0.7316417910447761, "eval_blimp_filtered_std": 0.004868370723637538, "step": 3400 }, { "epoch": 6.46, "eval_blimp_supplement_avg": 0.7952586206896551, "eval_blimp_supplement_std": 0.017488657018070182, "step": 3400 }, { "epoch": 6.46, "eval_vqa_filtered_avg": 0.35, "eval_vqa_filtered_std": 0.047937248544110196, "step": 3400 }, { "epoch": 6.46, "eval_winoground_filtered_avg": 0.49, "eval_winoground_filtered_std": 0.05024183937956912, "step": 3400 }, { "epoch": 6.47, "grad_norm": 0.41391393542289734, "learning_rate": 5.866737707305935e-06, "loss": 0.0016, "step": 3401 }, { "epoch": 6.47, "grad_norm": 0.44936954975128174, "learning_rate": 5.861131511793872e-06, "loss": 0.0037, "step": 3402 }, { "epoch": 6.47, "grad_norm": 0.14085574448108673, "learning_rate": 5.855526885561851e-06, "loss": 0.001, "step": 3403 }, { "epoch": 6.47, "grad_norm": 0.4373472034931183, "learning_rate": 5.849923830734895e-06, "loss": 0.0039, "step": 3404 }, { "epoch": 6.47, "grad_norm": 0.2520005702972412, "learning_rate": 5.844322349437443e-06, "loss": 0.0014, "step": 3405 }, { "epoch": 6.48, "grad_norm": 0.6406676173210144, "learning_rate": 5.838722443793342e-06, "loss": 0.0029, "step": 3406 }, { "epoch": 6.48, "grad_norm": 1.1713651418685913, "learning_rate": 5.833124115925825e-06, "loss": 0.0048, "step": 3407 }, { "epoch": 6.48, "grad_norm": 0.36383894085884094, "learning_rate": 5.827527367957536e-06, "loss": 0.0032, "step": 3408 }, { "epoch": 6.48, "grad_norm": 1.6856151819229126, "learning_rate": 5.821932202010531e-06, "loss": 0.0098, "step": 3409 }, { "epoch": 6.48, "grad_norm": 0.3480919599533081, "learning_rate": 5.8163386202062386e-06, "loss": 0.0029, "step": 3410 }, { "epoch": 6.48, "grad_norm": 0.3940490484237671, "learning_rate": 5.8107466246655196e-06, "loss": 0.0023, "step": 3411 }, { "epoch": 6.49, "grad_norm": 0.38717180490493774, "learning_rate": 5.805156217508601e-06, "loss": 0.0064, "step": 3412 }, { "epoch": 6.49, "grad_norm": 0.3407024145126343, "learning_rate": 5.799567400855136e-06, "loss": 0.0008, "step": 3413 }, { "epoch": 6.49, "grad_norm": 0.4036506712436676, "learning_rate": 5.793980176824159e-06, "loss": 0.0018, "step": 3414 }, { "epoch": 6.49, "grad_norm": 0.1893645077943802, "learning_rate": 5.788394547534101e-06, "loss": 0.0014, "step": 3415 }, { "epoch": 6.49, "grad_norm": 0.6057380437850952, "learning_rate": 5.78281051510279e-06, "loss": 0.0019, "step": 3416 }, { "epoch": 6.5, "grad_norm": 0.40562647581100464, "learning_rate": 5.777228081647461e-06, "loss": 0.0048, "step": 3417 }, { "epoch": 6.5, "grad_norm": 0.5295743942260742, "learning_rate": 5.7716472492847154e-06, "loss": 0.0017, "step": 3418 }, { "epoch": 6.5, "grad_norm": 0.8024502992630005, "learning_rate": 5.766068020130575e-06, "loss": 0.0044, "step": 3419 }, { "epoch": 6.5, "grad_norm": 0.30689483880996704, "learning_rate": 5.760490396300443e-06, "loss": 0.0015, "step": 3420 }, { "epoch": 6.5, "grad_norm": 0.9260954260826111, "learning_rate": 5.754914379909102e-06, "loss": 0.0029, "step": 3421 }, { "epoch": 6.51, "grad_norm": 0.6365328431129456, "learning_rate": 5.749339973070747e-06, "loss": 0.0081, "step": 3422 }, { "epoch": 6.51, "grad_norm": 0.47775524854660034, "learning_rate": 5.743767177898948e-06, "loss": 0.0047, "step": 3423 }, { "epoch": 6.51, "grad_norm": 0.20689156651496887, "learning_rate": 5.73819599650667e-06, "loss": 0.0025, "step": 3424 }, { "epoch": 6.51, "grad_norm": 0.9933105111122131, "learning_rate": 5.7326264310062585e-06, "loss": 0.0058, "step": 3425 }, { "epoch": 6.51, "grad_norm": 0.32028913497924805, "learning_rate": 5.727058483509463e-06, "loss": 0.0042, "step": 3426 }, { "epoch": 6.52, "grad_norm": 0.40589672327041626, "learning_rate": 5.721492156127395e-06, "loss": 0.0035, "step": 3427 }, { "epoch": 6.52, "grad_norm": 0.7774873375892639, "learning_rate": 5.715927450970578e-06, "loss": 0.0033, "step": 3428 }, { "epoch": 6.52, "grad_norm": 0.8757108449935913, "learning_rate": 5.7103643701489e-06, "loss": 0.0052, "step": 3429 }, { "epoch": 6.52, "grad_norm": 0.9874801635742188, "learning_rate": 5.7048029157716424e-06, "loss": 0.0069, "step": 3430 }, { "epoch": 6.52, "grad_norm": 0.6790274977684021, "learning_rate": 5.6992430899474685e-06, "loss": 0.0049, "step": 3431 }, { "epoch": 6.52, "grad_norm": 0.9625439047813416, "learning_rate": 5.693684894784424e-06, "loss": 0.004, "step": 3432 }, { "epoch": 6.53, "grad_norm": 0.7841812372207642, "learning_rate": 5.688128332389937e-06, "loss": 0.008, "step": 3433 }, { "epoch": 6.53, "grad_norm": 0.16739685833454132, "learning_rate": 5.682573404870816e-06, "loss": 0.0013, "step": 3434 }, { "epoch": 6.53, "grad_norm": 0.41867807507514954, "learning_rate": 5.677020114333247e-06, "loss": 0.0028, "step": 3435 }, { "epoch": 6.53, "grad_norm": 0.18072399497032166, "learning_rate": 5.671468462882796e-06, "loss": 0.0013, "step": 3436 }, { "epoch": 6.53, "grad_norm": 0.6599328517913818, "learning_rate": 5.665918452624421e-06, "loss": 0.008, "step": 3437 }, { "epoch": 6.54, "grad_norm": 0.8140948414802551, "learning_rate": 5.660370085662428e-06, "loss": 0.0023, "step": 3438 }, { "epoch": 6.54, "grad_norm": 0.8469983339309692, "learning_rate": 5.654823364100533e-06, "loss": 0.007, "step": 3439 }, { "epoch": 6.54, "grad_norm": 0.12014531344175339, "learning_rate": 5.649278290041806e-06, "loss": 0.0008, "step": 3440 }, { "epoch": 6.54, "grad_norm": 0.41965726017951965, "learning_rate": 5.643734865588701e-06, "loss": 0.003, "step": 3441 }, { "epoch": 6.54, "grad_norm": 0.24263420701026917, "learning_rate": 5.638193092843047e-06, "loss": 0.0018, "step": 3442 }, { "epoch": 6.55, "grad_norm": 0.42715707421302795, "learning_rate": 5.632652973906041e-06, "loss": 0.0014, "step": 3443 }, { "epoch": 6.55, "grad_norm": 0.7695058584213257, "learning_rate": 5.627114510878257e-06, "loss": 0.0056, "step": 3444 }, { "epoch": 6.55, "grad_norm": 0.3324934244155884, "learning_rate": 5.621577705859641e-06, "loss": 0.0017, "step": 3445 }, { "epoch": 6.55, "grad_norm": 1.572588324546814, "learning_rate": 5.6160425609495175e-06, "loss": 0.009, "step": 3446 }, { "epoch": 6.55, "grad_norm": 0.6794803142547607, "learning_rate": 5.61050907824656e-06, "loss": 0.0028, "step": 3447 }, { "epoch": 6.56, "grad_norm": 0.11694911122322083, "learning_rate": 5.604977259848838e-06, "loss": 0.0009, "step": 3448 }, { "epoch": 6.56, "grad_norm": 0.8422719240188599, "learning_rate": 5.599447107853774e-06, "loss": 0.0038, "step": 3449 }, { "epoch": 6.56, "grad_norm": 0.9016570448875427, "learning_rate": 5.593918624358161e-06, "loss": 0.008, "step": 3450 }, { "epoch": 6.56, "grad_norm": 0.7822037935256958, "learning_rate": 5.588391811458163e-06, "loss": 0.0065, "step": 3451 }, { "epoch": 6.56, "grad_norm": 0.44994309544563293, "learning_rate": 5.582866671249309e-06, "loss": 0.0018, "step": 3452 }, { "epoch": 6.56, "grad_norm": 0.8132146596908569, "learning_rate": 5.577343205826492e-06, "loss": 0.0035, "step": 3453 }, { "epoch": 6.57, "grad_norm": 0.68254554271698, "learning_rate": 5.571821417283967e-06, "loss": 0.0027, "step": 3454 }, { "epoch": 6.57, "grad_norm": 0.35920828580856323, "learning_rate": 5.56630130771537e-06, "loss": 0.0021, "step": 3455 }, { "epoch": 6.57, "grad_norm": 0.1621566265821457, "learning_rate": 5.560782879213673e-06, "loss": 0.0008, "step": 3456 }, { "epoch": 6.57, "grad_norm": 0.7344982624053955, "learning_rate": 5.555266133871235e-06, "loss": 0.0034, "step": 3457 }, { "epoch": 6.57, "grad_norm": 0.5861952900886536, "learning_rate": 5.549751073779768e-06, "loss": 0.0111, "step": 3458 }, { "epoch": 6.58, "grad_norm": 0.5601547956466675, "learning_rate": 5.5442377010303395e-06, "loss": 0.0039, "step": 3459 }, { "epoch": 6.58, "grad_norm": 0.32663196325302124, "learning_rate": 5.538726017713385e-06, "loss": 0.0027, "step": 3460 }, { "epoch": 6.58, "grad_norm": 0.40054914355278015, "learning_rate": 5.533216025918695e-06, "loss": 0.0028, "step": 3461 }, { "epoch": 6.58, "grad_norm": 0.4717665910720825, "learning_rate": 5.527707727735416e-06, "loss": 0.0024, "step": 3462 }, { "epoch": 6.58, "grad_norm": 1.9651135206222534, "learning_rate": 5.522201125252071e-06, "loss": 0.0042, "step": 3463 }, { "epoch": 6.59, "grad_norm": 0.18855077028274536, "learning_rate": 5.516696220556508e-06, "loss": 0.0012, "step": 3464 }, { "epoch": 6.59, "grad_norm": 1.04792058467865, "learning_rate": 5.511193015735962e-06, "loss": 0.013, "step": 3465 }, { "epoch": 6.59, "grad_norm": 0.4971976578235626, "learning_rate": 5.505691512877007e-06, "loss": 0.0028, "step": 3466 }, { "epoch": 6.59, "grad_norm": 0.7409549951553345, "learning_rate": 5.500191714065568e-06, "loss": 0.0025, "step": 3467 }, { "epoch": 6.59, "grad_norm": 0.9793545603752136, "learning_rate": 5.49469362138694e-06, "loss": 0.0036, "step": 3468 }, { "epoch": 6.6, "grad_norm": 0.4475288689136505, "learning_rate": 5.489197236925758e-06, "loss": 0.0025, "step": 3469 }, { "epoch": 6.6, "grad_norm": 0.5577232241630554, "learning_rate": 5.483702562766015e-06, "loss": 0.0017, "step": 3470 }, { "epoch": 6.6, "grad_norm": 0.07369358837604523, "learning_rate": 5.478209600991049e-06, "loss": 0.0006, "step": 3471 }, { "epoch": 6.6, "grad_norm": 0.655319094657898, "learning_rate": 5.472718353683565e-06, "loss": 0.0087, "step": 3472 }, { "epoch": 6.6, "grad_norm": 0.1856210082769394, "learning_rate": 5.467228822925592e-06, "loss": 0.0012, "step": 3473 }, { "epoch": 6.6, "grad_norm": 0.07349150627851486, "learning_rate": 5.461741010798534e-06, "loss": 0.0007, "step": 3474 }, { "epoch": 6.61, "grad_norm": 0.19637830555438995, "learning_rate": 5.4562549193831285e-06, "loss": 0.0014, "step": 3475 }, { "epoch": 6.61, "grad_norm": 0.23885001242160797, "learning_rate": 5.450770550759463e-06, "loss": 0.0016, "step": 3476 }, { "epoch": 6.61, "grad_norm": 0.5570257902145386, "learning_rate": 5.445287907006975e-06, "loss": 0.0061, "step": 3477 }, { "epoch": 6.61, "grad_norm": 0.24061700701713562, "learning_rate": 5.439806990204446e-06, "loss": 0.0027, "step": 3478 }, { "epoch": 6.61, "grad_norm": 0.21822181344032288, "learning_rate": 5.434327802430002e-06, "loss": 0.0017, "step": 3479 }, { "epoch": 6.62, "grad_norm": 0.4799270033836365, "learning_rate": 5.428850345761108e-06, "loss": 0.0027, "step": 3480 }, { "epoch": 6.62, "grad_norm": 0.5836166739463806, "learning_rate": 5.423374622274595e-06, "loss": 0.0027, "step": 3481 }, { "epoch": 6.62, "grad_norm": 0.4737652838230133, "learning_rate": 5.4179006340466e-06, "loss": 0.0037, "step": 3482 }, { "epoch": 6.62, "grad_norm": 0.3462498188018799, "learning_rate": 5.412428383152644e-06, "loss": 0.0014, "step": 3483 }, { "epoch": 6.62, "grad_norm": 0.28104132413864136, "learning_rate": 5.406957871667549e-06, "loss": 0.0025, "step": 3484 }, { "epoch": 6.63, "grad_norm": 0.8405001163482666, "learning_rate": 5.401489101665507e-06, "loss": 0.0071, "step": 3485 }, { "epoch": 6.63, "grad_norm": 0.47153568267822266, "learning_rate": 5.396022075220037e-06, "loss": 0.0013, "step": 3486 }, { "epoch": 6.63, "grad_norm": 0.7726248502731323, "learning_rate": 5.390556794404e-06, "loss": 0.0027, "step": 3487 }, { "epoch": 6.63, "grad_norm": 0.40875256061553955, "learning_rate": 5.385093261289594e-06, "loss": 0.0009, "step": 3488 }, { "epoch": 6.63, "grad_norm": 0.6353889107704163, "learning_rate": 5.379631477948355e-06, "loss": 0.0042, "step": 3489 }, { "epoch": 6.63, "grad_norm": 0.2615882158279419, "learning_rate": 5.374171446451153e-06, "loss": 0.0012, "step": 3490 }, { "epoch": 6.64, "grad_norm": 0.22072675824165344, "learning_rate": 5.368713168868191e-06, "loss": 0.0015, "step": 3491 }, { "epoch": 6.64, "grad_norm": 0.5836942195892334, "learning_rate": 5.363256647269029e-06, "loss": 0.0021, "step": 3492 }, { "epoch": 6.64, "grad_norm": 0.29403653740882874, "learning_rate": 5.357801883722524e-06, "loss": 0.0015, "step": 3493 }, { "epoch": 6.64, "grad_norm": 0.5918364524841309, "learning_rate": 5.3523488802969e-06, "loss": 0.004, "step": 3494 }, { "epoch": 6.64, "grad_norm": 0.268078476190567, "learning_rate": 5.346897639059696e-06, "loss": 0.0013, "step": 3495 }, { "epoch": 6.65, "grad_norm": 0.2991231679916382, "learning_rate": 5.341448162077787e-06, "loss": 0.003, "step": 3496 }, { "epoch": 6.65, "grad_norm": 0.1856318861246109, "learning_rate": 5.33600045141738e-06, "loss": 0.0009, "step": 3497 }, { "epoch": 6.65, "grad_norm": 0.60875004529953, "learning_rate": 5.33055450914401e-06, "loss": 0.0089, "step": 3498 }, { "epoch": 6.65, "grad_norm": 0.7296956181526184, "learning_rate": 5.325110337322543e-06, "loss": 0.0081, "step": 3499 }, { "epoch": 6.65, "grad_norm": 0.7473583817481995, "learning_rate": 5.31966793801717e-06, "loss": 0.0075, "step": 3500 }, { "epoch": 6.65, "eval_blimp_filtered_avg": 0.7383582089552239, "eval_blimp_filtered_std": 0.004833697672449886, "step": 3500 }, { "epoch": 6.65, "eval_blimp_supplement_avg": 0.7974137931034483, "eval_blimp_supplement_std": 0.017506099557336195, "step": 3500 }, { "epoch": 6.65, "eval_vqa_filtered_avg": 0.36, "eval_vqa_filtered_std": 0.048241815132442176, "step": 3500 }, { "epoch": 6.65, "eval_winoground_filtered_avg": 0.52, "eval_winoground_filtered_std": 0.05021167315686779, "step": 3500 }, { "epoch": 6.66, "grad_norm": 0.128826305270195, "learning_rate": 5.314227313291427e-06, "loss": 0.0014, "step": 3501 }, { "epoch": 6.66, "grad_norm": 0.8060730695724487, "learning_rate": 5.3087884652081455e-06, "loss": 0.0045, "step": 3502 }, { "epoch": 6.66, "grad_norm": 0.8632144331932068, "learning_rate": 5.3033513958295145e-06, "loss": 0.0014, "step": 3503 }, { "epoch": 6.66, "grad_norm": 1.7806357145309448, "learning_rate": 5.2979161072170325e-06, "loss": 0.0033, "step": 3504 }, { "epoch": 6.66, "grad_norm": 1.0718048810958862, "learning_rate": 5.292482601431525e-06, "loss": 0.008, "step": 3505 }, { "epoch": 6.67, "grad_norm": 0.628686249256134, "learning_rate": 5.287050880533138e-06, "loss": 0.0158, "step": 3506 }, { "epoch": 6.67, "grad_norm": 0.4369146227836609, "learning_rate": 5.281620946581359e-06, "loss": 0.0011, "step": 3507 }, { "epoch": 6.67, "grad_norm": 1.315252423286438, "learning_rate": 5.276192801634967e-06, "loss": 0.0073, "step": 3508 }, { "epoch": 6.67, "grad_norm": 0.6145322322845459, "learning_rate": 5.270766447752097e-06, "loss": 0.0033, "step": 3509 }, { "epoch": 6.67, "grad_norm": 0.9151961207389832, "learning_rate": 5.265341886990172e-06, "loss": 0.0058, "step": 3510 }, { "epoch": 6.67, "grad_norm": 0.784995436668396, "learning_rate": 5.2599191214059605e-06, "loss": 0.0076, "step": 3511 }, { "epoch": 6.68, "grad_norm": 0.327193945646286, "learning_rate": 5.254498153055543e-06, "loss": 0.0018, "step": 3512 }, { "epoch": 6.68, "grad_norm": 0.8319517374038696, "learning_rate": 5.249078983994303e-06, "loss": 0.0056, "step": 3513 }, { "epoch": 6.68, "grad_norm": 0.09264012426137924, "learning_rate": 5.24366161627697e-06, "loss": 0.0006, "step": 3514 }, { "epoch": 6.68, "grad_norm": 0.9917687773704529, "learning_rate": 5.238246051957567e-06, "loss": 0.0033, "step": 3515 }, { "epoch": 6.68, "grad_norm": 0.9264445900917053, "learning_rate": 5.232832293089447e-06, "loss": 0.0013, "step": 3516 }, { "epoch": 6.69, "grad_norm": 0.5838993191719055, "learning_rate": 5.227420341725267e-06, "loss": 0.0026, "step": 3517 }, { "epoch": 6.69, "grad_norm": 0.8428764343261719, "learning_rate": 5.222010199917016e-06, "loss": 0.0039, "step": 3518 }, { "epoch": 6.69, "grad_norm": 0.2024022936820984, "learning_rate": 5.216601869715972e-06, "loss": 0.0017, "step": 3519 }, { "epoch": 6.69, "grad_norm": 0.13156376779079437, "learning_rate": 5.211195353172749e-06, "loss": 0.001, "step": 3520 }, { "epoch": 6.69, "grad_norm": 0.9080775380134583, "learning_rate": 5.205790652337264e-06, "loss": 0.0037, "step": 3521 }, { "epoch": 6.7, "grad_norm": 0.14403286576271057, "learning_rate": 5.200387769258743e-06, "loss": 0.0014, "step": 3522 }, { "epoch": 6.7, "grad_norm": 0.4609922468662262, "learning_rate": 5.194986705985729e-06, "loss": 0.0046, "step": 3523 }, { "epoch": 6.7, "grad_norm": 1.222154140472412, "learning_rate": 5.189587464566069e-06, "loss": 0.0159, "step": 3524 }, { "epoch": 6.7, "grad_norm": 0.5406507253646851, "learning_rate": 5.184190047046923e-06, "loss": 0.0053, "step": 3525 }, { "epoch": 6.7, "grad_norm": 0.5590861439704895, "learning_rate": 5.178794455474754e-06, "loss": 0.0101, "step": 3526 }, { "epoch": 6.71, "grad_norm": 0.5486939549446106, "learning_rate": 5.17340069189535e-06, "loss": 0.0026, "step": 3527 }, { "epoch": 6.71, "grad_norm": 0.30741214752197266, "learning_rate": 5.168008758353775e-06, "loss": 0.0036, "step": 3528 }, { "epoch": 6.71, "grad_norm": 1.0302734375, "learning_rate": 5.1626186568944315e-06, "loss": 0.0025, "step": 3529 }, { "epoch": 6.71, "grad_norm": 0.25865116715431213, "learning_rate": 5.157230389561009e-06, "loss": 0.0018, "step": 3530 }, { "epoch": 6.71, "grad_norm": 0.369213730096817, "learning_rate": 5.151843958396503e-06, "loss": 0.0042, "step": 3531 }, { "epoch": 6.71, "grad_norm": 0.16518832743167877, "learning_rate": 5.146459365443217e-06, "loss": 0.0025, "step": 3532 }, { "epoch": 6.72, "grad_norm": 0.6767488121986389, "learning_rate": 5.141076612742757e-06, "loss": 0.0054, "step": 3533 }, { "epoch": 6.72, "grad_norm": 0.3323570787906647, "learning_rate": 5.135695702336029e-06, "loss": 0.0011, "step": 3534 }, { "epoch": 6.72, "grad_norm": 0.23062464594841003, "learning_rate": 5.1303166362632405e-06, "loss": 0.0016, "step": 3535 }, { "epoch": 6.72, "grad_norm": 0.23400625586509705, "learning_rate": 5.124939416563903e-06, "loss": 0.0018, "step": 3536 }, { "epoch": 6.72, "grad_norm": 0.38591739535331726, "learning_rate": 5.119564045276821e-06, "loss": 0.0009, "step": 3537 }, { "epoch": 6.73, "grad_norm": 0.29813337326049805, "learning_rate": 5.114190524440115e-06, "loss": 0.0015, "step": 3538 }, { "epoch": 6.73, "grad_norm": 1.2504971027374268, "learning_rate": 5.108818856091174e-06, "loss": 0.0152, "step": 3539 }, { "epoch": 6.73, "grad_norm": 0.8978071212768555, "learning_rate": 5.103449042266717e-06, "loss": 0.0058, "step": 3540 }, { "epoch": 6.73, "grad_norm": 0.501835823059082, "learning_rate": 5.09808108500274e-06, "loss": 0.0047, "step": 3541 }, { "epoch": 6.73, "grad_norm": 0.6442597508430481, "learning_rate": 5.092714986334542e-06, "loss": 0.0085, "step": 3542 }, { "epoch": 6.74, "grad_norm": 0.4577977657318115, "learning_rate": 5.087350748296714e-06, "loss": 0.0059, "step": 3543 }, { "epoch": 6.74, "grad_norm": 0.2831842601299286, "learning_rate": 5.0819883729231435e-06, "loss": 0.0018, "step": 3544 }, { "epoch": 6.74, "grad_norm": 0.5468710660934448, "learning_rate": 5.076627862247012e-06, "loss": 0.0056, "step": 3545 }, { "epoch": 6.74, "grad_norm": 0.6444910764694214, "learning_rate": 5.07126921830079e-06, "loss": 0.0042, "step": 3546 }, { "epoch": 6.74, "grad_norm": 0.6378539204597473, "learning_rate": 5.065912443116252e-06, "loss": 0.0093, "step": 3547 }, { "epoch": 6.75, "grad_norm": 0.5451170802116394, "learning_rate": 5.060557538724445e-06, "loss": 0.005, "step": 3548 }, { "epoch": 6.75, "grad_norm": 1.1284947395324707, "learning_rate": 5.055204507155724e-06, "loss": 0.0036, "step": 3549 }, { "epoch": 6.75, "grad_norm": 0.1586458534002304, "learning_rate": 5.049853350439729e-06, "loss": 0.0013, "step": 3550 }, { "epoch": 6.75, "grad_norm": 0.5270030498504639, "learning_rate": 5.0445040706053815e-06, "loss": 0.0104, "step": 3551 }, { "epoch": 6.75, "grad_norm": 0.6099430322647095, "learning_rate": 5.039156669680898e-06, "loss": 0.0054, "step": 3552 }, { "epoch": 6.75, "grad_norm": 0.2296055257320404, "learning_rate": 5.03381114969379e-06, "loss": 0.001, "step": 3553 }, { "epoch": 6.76, "grad_norm": 0.6718198657035828, "learning_rate": 5.028467512670834e-06, "loss": 0.0026, "step": 3554 }, { "epoch": 6.76, "grad_norm": 1.5653936862945557, "learning_rate": 5.023125760638118e-06, "loss": 0.0039, "step": 3555 }, { "epoch": 6.76, "grad_norm": 0.8467821478843689, "learning_rate": 5.017785895620999e-06, "loss": 0.0091, "step": 3556 }, { "epoch": 6.76, "grad_norm": 0.5565018057823181, "learning_rate": 5.012447919644122e-06, "loss": 0.0035, "step": 3557 }, { "epoch": 6.76, "grad_norm": 0.16149671375751495, "learning_rate": 5.007111834731422e-06, "loss": 0.002, "step": 3558 }, { "epoch": 6.77, "grad_norm": 0.7119688391685486, "learning_rate": 5.0017776429061e-06, "loss": 0.0032, "step": 3559 }, { "epoch": 6.77, "grad_norm": 0.34135377407073975, "learning_rate": 4.996445346190663e-06, "loss": 0.0017, "step": 3560 }, { "epoch": 6.77, "grad_norm": 0.2114720344543457, "learning_rate": 4.991114946606882e-06, "loss": 0.0009, "step": 3561 }, { "epoch": 6.77, "grad_norm": 0.6286988258361816, "learning_rate": 4.985786446175815e-06, "loss": 0.0018, "step": 3562 }, { "epoch": 6.77, "grad_norm": 0.3664062023162842, "learning_rate": 4.980459846917797e-06, "loss": 0.0025, "step": 3563 }, { "epoch": 6.78, "grad_norm": 0.23826822638511658, "learning_rate": 4.975135150852453e-06, "loss": 0.0038, "step": 3564 }, { "epoch": 6.78, "grad_norm": 0.23236463963985443, "learning_rate": 4.969812359998663e-06, "loss": 0.0013, "step": 3565 }, { "epoch": 6.78, "grad_norm": 0.27299898862838745, "learning_rate": 4.964491476374611e-06, "loss": 0.0019, "step": 3566 }, { "epoch": 6.78, "grad_norm": 0.17046059668064117, "learning_rate": 4.959172501997742e-06, "loss": 0.0011, "step": 3567 }, { "epoch": 6.78, "grad_norm": 0.8192083835601807, "learning_rate": 4.953855438884782e-06, "loss": 0.0038, "step": 3568 }, { "epoch": 6.79, "grad_norm": 0.510681688785553, "learning_rate": 4.948540289051731e-06, "loss": 0.0041, "step": 3569 }, { "epoch": 6.79, "grad_norm": 0.2910626530647278, "learning_rate": 4.943227054513864e-06, "loss": 0.0014, "step": 3570 }, { "epoch": 6.79, "grad_norm": 0.35768285393714905, "learning_rate": 4.93791573728573e-06, "loss": 0.0021, "step": 3571 }, { "epoch": 6.79, "grad_norm": 0.848209023475647, "learning_rate": 4.9326063393811464e-06, "loss": 0.0038, "step": 3572 }, { "epoch": 6.79, "grad_norm": 0.7098094820976257, "learning_rate": 4.92729886281322e-06, "loss": 0.0085, "step": 3573 }, { "epoch": 6.79, "grad_norm": 0.5963584184646606, "learning_rate": 4.921993309594301e-06, "loss": 0.0021, "step": 3574 }, { "epoch": 6.8, "grad_norm": 0.24625514447689056, "learning_rate": 4.916689681736037e-06, "loss": 0.002, "step": 3575 }, { "epoch": 6.8, "grad_norm": 0.20959150791168213, "learning_rate": 4.911387981249329e-06, "loss": 0.0017, "step": 3576 }, { "epoch": 6.8, "grad_norm": 0.6324885487556458, "learning_rate": 4.906088210144356e-06, "loss": 0.002, "step": 3577 }, { "epoch": 6.8, "grad_norm": 0.6278966665267944, "learning_rate": 4.9007903704305606e-06, "loss": 0.0031, "step": 3578 }, { "epoch": 6.8, "grad_norm": 1.3814053535461426, "learning_rate": 4.895494464116654e-06, "loss": 0.006, "step": 3579 }, { "epoch": 6.81, "grad_norm": 0.336679607629776, "learning_rate": 4.890200493210615e-06, "loss": 0.0013, "step": 3580 }, { "epoch": 6.81, "grad_norm": 0.5460498929023743, "learning_rate": 4.884908459719689e-06, "loss": 0.0018, "step": 3581 }, { "epoch": 6.81, "grad_norm": 0.7915191650390625, "learning_rate": 4.879618365650387e-06, "loss": 0.0124, "step": 3582 }, { "epoch": 6.81, "grad_norm": 0.6529029011726379, "learning_rate": 4.87433021300848e-06, "loss": 0.0089, "step": 3583 }, { "epoch": 6.81, "grad_norm": 0.30176088213920593, "learning_rate": 4.869044003799017e-06, "loss": 0.0024, "step": 3584 }, { "epoch": 6.82, "grad_norm": 0.9183850884437561, "learning_rate": 4.863759740026286e-06, "loss": 0.009, "step": 3585 }, { "epoch": 6.82, "grad_norm": 0.2986622452735901, "learning_rate": 4.858477423693862e-06, "loss": 0.0032, "step": 3586 }, { "epoch": 6.82, "grad_norm": 0.2767307758331299, "learning_rate": 4.853197056804569e-06, "loss": 0.0022, "step": 3587 }, { "epoch": 6.82, "grad_norm": 0.31558677554130554, "learning_rate": 4.847918641360493e-06, "loss": 0.0011, "step": 3588 }, { "epoch": 6.82, "grad_norm": 0.47529637813568115, "learning_rate": 4.84264217936298e-06, "loss": 0.0033, "step": 3589 }, { "epoch": 6.83, "grad_norm": 0.8398638367652893, "learning_rate": 4.837367672812636e-06, "loss": 0.0042, "step": 3590 }, { "epoch": 6.83, "grad_norm": 0.4819674491882324, "learning_rate": 4.832095123709328e-06, "loss": 0.0081, "step": 3591 }, { "epoch": 6.83, "grad_norm": 0.15420185029506683, "learning_rate": 4.826824534052175e-06, "loss": 0.001, "step": 3592 }, { "epoch": 6.83, "grad_norm": 0.3855501115322113, "learning_rate": 4.821555905839565e-06, "loss": 0.0026, "step": 3593 }, { "epoch": 6.83, "grad_norm": 1.9764554500579834, "learning_rate": 4.816289241069122e-06, "loss": 0.003, "step": 3594 }, { "epoch": 6.83, "grad_norm": 0.6105419397354126, "learning_rate": 4.811024541737748e-06, "loss": 0.0042, "step": 3595 }, { "epoch": 6.84, "grad_norm": 0.47905489802360535, "learning_rate": 4.8057618098415845e-06, "loss": 0.0043, "step": 3596 }, { "epoch": 6.84, "grad_norm": 0.07491833716630936, "learning_rate": 4.800501047376034e-06, "loss": 0.0005, "step": 3597 }, { "epoch": 6.84, "grad_norm": 0.7863554358482361, "learning_rate": 4.795242256335745e-06, "loss": 0.003, "step": 3598 }, { "epoch": 6.84, "grad_norm": 0.7002213001251221, "learning_rate": 4.789985438714636e-06, "loss": 0.0053, "step": 3599 }, { "epoch": 6.84, "grad_norm": 0.4037454426288605, "learning_rate": 4.78473059650585e-06, "loss": 0.0021, "step": 3600 }, { "epoch": 6.84, "eval_blimp_filtered_avg": 0.7374626865671642, "eval_blimp_filtered_std": 0.004830391369674824, "step": 3600 }, { "epoch": 6.84, "eval_blimp_supplement_avg": 0.7887931034482759, "eval_blimp_supplement_std": 0.017746924468045863, "step": 3600 }, { "epoch": 6.84, "eval_vqa_filtered_avg": 0.34, "eval_vqa_filtered_std": 0.04760952285695235, "step": 3600 }, { "epoch": 6.84, "eval_winoground_filtered_avg": 0.5, "eval_winoground_filtered_std": 0.050251890762960605, "step": 3600 }, { "epoch": 6.85, "grad_norm": 0.8674353361129761, "learning_rate": 4.779477731701806e-06, "loss": 0.0082, "step": 3601 }, { "epoch": 6.85, "grad_norm": 0.6031800508499146, "learning_rate": 4.774226846294161e-06, "loss": 0.0034, "step": 3602 }, { "epoch": 6.85, "grad_norm": 0.3821650743484497, "learning_rate": 4.768977942273822e-06, "loss": 0.0032, "step": 3603 }, { "epoch": 6.85, "grad_norm": 0.5053271651268005, "learning_rate": 4.763731021630949e-06, "loss": 0.0036, "step": 3604 }, { "epoch": 6.85, "grad_norm": 0.3993751108646393, "learning_rate": 4.758486086354947e-06, "loss": 0.0021, "step": 3605 }, { "epoch": 6.86, "grad_norm": 0.5724478960037231, "learning_rate": 4.753243138434467e-06, "loss": 0.0026, "step": 3606 }, { "epoch": 6.86, "grad_norm": 0.5090261101722717, "learning_rate": 4.74800217985741e-06, "loss": 0.0062, "step": 3607 }, { "epoch": 6.86, "grad_norm": 0.46673619747161865, "learning_rate": 4.742763212610919e-06, "loss": 0.0008, "step": 3608 }, { "epoch": 6.86, "grad_norm": 0.1903819441795349, "learning_rate": 4.737526238681382e-06, "loss": 0.0017, "step": 3609 }, { "epoch": 6.86, "grad_norm": 0.197694331407547, "learning_rate": 4.7322912600544435e-06, "loss": 0.0012, "step": 3610 }, { "epoch": 6.87, "grad_norm": 0.32595294713974, "learning_rate": 4.727058278714967e-06, "loss": 0.0018, "step": 3611 }, { "epoch": 6.87, "grad_norm": 1.1722469329833984, "learning_rate": 4.721827296647083e-06, "loss": 0.0113, "step": 3612 }, { "epoch": 6.87, "grad_norm": 0.22629708051681519, "learning_rate": 4.716598315834151e-06, "loss": 0.0022, "step": 3613 }, { "epoch": 6.87, "grad_norm": 0.39744240045547485, "learning_rate": 4.711371338258775e-06, "loss": 0.0017, "step": 3614 }, { "epoch": 6.87, "grad_norm": 0.926997184753418, "learning_rate": 4.706146365902796e-06, "loss": 0.0027, "step": 3615 }, { "epoch": 6.87, "grad_norm": 0.3272531032562256, "learning_rate": 4.700923400747301e-06, "loss": 0.003, "step": 3616 }, { "epoch": 6.88, "grad_norm": 1.4005323648452759, "learning_rate": 4.695702444772611e-06, "loss": 0.005, "step": 3617 }, { "epoch": 6.88, "grad_norm": 2.351025104522705, "learning_rate": 4.690483499958284e-06, "loss": 0.0128, "step": 3618 }, { "epoch": 6.88, "grad_norm": 0.7363427877426147, "learning_rate": 4.685266568283129e-06, "loss": 0.0079, "step": 3619 }, { "epoch": 6.88, "grad_norm": 0.2787788212299347, "learning_rate": 4.680051651725165e-06, "loss": 0.0021, "step": 3620 }, { "epoch": 6.88, "grad_norm": 0.5605729818344116, "learning_rate": 4.674838752261675e-06, "loss": 0.0035, "step": 3621 }, { "epoch": 6.89, "grad_norm": 0.25320619344711304, "learning_rate": 4.6696278718691635e-06, "loss": 0.0043, "step": 3622 }, { "epoch": 6.89, "grad_norm": 0.45076727867126465, "learning_rate": 4.664419012523368e-06, "loss": 0.0036, "step": 3623 }, { "epoch": 6.89, "grad_norm": 0.770287275314331, "learning_rate": 4.659212176199265e-06, "loss": 0.0034, "step": 3624 }, { "epoch": 6.89, "grad_norm": 0.635648787021637, "learning_rate": 4.654007364871061e-06, "loss": 0.0048, "step": 3625 }, { "epoch": 6.89, "grad_norm": 1.8393456935882568, "learning_rate": 4.648804580512194e-06, "loss": 0.0175, "step": 3626 }, { "epoch": 6.9, "grad_norm": 0.6799167394638062, "learning_rate": 4.643603825095333e-06, "loss": 0.0055, "step": 3627 }, { "epoch": 6.9, "grad_norm": 0.9919283986091614, "learning_rate": 4.63840510059239e-06, "loss": 0.0084, "step": 3628 }, { "epoch": 6.9, "grad_norm": 1.0769554376602173, "learning_rate": 4.633208408974483e-06, "loss": 0.0091, "step": 3629 }, { "epoch": 6.9, "grad_norm": 0.17432598769664764, "learning_rate": 4.628013752211987e-06, "loss": 0.0009, "step": 3630 }, { "epoch": 6.9, "grad_norm": 0.41857078671455383, "learning_rate": 4.622821132274475e-06, "loss": 0.003, "step": 3631 }, { "epoch": 6.9, "grad_norm": 0.2808647155761719, "learning_rate": 4.6176305511307785e-06, "loss": 0.0015, "step": 3632 }, { "epoch": 6.91, "grad_norm": 0.2222224473953247, "learning_rate": 4.6124420107489345e-06, "loss": 0.0021, "step": 3633 }, { "epoch": 6.91, "grad_norm": 0.5322156548500061, "learning_rate": 4.607255513096215e-06, "loss": 0.0098, "step": 3634 }, { "epoch": 6.91, "grad_norm": 0.4340938925743103, "learning_rate": 4.602071060139115e-06, "loss": 0.0023, "step": 3635 }, { "epoch": 6.91, "grad_norm": 0.18313759565353394, "learning_rate": 4.5968886538433545e-06, "loss": 0.0012, "step": 3636 }, { "epoch": 6.91, "grad_norm": 0.9081158638000488, "learning_rate": 4.59170829617388e-06, "loss": 0.004, "step": 3637 }, { "epoch": 6.92, "grad_norm": 0.28740808367729187, "learning_rate": 4.586529989094854e-06, "loss": 0.0018, "step": 3638 }, { "epoch": 6.92, "grad_norm": 0.5752676725387573, "learning_rate": 4.581353734569678e-06, "loss": 0.003, "step": 3639 }, { "epoch": 6.92, "grad_norm": 1.8100310564041138, "learning_rate": 4.576179534560948e-06, "loss": 0.0044, "step": 3640 }, { "epoch": 6.92, "grad_norm": 0.1623036116361618, "learning_rate": 4.571007391030511e-06, "loss": 0.0014, "step": 3641 }, { "epoch": 6.92, "grad_norm": 1.1824352741241455, "learning_rate": 4.565837305939414e-06, "loss": 0.0089, "step": 3642 }, { "epoch": 6.93, "grad_norm": 0.10828474909067154, "learning_rate": 4.560669281247932e-06, "loss": 0.0005, "step": 3643 }, { "epoch": 6.93, "grad_norm": 1.573227047920227, "learning_rate": 4.5555033189155505e-06, "loss": 0.0098, "step": 3644 }, { "epoch": 6.93, "grad_norm": 0.8799106478691101, "learning_rate": 4.550339420900992e-06, "loss": 0.0016, "step": 3645 }, { "epoch": 6.93, "grad_norm": 0.6171805262565613, "learning_rate": 4.545177589162167e-06, "loss": 0.0062, "step": 3646 }, { "epoch": 6.93, "grad_norm": 0.5778689980506897, "learning_rate": 4.540017825656232e-06, "loss": 0.0063, "step": 3647 }, { "epoch": 6.94, "grad_norm": 0.857073962688446, "learning_rate": 4.534860132339541e-06, "loss": 0.0057, "step": 3648 }, { "epoch": 6.94, "grad_norm": 0.7416361570358276, "learning_rate": 4.529704511167669e-06, "loss": 0.007, "step": 3649 }, { "epoch": 6.94, "grad_norm": 0.44167783856391907, "learning_rate": 4.524550964095406e-06, "loss": 0.0032, "step": 3650 }, { "epoch": 6.94, "grad_norm": 0.9822051525115967, "learning_rate": 4.519399493076752e-06, "loss": 0.0073, "step": 3651 }, { "epoch": 6.94, "grad_norm": 0.33648020029067993, "learning_rate": 4.514250100064924e-06, "loss": 0.003, "step": 3652 }, { "epoch": 6.94, "grad_norm": 0.27508601546287537, "learning_rate": 4.509102787012344e-06, "loss": 0.0017, "step": 3653 }, { "epoch": 6.95, "grad_norm": 0.3809891641139984, "learning_rate": 4.5039575558706625e-06, "loss": 0.002, "step": 3654 }, { "epoch": 6.95, "grad_norm": 0.8943369388580322, "learning_rate": 4.498814408590714e-06, "loss": 0.0045, "step": 3655 }, { "epoch": 6.95, "grad_norm": 0.16639545559883118, "learning_rate": 4.493673347122572e-06, "loss": 0.0015, "step": 3656 }, { "epoch": 6.95, "grad_norm": 0.5430283546447754, "learning_rate": 4.488534373415492e-06, "loss": 0.0046, "step": 3657 }, { "epoch": 6.95, "grad_norm": 0.6238420009613037, "learning_rate": 4.483397489417959e-06, "loss": 0.0042, "step": 3658 }, { "epoch": 6.96, "grad_norm": 0.47497865557670593, "learning_rate": 4.478262697077655e-06, "loss": 0.0038, "step": 3659 }, { "epoch": 6.96, "grad_norm": 0.8549213409423828, "learning_rate": 4.47312999834147e-06, "loss": 0.0067, "step": 3660 }, { "epoch": 6.96, "grad_norm": 0.19244416058063507, "learning_rate": 4.467999395155504e-06, "loss": 0.0011, "step": 3661 }, { "epoch": 6.96, "grad_norm": 0.23914478719234467, "learning_rate": 4.462870889465058e-06, "loss": 0.0018, "step": 3662 }, { "epoch": 6.96, "grad_norm": 0.1131080910563469, "learning_rate": 4.457744483214641e-06, "loss": 0.0006, "step": 3663 }, { "epoch": 6.97, "grad_norm": 0.731115460395813, "learning_rate": 4.452620178347961e-06, "loss": 0.0035, "step": 3664 }, { "epoch": 6.97, "grad_norm": 0.26112037897109985, "learning_rate": 4.447497976807942e-06, "loss": 0.0021, "step": 3665 }, { "epoch": 6.97, "grad_norm": 0.7787262201309204, "learning_rate": 4.442377880536689e-06, "loss": 0.0044, "step": 3666 }, { "epoch": 6.97, "grad_norm": 0.24593880772590637, "learning_rate": 4.43725989147553e-06, "loss": 0.0023, "step": 3667 }, { "epoch": 6.97, "grad_norm": 0.15208560228347778, "learning_rate": 4.432144011564984e-06, "loss": 0.0008, "step": 3668 }, { "epoch": 6.98, "grad_norm": 1.363633632659912, "learning_rate": 4.42703024274477e-06, "loss": 0.0061, "step": 3669 }, { "epoch": 6.98, "grad_norm": 0.927568793296814, "learning_rate": 4.4219185869538085e-06, "loss": 0.0021, "step": 3670 }, { "epoch": 6.98, "grad_norm": 0.4569847285747528, "learning_rate": 4.416809046130218e-06, "loss": 0.0021, "step": 3671 }, { "epoch": 6.98, "grad_norm": 0.8425508737564087, "learning_rate": 4.411701622211316e-06, "loss": 0.0063, "step": 3672 }, { "epoch": 6.98, "grad_norm": 0.21160709857940674, "learning_rate": 4.406596317133615e-06, "loss": 0.0011, "step": 3673 }, { "epoch": 6.98, "grad_norm": 1.0454986095428467, "learning_rate": 4.401493132832833e-06, "loss": 0.0045, "step": 3674 }, { "epoch": 6.99, "grad_norm": 0.37992796301841736, "learning_rate": 4.396392071243865e-06, "loss": 0.0056, "step": 3675 }, { "epoch": 6.99, "grad_norm": 0.5194094777107239, "learning_rate": 4.391293134300823e-06, "loss": 0.0022, "step": 3676 }, { "epoch": 6.99, "grad_norm": 0.462329626083374, "learning_rate": 4.3861963239370005e-06, "loss": 0.0068, "step": 3677 }, { "epoch": 6.99, "grad_norm": 0.42235636711120605, "learning_rate": 4.381101642084888e-06, "loss": 0.0033, "step": 3678 }, { "epoch": 6.99, "grad_norm": 0.2749471664428711, "learning_rate": 4.3760090906761685e-06, "loss": 0.0014, "step": 3679 }, { "epoch": 7.0, "grad_norm": 1.2153170108795166, "learning_rate": 4.370918671641716e-06, "loss": 0.0059, "step": 3680 }, { "epoch": 7.0, "grad_norm": 0.35980767011642456, "learning_rate": 4.365830386911599e-06, "loss": 0.001, "step": 3681 }, { "epoch": 7.0, "grad_norm": 0.6547788977622986, "learning_rate": 4.360744238415075e-06, "loss": 0.0048, "step": 3682 }, { "epoch": 7.0, "grad_norm": 0.6304142475128174, "learning_rate": 4.355660228080591e-06, "loss": 0.0084, "step": 3683 }, { "epoch": 7.0, "grad_norm": 0.7391801476478577, "learning_rate": 4.350578357835781e-06, "loss": 0.0034, "step": 3684 }, { "epoch": 7.01, "grad_norm": 1.4025472402572632, "learning_rate": 4.3454986296074795e-06, "loss": 0.0055, "step": 3685 }, { "epoch": 7.01, "grad_norm": 0.3541855216026306, "learning_rate": 4.340421045321688e-06, "loss": 0.0023, "step": 3686 }, { "epoch": 7.01, "grad_norm": 0.570199191570282, "learning_rate": 4.335345606903616e-06, "loss": 0.0035, "step": 3687 }, { "epoch": 7.01, "grad_norm": 0.59108966588974, "learning_rate": 4.330272316277649e-06, "loss": 0.0043, "step": 3688 }, { "epoch": 7.01, "grad_norm": 0.4177117347717285, "learning_rate": 4.325201175367356e-06, "loss": 0.0021, "step": 3689 }, { "epoch": 7.02, "grad_norm": 0.16225412487983704, "learning_rate": 4.3201321860954945e-06, "loss": 0.0011, "step": 3690 }, { "epoch": 7.02, "grad_norm": 0.30663591623306274, "learning_rate": 4.3150653503840145e-06, "loss": 0.002, "step": 3691 }, { "epoch": 7.02, "grad_norm": 0.8374054431915283, "learning_rate": 4.310000670154027e-06, "loss": 0.0042, "step": 3692 }, { "epoch": 7.02, "grad_norm": 0.14841894805431366, "learning_rate": 4.304938147325853e-06, "loss": 0.0006, "step": 3693 }, { "epoch": 7.02, "grad_norm": 0.5630840063095093, "learning_rate": 4.2998777838189755e-06, "loss": 0.0019, "step": 3694 }, { "epoch": 7.02, "grad_norm": 0.14510346949100494, "learning_rate": 4.294819581552068e-06, "loss": 0.0013, "step": 3695 }, { "epoch": 7.03, "grad_norm": 0.49925222992897034, "learning_rate": 4.28976354244298e-06, "loss": 0.0015, "step": 3696 }, { "epoch": 7.03, "grad_norm": 0.4199729561805725, "learning_rate": 4.284709668408744e-06, "loss": 0.0014, "step": 3697 }, { "epoch": 7.03, "grad_norm": 0.12330497056245804, "learning_rate": 4.279657961365572e-06, "loss": 0.0004, "step": 3698 }, { "epoch": 7.03, "grad_norm": 0.2662029266357422, "learning_rate": 4.274608423228847e-06, "loss": 0.0013, "step": 3699 }, { "epoch": 7.03, "grad_norm": 0.8268382549285889, "learning_rate": 4.2695610559131486e-06, "loss": 0.0066, "step": 3700 }, { "epoch": 7.03, "eval_blimp_filtered_avg": 0.7377611940298507, "eval_blimp_filtered_std": 0.004840535937973145, "step": 3700 }, { "epoch": 7.03, "eval_blimp_supplement_avg": 0.8060344827586207, "eval_blimp_supplement_std": 0.01733909467191901, "step": 3700 }, { "epoch": 7.03, "eval_vqa_filtered_avg": 0.34, "eval_vqa_filtered_std": 0.04760952285695235, "step": 3700 }, { "epoch": 7.03, "eval_winoground_filtered_avg": 0.5, "eval_winoground_filtered_std": 0.050251890762960605, "step": 3700 }, { "epoch": 7.04, "grad_norm": 0.6266802549362183, "learning_rate": 4.264515861332205e-06, "loss": 0.0027, "step": 3701 }, { "epoch": 7.04, "grad_norm": 0.32977908849716187, "learning_rate": 4.259472841398945e-06, "loss": 0.0021, "step": 3702 }, { "epoch": 7.04, "grad_norm": 0.32260578870773315, "learning_rate": 4.254431998025462e-06, "loss": 0.0012, "step": 3703 }, { "epoch": 7.04, "grad_norm": 0.3582462966442108, "learning_rate": 4.249393333123026e-06, "loss": 0.0018, "step": 3704 }, { "epoch": 7.04, "grad_norm": 0.2758294939994812, "learning_rate": 4.244356848602082e-06, "loss": 0.0012, "step": 3705 }, { "epoch": 7.05, "grad_norm": 1.300262689590454, "learning_rate": 4.239322546372244e-06, "loss": 0.0042, "step": 3706 }, { "epoch": 7.05, "grad_norm": 0.28159084916114807, "learning_rate": 4.2342904283423045e-06, "loss": 0.0024, "step": 3707 }, { "epoch": 7.05, "grad_norm": 0.24517813324928284, "learning_rate": 4.229260496420224e-06, "loss": 0.0016, "step": 3708 }, { "epoch": 7.05, "grad_norm": 0.1589890420436859, "learning_rate": 4.224232752513136e-06, "loss": 0.001, "step": 3709 }, { "epoch": 7.05, "grad_norm": 0.4424799680709839, "learning_rate": 4.21920719852734e-06, "loss": 0.0016, "step": 3710 }, { "epoch": 7.06, "grad_norm": 0.3368559777736664, "learning_rate": 4.214183836368318e-06, "loss": 0.0036, "step": 3711 }, { "epoch": 7.06, "grad_norm": 0.44301342964172363, "learning_rate": 4.209162667940701e-06, "loss": 0.005, "step": 3712 }, { "epoch": 7.06, "grad_norm": 1.3328152894973755, "learning_rate": 4.204143695148305e-06, "loss": 0.0056, "step": 3713 }, { "epoch": 7.06, "grad_norm": 0.27847760915756226, "learning_rate": 4.199126919894109e-06, "loss": 0.0021, "step": 3714 }, { "epoch": 7.06, "grad_norm": 1.0363030433654785, "learning_rate": 4.194112344080252e-06, "loss": 0.0029, "step": 3715 }, { "epoch": 7.06, "grad_norm": 0.18304750323295593, "learning_rate": 4.189099969608049e-06, "loss": 0.0007, "step": 3716 }, { "epoch": 7.07, "grad_norm": 0.5794500112533569, "learning_rate": 4.184089798377975e-06, "loss": 0.0036, "step": 3717 }, { "epoch": 7.07, "grad_norm": 0.6761910915374756, "learning_rate": 4.179081832289667e-06, "loss": 0.0012, "step": 3718 }, { "epoch": 7.07, "grad_norm": 0.5107115507125854, "learning_rate": 4.174076073241931e-06, "loss": 0.0019, "step": 3719 }, { "epoch": 7.07, "grad_norm": 0.0865197628736496, "learning_rate": 4.16907252313274e-06, "loss": 0.0005, "step": 3720 }, { "epoch": 7.07, "grad_norm": 0.4165622889995575, "learning_rate": 4.164071183859211e-06, "loss": 0.002, "step": 3721 }, { "epoch": 7.08, "grad_norm": 0.0903775691986084, "learning_rate": 4.15907205731765e-06, "loss": 0.0008, "step": 3722 }, { "epoch": 7.08, "grad_norm": 0.6688073873519897, "learning_rate": 4.1540751454035025e-06, "loss": 0.0054, "step": 3723 }, { "epoch": 7.08, "grad_norm": 1.2382493019104004, "learning_rate": 4.149080450011382e-06, "loss": 0.0117, "step": 3724 }, { "epoch": 7.08, "grad_norm": 0.7802330255508423, "learning_rate": 4.144087973035062e-06, "loss": 0.004, "step": 3725 }, { "epoch": 7.08, "grad_norm": 0.4643577039241791, "learning_rate": 4.139097716367474e-06, "loss": 0.0036, "step": 3726 }, { "epoch": 7.09, "grad_norm": 0.15233322978019714, "learning_rate": 4.134109681900707e-06, "loss": 0.0009, "step": 3727 }, { "epoch": 7.09, "grad_norm": 0.16533075273036957, "learning_rate": 4.129123871526007e-06, "loss": 0.0012, "step": 3728 }, { "epoch": 7.09, "grad_norm": 0.25042763352394104, "learning_rate": 4.124140287133781e-06, "loss": 0.0014, "step": 3729 }, { "epoch": 7.09, "grad_norm": 0.13472093641757965, "learning_rate": 4.119158930613582e-06, "loss": 0.0008, "step": 3730 }, { "epoch": 7.09, "grad_norm": 0.4884595274925232, "learning_rate": 4.114179803854138e-06, "loss": 0.003, "step": 3731 }, { "epoch": 7.1, "grad_norm": 0.4557907283306122, "learning_rate": 4.109202908743303e-06, "loss": 0.0019, "step": 3732 }, { "epoch": 7.1, "grad_norm": 0.700093686580658, "learning_rate": 4.104228247168112e-06, "loss": 0.0051, "step": 3733 }, { "epoch": 7.1, "grad_norm": 0.16893674433231354, "learning_rate": 4.099255821014737e-06, "loss": 0.0007, "step": 3734 }, { "epoch": 7.1, "grad_norm": 0.14784133434295654, "learning_rate": 4.094285632168507e-06, "loss": 0.0009, "step": 3735 }, { "epoch": 7.1, "grad_norm": 0.20378947257995605, "learning_rate": 4.089317682513903e-06, "loss": 0.001, "step": 3736 }, { "epoch": 7.1, "grad_norm": 0.7475717663764954, "learning_rate": 4.0843519739345615e-06, "loss": 0.0034, "step": 3737 }, { "epoch": 7.11, "grad_norm": 0.40193626284599304, "learning_rate": 4.079388508313255e-06, "loss": 0.0013, "step": 3738 }, { "epoch": 7.11, "grad_norm": 0.1406470388174057, "learning_rate": 4.074427287531925e-06, "loss": 0.0011, "step": 3739 }, { "epoch": 7.11, "grad_norm": 0.18271911144256592, "learning_rate": 4.069468313471646e-06, "loss": 0.0014, "step": 3740 }, { "epoch": 7.11, "grad_norm": 0.45134830474853516, "learning_rate": 4.064511588012649e-06, "loss": 0.0007, "step": 3741 }, { "epoch": 7.11, "grad_norm": 0.4774637818336487, "learning_rate": 4.059557113034308e-06, "loss": 0.0017, "step": 3742 }, { "epoch": 7.12, "grad_norm": 0.5766971707344055, "learning_rate": 4.054604890415148e-06, "loss": 0.0062, "step": 3743 }, { "epoch": 7.12, "grad_norm": 0.2149340808391571, "learning_rate": 4.049654922032837e-06, "loss": 0.0015, "step": 3744 }, { "epoch": 7.12, "grad_norm": 0.3865819275379181, "learning_rate": 4.0447072097641844e-06, "loss": 0.002, "step": 3745 }, { "epoch": 7.12, "grad_norm": 0.1357206404209137, "learning_rate": 4.039761755485162e-06, "loss": 0.0008, "step": 3746 }, { "epoch": 7.12, "grad_norm": 0.05052679777145386, "learning_rate": 4.034818561070856e-06, "loss": 0.0004, "step": 3747 }, { "epoch": 7.13, "grad_norm": 0.19765906035900116, "learning_rate": 4.029877628395522e-06, "loss": 0.0009, "step": 3748 }, { "epoch": 7.13, "grad_norm": 0.20206904411315918, "learning_rate": 4.024938959332546e-06, "loss": 0.0009, "step": 3749 }, { "epoch": 7.13, "grad_norm": 0.25673457980155945, "learning_rate": 4.020002555754459e-06, "loss": 0.0014, "step": 3750 }, { "epoch": 7.13, "grad_norm": 0.3656654953956604, "learning_rate": 4.015068419532929e-06, "loss": 0.0017, "step": 3751 }, { "epoch": 7.13, "grad_norm": 0.44700944423675537, "learning_rate": 4.010136552538769e-06, "loss": 0.0022, "step": 3752 }, { "epoch": 7.13, "grad_norm": 0.06887218356132507, "learning_rate": 4.00520695664193e-06, "loss": 0.0006, "step": 3753 }, { "epoch": 7.14, "grad_norm": 0.50782710313797, "learning_rate": 4.0002796337115015e-06, "loss": 0.0018, "step": 3754 }, { "epoch": 7.14, "grad_norm": 0.5006281137466431, "learning_rate": 3.995354585615711e-06, "loss": 0.0049, "step": 3755 }, { "epoch": 7.14, "grad_norm": 0.18946678936481476, "learning_rate": 3.990431814221919e-06, "loss": 0.0012, "step": 3756 }, { "epoch": 7.14, "grad_norm": 0.2269395887851715, "learning_rate": 3.98551132139664e-06, "loss": 0.001, "step": 3757 }, { "epoch": 7.14, "grad_norm": 0.13343845307826996, "learning_rate": 3.980593109005498e-06, "loss": 0.0007, "step": 3758 }, { "epoch": 7.15, "grad_norm": 0.5059443712234497, "learning_rate": 3.975677178913278e-06, "loss": 0.0016, "step": 3759 }, { "epoch": 7.15, "grad_norm": 0.5601640343666077, "learning_rate": 3.9707635329838826e-06, "loss": 0.0019, "step": 3760 }, { "epoch": 7.15, "grad_norm": 0.06660071760416031, "learning_rate": 3.965852173080354e-06, "loss": 0.0004, "step": 3761 }, { "epoch": 7.15, "grad_norm": 0.23243975639343262, "learning_rate": 3.960943101064869e-06, "loss": 0.0022, "step": 3762 }, { "epoch": 7.15, "grad_norm": 0.5057353973388672, "learning_rate": 3.956036318798736e-06, "loss": 0.0025, "step": 3763 }, { "epoch": 7.16, "grad_norm": 1.1088889837265015, "learning_rate": 3.951131828142393e-06, "loss": 0.0059, "step": 3764 }, { "epoch": 7.16, "grad_norm": 0.6297330856323242, "learning_rate": 3.946229630955407e-06, "loss": 0.0041, "step": 3765 }, { "epoch": 7.16, "grad_norm": 0.7308667302131653, "learning_rate": 3.941329729096493e-06, "loss": 0.0025, "step": 3766 }, { "epoch": 7.16, "grad_norm": 0.053703393787145615, "learning_rate": 3.936432124423464e-06, "loss": 0.0003, "step": 3767 }, { "epoch": 7.16, "grad_norm": 1.0054516792297363, "learning_rate": 3.931536818793294e-06, "loss": 0.0043, "step": 3768 }, { "epoch": 7.17, "grad_norm": 0.36590519547462463, "learning_rate": 3.926643814062064e-06, "loss": 0.0021, "step": 3769 }, { "epoch": 7.17, "grad_norm": 0.27985915541648865, "learning_rate": 3.921753112084995e-06, "loss": 0.0013, "step": 3770 }, { "epoch": 7.17, "grad_norm": 0.12232618778944016, "learning_rate": 3.916864714716425e-06, "loss": 0.0005, "step": 3771 }, { "epoch": 7.17, "grad_norm": 0.2594970762729645, "learning_rate": 3.911978623809825e-06, "loss": 0.0015, "step": 3772 }, { "epoch": 7.17, "grad_norm": 0.5919599533081055, "learning_rate": 3.9070948412177904e-06, "loss": 0.0029, "step": 3773 }, { "epoch": 7.17, "grad_norm": 0.5050314664840698, "learning_rate": 3.902213368792035e-06, "loss": 0.002, "step": 3774 }, { "epoch": 7.18, "grad_norm": 0.9283249378204346, "learning_rate": 3.897334208383413e-06, "loss": 0.0019, "step": 3775 }, { "epoch": 7.18, "grad_norm": 0.4698369801044464, "learning_rate": 3.892457361841879e-06, "loss": 0.0018, "step": 3776 }, { "epoch": 7.18, "grad_norm": 0.7319924235343933, "learning_rate": 3.887582831016536e-06, "loss": 0.0041, "step": 3777 }, { "epoch": 7.18, "grad_norm": 0.25404268503189087, "learning_rate": 3.882710617755579e-06, "loss": 0.0018, "step": 3778 }, { "epoch": 7.18, "grad_norm": 0.3629729449748993, "learning_rate": 3.877840723906352e-06, "loss": 0.002, "step": 3779 }, { "epoch": 7.19, "grad_norm": 0.45609357953071594, "learning_rate": 3.872973151315307e-06, "loss": 0.0028, "step": 3780 }, { "epoch": 7.19, "grad_norm": 0.6428470611572266, "learning_rate": 3.8681079018280146e-06, "loss": 0.0021, "step": 3781 }, { "epoch": 7.19, "grad_norm": 1.3151696920394897, "learning_rate": 3.863244977289166e-06, "loss": 0.0061, "step": 3782 }, { "epoch": 7.19, "grad_norm": 0.15875932574272156, "learning_rate": 3.85838437954258e-06, "loss": 0.0015, "step": 3783 }, { "epoch": 7.19, "grad_norm": 0.23891887068748474, "learning_rate": 3.8535261104311725e-06, "loss": 0.001, "step": 3784 }, { "epoch": 7.2, "grad_norm": 0.34309160709381104, "learning_rate": 3.848670171797e-06, "loss": 0.0036, "step": 3785 }, { "epoch": 7.2, "grad_norm": 0.23043416440486908, "learning_rate": 3.84381656548122e-06, "loss": 0.0026, "step": 3786 }, { "epoch": 7.2, "grad_norm": 0.43702051043510437, "learning_rate": 3.838965293324111e-06, "loss": 0.0046, "step": 3787 }, { "epoch": 7.2, "grad_norm": 0.8267781138420105, "learning_rate": 3.834116357165064e-06, "loss": 0.0029, "step": 3788 }, { "epoch": 7.2, "grad_norm": 1.0272867679595947, "learning_rate": 3.82926975884259e-06, "loss": 0.0181, "step": 3789 }, { "epoch": 7.21, "grad_norm": 0.08281259983778, "learning_rate": 3.824425500194305e-06, "loss": 0.0005, "step": 3790 }, { "epoch": 7.21, "grad_norm": 0.6307563781738281, "learning_rate": 3.819583583056942e-06, "loss": 0.0023, "step": 3791 }, { "epoch": 7.21, "grad_norm": 0.13520751893520355, "learning_rate": 3.814744009266355e-06, "loss": 0.0012, "step": 3792 }, { "epoch": 7.21, "grad_norm": 0.5406705737113953, "learning_rate": 3.809906780657491e-06, "loss": 0.0036, "step": 3793 }, { "epoch": 7.21, "grad_norm": 0.7308921217918396, "learning_rate": 3.805071899064424e-06, "loss": 0.0016, "step": 3794 }, { "epoch": 7.21, "grad_norm": 0.2879466712474823, "learning_rate": 3.8002393663203317e-06, "loss": 0.0019, "step": 3795 }, { "epoch": 7.22, "grad_norm": 0.07328371703624725, "learning_rate": 3.7954091842575e-06, "loss": 0.0005, "step": 3796 }, { "epoch": 7.22, "grad_norm": 1.2290304899215698, "learning_rate": 3.7905813547073255e-06, "loss": 0.0046, "step": 3797 }, { "epoch": 7.22, "grad_norm": 0.10077936202287674, "learning_rate": 3.7857558795003123e-06, "loss": 0.0005, "step": 3798 }, { "epoch": 7.22, "grad_norm": 0.28368133306503296, "learning_rate": 3.780932760466074e-06, "loss": 0.0022, "step": 3799 }, { "epoch": 7.22, "grad_norm": 0.3157375752925873, "learning_rate": 3.776111999433322e-06, "loss": 0.0021, "step": 3800 }, { "epoch": 7.22, "eval_blimp_filtered_avg": 0.7380597014925373, "eval_blimp_filtered_std": 0.00484183736974208, "step": 3800 }, { "epoch": 7.22, "eval_blimp_supplement_avg": 0.8017241379310345, "eval_blimp_supplement_std": 0.017499141397956938, "step": 3800 }, { "epoch": 7.22, "eval_vqa_filtered_avg": 0.33, "eval_vqa_filtered_std": 0.047258156262526045, "step": 3800 }, { "epoch": 7.22, "eval_winoground_filtered_avg": 0.5, "eval_winoground_filtered_std": 0.050251890762960605, "step": 3800 }, { "epoch": 7.23, "grad_norm": 0.6496750712394714, "learning_rate": 3.7712935982298937e-06, "loss": 0.0025, "step": 3801 }, { "epoch": 7.23, "grad_norm": 0.17376945912837982, "learning_rate": 3.766477558682704e-06, "loss": 0.0012, "step": 3802 }, { "epoch": 7.23, "grad_norm": 0.12828871607780457, "learning_rate": 3.7616638826178e-06, "loss": 0.0004, "step": 3803 }, { "epoch": 7.23, "grad_norm": 0.8181118369102478, "learning_rate": 3.7568525718603068e-06, "loss": 0.0023, "step": 3804 }, { "epoch": 7.23, "grad_norm": 0.7249596118927002, "learning_rate": 3.752043628234474e-06, "loss": 0.0068, "step": 3805 }, { "epoch": 7.24, "grad_norm": 0.2531784474849701, "learning_rate": 3.747237053563645e-06, "loss": 0.0022, "step": 3806 }, { "epoch": 7.24, "grad_norm": 0.1312198042869568, "learning_rate": 3.742432849670261e-06, "loss": 0.0011, "step": 3807 }, { "epoch": 7.24, "grad_norm": 0.28574687242507935, "learning_rate": 3.7376310183758722e-06, "loss": 0.0008, "step": 3808 }, { "epoch": 7.24, "grad_norm": 0.5614407658576965, "learning_rate": 3.7328315615011234e-06, "loss": 0.0023, "step": 3809 }, { "epoch": 7.24, "grad_norm": 0.18495362997055054, "learning_rate": 3.7280344808657632e-06, "loss": 0.0009, "step": 3810 }, { "epoch": 7.25, "grad_norm": 0.7898962497711182, "learning_rate": 3.723239778288631e-06, "loss": 0.0057, "step": 3811 }, { "epoch": 7.25, "grad_norm": 0.2758534848690033, "learning_rate": 3.7184474555876826e-06, "loss": 0.0019, "step": 3812 }, { "epoch": 7.25, "grad_norm": 0.1337035894393921, "learning_rate": 3.7136575145799456e-06, "loss": 0.0006, "step": 3813 }, { "epoch": 7.25, "grad_norm": 0.35877758264541626, "learning_rate": 3.708869957081569e-06, "loss": 0.002, "step": 3814 }, { "epoch": 7.25, "grad_norm": 0.5095251798629761, "learning_rate": 3.7040847849077844e-06, "loss": 0.002, "step": 3815 }, { "epoch": 7.25, "grad_norm": 0.06065607815980911, "learning_rate": 3.6993019998729217e-06, "loss": 0.0002, "step": 3816 }, { "epoch": 7.26, "grad_norm": 0.46685001254081726, "learning_rate": 3.6945216037904076e-06, "loss": 0.0034, "step": 3817 }, { "epoch": 7.26, "grad_norm": 0.37123727798461914, "learning_rate": 3.6897435984727604e-06, "loss": 0.0023, "step": 3818 }, { "epoch": 7.26, "grad_norm": 0.47623687982559204, "learning_rate": 3.6849679857315935e-06, "loss": 0.0025, "step": 3819 }, { "epoch": 7.26, "grad_norm": 1.4057048559188843, "learning_rate": 3.6801947673776095e-06, "loss": 0.0073, "step": 3820 }, { "epoch": 7.26, "grad_norm": 1.0655890703201294, "learning_rate": 3.6754239452206177e-06, "loss": 0.002, "step": 3821 }, { "epoch": 7.27, "grad_norm": 0.2878413200378418, "learning_rate": 3.6706555210694916e-06, "loss": 0.0038, "step": 3822 }, { "epoch": 7.27, "grad_norm": 0.14127865433692932, "learning_rate": 3.6658894967322235e-06, "loss": 0.0008, "step": 3823 }, { "epoch": 7.27, "grad_norm": 0.44671186804771423, "learning_rate": 3.6611258740158806e-06, "loss": 0.0029, "step": 3824 }, { "epoch": 7.27, "grad_norm": 0.2913244962692261, "learning_rate": 3.656364654726622e-06, "loss": 0.0017, "step": 3825 }, { "epoch": 7.27, "grad_norm": 0.12775738537311554, "learning_rate": 3.651605840669695e-06, "loss": 0.0011, "step": 3826 }, { "epoch": 7.28, "grad_norm": 0.29809850454330444, "learning_rate": 3.6468494336494397e-06, "loss": 0.0015, "step": 3827 }, { "epoch": 7.28, "grad_norm": 0.5880947113037109, "learning_rate": 3.642095435469274e-06, "loss": 0.0062, "step": 3828 }, { "epoch": 7.28, "grad_norm": 0.31823310256004333, "learning_rate": 3.637343847931719e-06, "loss": 0.0012, "step": 3829 }, { "epoch": 7.28, "grad_norm": 0.16052481532096863, "learning_rate": 3.632594672838359e-06, "loss": 0.001, "step": 3830 }, { "epoch": 7.28, "grad_norm": 0.5831488966941833, "learning_rate": 3.6278479119898847e-06, "loss": 0.0017, "step": 3831 }, { "epoch": 7.29, "grad_norm": 1.5534827709197998, "learning_rate": 3.6231035671860605e-06, "loss": 0.0039, "step": 3832 }, { "epoch": 7.29, "grad_norm": 0.2989145517349243, "learning_rate": 3.618361640225735e-06, "loss": 0.0018, "step": 3833 }, { "epoch": 7.29, "grad_norm": 0.408076673746109, "learning_rate": 3.6136221329068435e-06, "loss": 0.0019, "step": 3834 }, { "epoch": 7.29, "grad_norm": 0.395656555891037, "learning_rate": 3.6088850470264015e-06, "loss": 0.0021, "step": 3835 }, { "epoch": 7.29, "grad_norm": 0.44569307565689087, "learning_rate": 3.604150384380508e-06, "loss": 0.0015, "step": 3836 }, { "epoch": 7.29, "grad_norm": 0.9238083362579346, "learning_rate": 3.59941814676434e-06, "loss": 0.0062, "step": 3837 }, { "epoch": 7.3, "grad_norm": 0.27728360891342163, "learning_rate": 3.594688335972164e-06, "loss": 0.0015, "step": 3838 }, { "epoch": 7.3, "grad_norm": 0.4090029299259186, "learning_rate": 3.58996095379731e-06, "loss": 0.0008, "step": 3839 }, { "epoch": 7.3, "grad_norm": 0.20706617832183838, "learning_rate": 3.5852360020322054e-06, "loss": 0.0018, "step": 3840 }, { "epoch": 7.3, "grad_norm": 0.7752285003662109, "learning_rate": 3.5805134824683443e-06, "loss": 0.0226, "step": 3841 }, { "epoch": 7.3, "grad_norm": 0.35964298248291016, "learning_rate": 3.575793396896303e-06, "loss": 0.0017, "step": 3842 }, { "epoch": 7.31, "grad_norm": 1.5834684371948242, "learning_rate": 3.5710757471057324e-06, "loss": 0.0036, "step": 3843 }, { "epoch": 7.31, "grad_norm": 0.20338322222232819, "learning_rate": 3.5663605348853625e-06, "loss": 0.0011, "step": 3844 }, { "epoch": 7.31, "grad_norm": 0.12737347185611725, "learning_rate": 3.5616477620229984e-06, "loss": 0.0007, "step": 3845 }, { "epoch": 7.31, "grad_norm": 1.0435928106307983, "learning_rate": 3.556937430305515e-06, "loss": 0.0063, "step": 3846 }, { "epoch": 7.31, "grad_norm": 0.39358216524124146, "learning_rate": 3.552229541518878e-06, "loss": 0.0026, "step": 3847 }, { "epoch": 7.32, "grad_norm": 0.2725643217563629, "learning_rate": 3.5475240974481007e-06, "loss": 0.0012, "step": 3848 }, { "epoch": 7.32, "grad_norm": 0.13890747725963593, "learning_rate": 3.542821099877295e-06, "loss": 0.0004, "step": 3849 }, { "epoch": 7.32, "grad_norm": 0.20914606750011444, "learning_rate": 3.538120550589631e-06, "loss": 0.0018, "step": 3850 }, { "epoch": 7.32, "grad_norm": 0.8775694370269775, "learning_rate": 3.5334224513673533e-06, "loss": 0.004, "step": 3851 }, { "epoch": 7.32, "grad_norm": 1.7113730907440186, "learning_rate": 3.5287268039917787e-06, "loss": 0.0036, "step": 3852 }, { "epoch": 7.33, "grad_norm": 0.5129668712615967, "learning_rate": 3.5240336102432926e-06, "loss": 0.0012, "step": 3853 }, { "epoch": 7.33, "grad_norm": 1.0376644134521484, "learning_rate": 3.5193428719013524e-06, "loss": 0.0143, "step": 3854 }, { "epoch": 7.33, "grad_norm": 0.7762235403060913, "learning_rate": 3.514654590744483e-06, "loss": 0.0037, "step": 3855 }, { "epoch": 7.33, "grad_norm": 0.3777815103530884, "learning_rate": 3.509968768550278e-06, "loss": 0.0041, "step": 3856 }, { "epoch": 7.33, "grad_norm": 0.15286177396774292, "learning_rate": 3.5052854070953944e-06, "loss": 0.001, "step": 3857 }, { "epoch": 7.33, "grad_norm": 0.4964131712913513, "learning_rate": 3.500604508155572e-06, "loss": 0.0024, "step": 3858 }, { "epoch": 7.34, "grad_norm": 0.7341929078102112, "learning_rate": 3.49592607350559e-06, "loss": 0.0034, "step": 3859 }, { "epoch": 7.34, "grad_norm": 0.36592933535575867, "learning_rate": 3.4912501049193215e-06, "loss": 0.0056, "step": 3860 }, { "epoch": 7.34, "grad_norm": 0.6668347120285034, "learning_rate": 3.4865766041696857e-06, "loss": 0.0027, "step": 3861 }, { "epoch": 7.34, "grad_norm": 0.08765663206577301, "learning_rate": 3.481905573028673e-06, "loss": 0.0009, "step": 3862 }, { "epoch": 7.34, "grad_norm": 0.1666555404663086, "learning_rate": 3.4772370132673374e-06, "loss": 0.001, "step": 3863 }, { "epoch": 7.35, "grad_norm": 0.1671186238527298, "learning_rate": 3.4725709266557926e-06, "loss": 0.0005, "step": 3864 }, { "epoch": 7.35, "grad_norm": 0.19496037065982819, "learning_rate": 3.467907314963219e-06, "loss": 0.001, "step": 3865 }, { "epoch": 7.35, "grad_norm": 0.8316970467567444, "learning_rate": 3.4632461799578533e-06, "loss": 0.0027, "step": 3866 }, { "epoch": 7.35, "grad_norm": 0.44187575578689575, "learning_rate": 3.4585875234070052e-06, "loss": 0.0022, "step": 3867 }, { "epoch": 7.35, "grad_norm": 0.3528219759464264, "learning_rate": 3.453931347077024e-06, "loss": 0.0015, "step": 3868 }, { "epoch": 7.36, "grad_norm": 0.3355001211166382, "learning_rate": 3.4492776527333404e-06, "loss": 0.0021, "step": 3869 }, { "epoch": 7.36, "grad_norm": 0.3460361361503601, "learning_rate": 3.4446264421404283e-06, "loss": 0.0015, "step": 3870 }, { "epoch": 7.36, "grad_norm": 0.3121769428253174, "learning_rate": 3.439977717061829e-06, "loss": 0.0026, "step": 3871 }, { "epoch": 7.36, "grad_norm": 0.5239521265029907, "learning_rate": 3.4353314792601335e-06, "loss": 0.0016, "step": 3872 }, { "epoch": 7.36, "grad_norm": 0.46682271361351013, "learning_rate": 3.430687730497003e-06, "loss": 0.0027, "step": 3873 }, { "epoch": 7.37, "grad_norm": 0.13003765046596527, "learning_rate": 3.4260464725331343e-06, "loss": 0.0006, "step": 3874 }, { "epoch": 7.37, "grad_norm": 0.4266086220741272, "learning_rate": 3.4214077071283057e-06, "loss": 0.0014, "step": 3875 }, { "epoch": 7.37, "grad_norm": 0.2751263976097107, "learning_rate": 3.4167714360413227e-06, "loss": 0.0013, "step": 3876 }, { "epoch": 7.37, "grad_norm": 0.2782883644104004, "learning_rate": 3.4121376610300684e-06, "loss": 0.0018, "step": 3877 }, { "epoch": 7.37, "grad_norm": 1.2560688257217407, "learning_rate": 3.4075063838514675e-06, "loss": 0.0089, "step": 3878 }, { "epoch": 7.37, "grad_norm": 0.7259113788604736, "learning_rate": 3.402877606261499e-06, "loss": 0.0057, "step": 3879 }, { "epoch": 7.38, "grad_norm": 0.9833004474639893, "learning_rate": 3.3982513300151966e-06, "loss": 0.0035, "step": 3880 }, { "epoch": 7.38, "grad_norm": 0.9863185286521912, "learning_rate": 3.393627556866643e-06, "loss": 0.0072, "step": 3881 }, { "epoch": 7.38, "grad_norm": 0.5002197027206421, "learning_rate": 3.3890062885689745e-06, "loss": 0.0138, "step": 3882 }, { "epoch": 7.38, "grad_norm": 0.3025893270969391, "learning_rate": 3.384387526874371e-06, "loss": 0.0016, "step": 3883 }, { "epoch": 7.38, "grad_norm": 0.6948472261428833, "learning_rate": 3.3797712735340794e-06, "loss": 0.0049, "step": 3884 }, { "epoch": 7.39, "grad_norm": 1.2398477792739868, "learning_rate": 3.3751575302983697e-06, "loss": 0.0043, "step": 3885 }, { "epoch": 7.39, "grad_norm": 0.475103497505188, "learning_rate": 3.3705462989165826e-06, "loss": 0.0035, "step": 3886 }, { "epoch": 7.39, "grad_norm": 0.21911104023456573, "learning_rate": 3.3659375811370953e-06, "loss": 0.0017, "step": 3887 }, { "epoch": 7.39, "grad_norm": 0.4593381881713867, "learning_rate": 3.3613313787073344e-06, "loss": 0.0019, "step": 3888 }, { "epoch": 7.39, "grad_norm": 0.4024711847305298, "learning_rate": 3.3567276933737736e-06, "loss": 0.0033, "step": 3889 }, { "epoch": 7.4, "grad_norm": 0.4998832046985626, "learning_rate": 3.3521265268819294e-06, "loss": 0.0012, "step": 3890 }, { "epoch": 7.4, "grad_norm": 0.26058462262153625, "learning_rate": 3.347527880976367e-06, "loss": 0.0008, "step": 3891 }, { "epoch": 7.4, "grad_norm": 0.5355288982391357, "learning_rate": 3.342931757400689e-06, "loss": 0.003, "step": 3892 }, { "epoch": 7.4, "grad_norm": 0.43996500968933105, "learning_rate": 3.3383381578975594e-06, "loss": 0.0014, "step": 3893 }, { "epoch": 7.4, "grad_norm": 0.19044309854507446, "learning_rate": 3.3337470842086573e-06, "loss": 0.0008, "step": 3894 }, { "epoch": 7.4, "grad_norm": 0.05510642006993294, "learning_rate": 3.3291585380747295e-06, "loss": 0.0003, "step": 3895 }, { "epoch": 7.41, "grad_norm": 0.5208409428596497, "learning_rate": 3.324572521235552e-06, "loss": 0.0023, "step": 3896 }, { "epoch": 7.41, "grad_norm": 0.5731787085533142, "learning_rate": 3.3199890354299435e-06, "loss": 0.0036, "step": 3897 }, { "epoch": 7.41, "grad_norm": 0.2640872001647949, "learning_rate": 3.3154080823957634e-06, "loss": 0.0013, "step": 3898 }, { "epoch": 7.41, "grad_norm": 0.11855581402778625, "learning_rate": 3.3108296638699124e-06, "loss": 0.0012, "step": 3899 }, { "epoch": 7.41, "grad_norm": 0.2447614073753357, "learning_rate": 3.3062537815883266e-06, "loss": 0.0014, "step": 3900 }, { "epoch": 7.41, "eval_blimp_filtered_avg": 0.7374626865671642, "eval_blimp_filtered_std": 0.0048369086744878475, "step": 3900 }, { "epoch": 7.41, "eval_blimp_supplement_avg": 0.7931034482758621, "eval_blimp_supplement_std": 0.01768895435017442, "step": 3900 }, { "epoch": 7.41, "eval_vqa_filtered_avg": 0.35, "eval_vqa_filtered_std": 0.04793724854411019, "step": 3900 }, { "epoch": 7.41, "eval_winoground_filtered_avg": 0.5, "eval_winoground_filtered_std": 0.050251890762960605, "step": 3900 }, { "epoch": 7.42, "grad_norm": 1.9334588050842285, "learning_rate": 3.3016804372859856e-06, "loss": 0.0056, "step": 3901 }, { "epoch": 7.42, "grad_norm": 0.2524077892303467, "learning_rate": 3.2971096326969022e-06, "loss": 0.0016, "step": 3902 }, { "epoch": 7.42, "grad_norm": 0.2723086476325989, "learning_rate": 3.292541369554124e-06, "loss": 0.0016, "step": 3903 }, { "epoch": 7.42, "grad_norm": 0.5206770896911621, "learning_rate": 3.2879756495897507e-06, "loss": 0.0022, "step": 3904 }, { "epoch": 7.42, "grad_norm": 0.994209885597229, "learning_rate": 3.2834124745348927e-06, "loss": 0.0106, "step": 3905 }, { "epoch": 7.43, "grad_norm": 0.38638001680374146, "learning_rate": 3.278851846119716e-06, "loss": 0.0015, "step": 3906 }, { "epoch": 7.43, "grad_norm": 0.2991493344306946, "learning_rate": 3.274293766073413e-06, "loss": 0.0015, "step": 3907 }, { "epoch": 7.43, "grad_norm": 0.4385310411453247, "learning_rate": 3.26973823612421e-06, "loss": 0.0043, "step": 3908 }, { "epoch": 7.43, "grad_norm": 0.08908262848854065, "learning_rate": 3.265185257999367e-06, "loss": 0.0003, "step": 3909 }, { "epoch": 7.43, "grad_norm": 0.47026413679122925, "learning_rate": 3.260634833425176e-06, "loss": 0.0025, "step": 3910 }, { "epoch": 7.44, "grad_norm": 0.2251061052083969, "learning_rate": 3.256086964126962e-06, "loss": 0.0008, "step": 3911 }, { "epoch": 7.44, "grad_norm": 0.3550705909729004, "learning_rate": 3.2515416518290777e-06, "loss": 0.0024, "step": 3912 }, { "epoch": 7.44, "grad_norm": 0.2798490524291992, "learning_rate": 3.246998898254917e-06, "loss": 0.002, "step": 3913 }, { "epoch": 7.44, "grad_norm": 0.08528285473585129, "learning_rate": 3.242458705126884e-06, "loss": 0.0006, "step": 3914 }, { "epoch": 7.44, "grad_norm": 0.6260603070259094, "learning_rate": 3.2379210741664335e-06, "loss": 0.003, "step": 3915 }, { "epoch": 7.44, "grad_norm": 0.7862895727157593, "learning_rate": 3.233386007094036e-06, "loss": 0.0044, "step": 3916 }, { "epoch": 7.45, "grad_norm": 0.16341756284236908, "learning_rate": 3.2288535056291925e-06, "loss": 0.0006, "step": 3917 }, { "epoch": 7.45, "grad_norm": 0.06478568911552429, "learning_rate": 3.2243235714904277e-06, "loss": 0.0003, "step": 3918 }, { "epoch": 7.45, "grad_norm": 0.4343390464782715, "learning_rate": 3.219796206395307e-06, "loss": 0.0024, "step": 3919 }, { "epoch": 7.45, "grad_norm": 0.5670903325080872, "learning_rate": 3.215271412060399e-06, "loss": 0.0045, "step": 3920 }, { "epoch": 7.45, "grad_norm": 0.308698445558548, "learning_rate": 3.2107491902013198e-06, "loss": 0.0027, "step": 3921 }, { "epoch": 7.46, "grad_norm": 0.4725692570209503, "learning_rate": 3.2062295425326974e-06, "loss": 0.0018, "step": 3922 }, { "epoch": 7.46, "grad_norm": 0.23751655220985413, "learning_rate": 3.2017124707681855e-06, "loss": 0.0021, "step": 3923 }, { "epoch": 7.46, "grad_norm": 0.5743544101715088, "learning_rate": 3.1971979766204685e-06, "loss": 0.0024, "step": 3924 }, { "epoch": 7.46, "grad_norm": 0.6897931694984436, "learning_rate": 3.1926860618012344e-06, "loss": 0.0031, "step": 3925 }, { "epoch": 7.46, "grad_norm": 0.2291543036699295, "learning_rate": 3.1881767280212183e-06, "loss": 0.0009, "step": 3926 }, { "epoch": 7.47, "grad_norm": 0.22436030209064484, "learning_rate": 3.1836699769901604e-06, "loss": 0.0026, "step": 3927 }, { "epoch": 7.47, "grad_norm": 0.2869182229042053, "learning_rate": 3.1791658104168264e-06, "loss": 0.0024, "step": 3928 }, { "epoch": 7.47, "grad_norm": 0.2834843099117279, "learning_rate": 3.174664230008998e-06, "loss": 0.0016, "step": 3929 }, { "epoch": 7.47, "grad_norm": 0.45349767804145813, "learning_rate": 3.1701652374734903e-06, "loss": 0.0034, "step": 3930 }, { "epoch": 7.47, "grad_norm": 0.07215266674757004, "learning_rate": 3.1656688345161123e-06, "loss": 0.0005, "step": 3931 }, { "epoch": 7.48, "grad_norm": 0.3988465368747711, "learning_rate": 3.161175022841717e-06, "loss": 0.0013, "step": 3932 }, { "epoch": 7.48, "grad_norm": 0.2249491959810257, "learning_rate": 3.1566838041541603e-06, "loss": 0.0011, "step": 3933 }, { "epoch": 7.48, "grad_norm": 0.547530472278595, "learning_rate": 3.1521951801563165e-06, "loss": 0.0027, "step": 3934 }, { "epoch": 7.48, "grad_norm": 0.06053578853607178, "learning_rate": 3.1477091525500803e-06, "loss": 0.0002, "step": 3935 }, { "epoch": 7.48, "grad_norm": 0.418312132358551, "learning_rate": 3.1432257230363573e-06, "loss": 0.0025, "step": 3936 }, { "epoch": 7.48, "grad_norm": 0.30170193314552307, "learning_rate": 3.13874489331507e-06, "loss": 0.0026, "step": 3937 }, { "epoch": 7.49, "grad_norm": 0.9731246829032898, "learning_rate": 3.134266665085154e-06, "loss": 0.0051, "step": 3938 }, { "epoch": 7.49, "grad_norm": 0.44817838072776794, "learning_rate": 3.1297910400445685e-06, "loss": 0.0039, "step": 3939 }, { "epoch": 7.49, "grad_norm": 0.05976518988609314, "learning_rate": 3.1253180198902657e-06, "loss": 0.0002, "step": 3940 }, { "epoch": 7.49, "grad_norm": 0.11982841789722443, "learning_rate": 3.120847606318228e-06, "loss": 0.0007, "step": 3941 }, { "epoch": 7.49, "grad_norm": 0.8763157725334167, "learning_rate": 3.1163798010234423e-06, "loss": 0.0051, "step": 3942 }, { "epoch": 7.5, "grad_norm": 0.497172474861145, "learning_rate": 3.111914605699906e-06, "loss": 0.0008, "step": 3943 }, { "epoch": 7.5, "grad_norm": 0.15075087547302246, "learning_rate": 3.10745202204063e-06, "loss": 0.0007, "step": 3944 }, { "epoch": 7.5, "grad_norm": 0.9520591497421265, "learning_rate": 3.1029920517376313e-06, "loss": 0.0026, "step": 3945 }, { "epoch": 7.5, "grad_norm": 0.9672303199768066, "learning_rate": 3.0985346964819373e-06, "loss": 0.0063, "step": 3946 }, { "epoch": 7.5, "grad_norm": 0.13432425260543823, "learning_rate": 3.094079957963584e-06, "loss": 0.0009, "step": 3947 }, { "epoch": 7.51, "grad_norm": 0.37981128692626953, "learning_rate": 3.089627837871623e-06, "loss": 0.0011, "step": 3948 }, { "epoch": 7.51, "grad_norm": 0.6212835311889648, "learning_rate": 3.0851783378940935e-06, "loss": 0.0027, "step": 3949 }, { "epoch": 7.51, "grad_norm": 0.5359674096107483, "learning_rate": 3.080731459718067e-06, "loss": 0.0069, "step": 3950 }, { "epoch": 7.51, "grad_norm": 0.40804508328437805, "learning_rate": 3.0762872050295934e-06, "loss": 0.0022, "step": 3951 }, { "epoch": 7.51, "grad_norm": 0.20288656651973724, "learning_rate": 3.0718455755137532e-06, "loss": 0.0009, "step": 3952 }, { "epoch": 7.52, "grad_norm": 0.7581473588943481, "learning_rate": 3.0674065728546166e-06, "loss": 0.0049, "step": 3953 }, { "epoch": 7.52, "grad_norm": 0.5492029190063477, "learning_rate": 3.06297019873526e-06, "loss": 0.0011, "step": 3954 }, { "epoch": 7.52, "grad_norm": 0.32667896151542664, "learning_rate": 3.058536454837767e-06, "loss": 0.0013, "step": 3955 }, { "epoch": 7.52, "grad_norm": 0.5153895020484924, "learning_rate": 3.054105342843221e-06, "loss": 0.0022, "step": 3956 }, { "epoch": 7.52, "grad_norm": 0.1858775019645691, "learning_rate": 3.0496768644317074e-06, "loss": 0.0013, "step": 3957 }, { "epoch": 7.52, "grad_norm": 0.35498276352882385, "learning_rate": 3.0452510212823106e-06, "loss": 0.002, "step": 3958 }, { "epoch": 7.53, "grad_norm": 0.4939782917499542, "learning_rate": 3.04082781507313e-06, "loss": 0.0023, "step": 3959 }, { "epoch": 7.53, "grad_norm": 0.45044437050819397, "learning_rate": 3.03640724748124e-06, "loss": 0.0017, "step": 3960 }, { "epoch": 7.53, "grad_norm": 0.26369449496269226, "learning_rate": 3.031989320182739e-06, "loss": 0.0031, "step": 3961 }, { "epoch": 7.53, "grad_norm": 1.0750012397766113, "learning_rate": 3.0275740348527094e-06, "loss": 0.0046, "step": 3962 }, { "epoch": 7.53, "grad_norm": 0.37619954347610474, "learning_rate": 3.0231613931652393e-06, "loss": 0.0021, "step": 3963 }, { "epoch": 7.54, "grad_norm": 0.3091850280761719, "learning_rate": 3.018751396793407e-06, "loss": 0.0015, "step": 3964 }, { "epoch": 7.54, "grad_norm": 0.21500249207019806, "learning_rate": 3.014344047409301e-06, "loss": 0.0006, "step": 3965 }, { "epoch": 7.54, "grad_norm": 1.28679358959198, "learning_rate": 3.0099393466839867e-06, "loss": 0.0034, "step": 3966 }, { "epoch": 7.54, "grad_norm": 0.06469350308179855, "learning_rate": 3.005537296287546e-06, "loss": 0.0004, "step": 3967 }, { "epoch": 7.54, "grad_norm": 0.3348994553089142, "learning_rate": 3.0011378978890416e-06, "loss": 0.0022, "step": 3968 }, { "epoch": 7.55, "grad_norm": 0.3477429449558258, "learning_rate": 2.996741153156535e-06, "loss": 0.0014, "step": 3969 }, { "epoch": 7.55, "grad_norm": 0.32320454716682434, "learning_rate": 2.992347063757083e-06, "loss": 0.0019, "step": 3970 }, { "epoch": 7.55, "grad_norm": 0.20705653727054596, "learning_rate": 2.9879556313567335e-06, "loss": 0.0013, "step": 3971 }, { "epoch": 7.55, "grad_norm": 0.5496218204498291, "learning_rate": 2.983566857620529e-06, "loss": 0.0049, "step": 3972 }, { "epoch": 7.55, "grad_norm": 1.3307163715362549, "learning_rate": 2.979180744212502e-06, "loss": 0.0016, "step": 3973 }, { "epoch": 7.56, "grad_norm": 0.6643781065940857, "learning_rate": 2.974797292795677e-06, "loss": 0.0015, "step": 3974 }, { "epoch": 7.56, "grad_norm": 0.09629146754741669, "learning_rate": 2.9704165050320653e-06, "loss": 0.0004, "step": 3975 }, { "epoch": 7.56, "grad_norm": 0.5599880218505859, "learning_rate": 2.9660383825826843e-06, "loss": 0.0031, "step": 3976 }, { "epoch": 7.56, "grad_norm": 0.9009089469909668, "learning_rate": 2.9616629271075137e-06, "loss": 0.0057, "step": 3977 }, { "epoch": 7.56, "grad_norm": 0.17508603632450104, "learning_rate": 2.9572901402655485e-06, "loss": 0.0008, "step": 3978 }, { "epoch": 7.56, "grad_norm": 0.3674411177635193, "learning_rate": 2.952920023714755e-06, "loss": 0.0035, "step": 3979 }, { "epoch": 7.57, "grad_norm": 0.7274474501609802, "learning_rate": 2.948552579112095e-06, "loss": 0.01, "step": 3980 }, { "epoch": 7.57, "grad_norm": 0.24266761541366577, "learning_rate": 2.944187808113512e-06, "loss": 0.0006, "step": 3981 }, { "epoch": 7.57, "grad_norm": 0.345733642578125, "learning_rate": 2.9398257123739416e-06, "loss": 0.0027, "step": 3982 }, { "epoch": 7.57, "grad_norm": 0.47283726930618286, "learning_rate": 2.9354662935473e-06, "loss": 0.0035, "step": 3983 }, { "epoch": 7.57, "grad_norm": 0.7087345123291016, "learning_rate": 2.9311095532864873e-06, "loss": 0.002, "step": 3984 }, { "epoch": 7.58, "grad_norm": 0.5489019155502319, "learning_rate": 2.9267554932434007e-06, "loss": 0.0069, "step": 3985 }, { "epoch": 7.58, "grad_norm": 0.2662195861339569, "learning_rate": 2.9224041150688997e-06, "loss": 0.0009, "step": 3986 }, { "epoch": 7.58, "grad_norm": 0.044561635702848434, "learning_rate": 2.9180554204128474e-06, "loss": 0.0003, "step": 3987 }, { "epoch": 7.58, "grad_norm": 0.5547471642494202, "learning_rate": 2.9137094109240784e-06, "loss": 0.0012, "step": 3988 }, { "epoch": 7.58, "grad_norm": 0.2696287930011749, "learning_rate": 2.9093660882504103e-06, "loss": 0.003, "step": 3989 }, { "epoch": 7.59, "grad_norm": 0.18932563066482544, "learning_rate": 2.905025454038646e-06, "loss": 0.0017, "step": 3990 }, { "epoch": 7.59, "grad_norm": 0.6110268235206604, "learning_rate": 2.900687509934563e-06, "loss": 0.0029, "step": 3991 }, { "epoch": 7.59, "grad_norm": 0.6080849170684814, "learning_rate": 2.896352257582925e-06, "loss": 0.0019, "step": 3992 }, { "epoch": 7.59, "grad_norm": 0.3111313283443451, "learning_rate": 2.8920196986274673e-06, "loss": 0.0014, "step": 3993 }, { "epoch": 7.59, "grad_norm": 0.12845245003700256, "learning_rate": 2.8876898347109195e-06, "loss": 0.0006, "step": 3994 }, { "epoch": 7.6, "grad_norm": 0.4846702218055725, "learning_rate": 2.883362667474967e-06, "loss": 0.0014, "step": 3995 }, { "epoch": 7.6, "grad_norm": 0.10444130748510361, "learning_rate": 2.879038198560292e-06, "loss": 0.0006, "step": 3996 }, { "epoch": 7.6, "grad_norm": 0.8815541863441467, "learning_rate": 2.8747164296065433e-06, "loss": 0.0039, "step": 3997 }, { "epoch": 7.6, "grad_norm": 0.5004289150238037, "learning_rate": 2.8703973622523506e-06, "loss": 0.0014, "step": 3998 }, { "epoch": 7.6, "grad_norm": 0.3054686486721039, "learning_rate": 2.8660809981353168e-06, "loss": 0.0029, "step": 3999 }, { "epoch": 7.6, "grad_norm": 0.346136212348938, "learning_rate": 2.86176733889202e-06, "loss": 0.0013, "step": 4000 }, { "epoch": 7.6, "eval_blimp_filtered_avg": 0.7365671641791045, "eval_blimp_filtered_std": 0.00484062890898896, "step": 4000 }, { "epoch": 7.6, "eval_blimp_supplement_avg": 0.7995689655172413, "eval_blimp_supplement_std": 0.017495116139186157, "step": 4000 }, { "epoch": 7.6, "eval_vqa_filtered_avg": 0.34, "eval_vqa_filtered_std": 0.04760952285695235, "step": 4000 }, { "epoch": 7.6, "eval_winoground_filtered_avg": 0.51, "eval_winoground_filtered_std": 0.05024183937956912, "step": 4000 }, { "epoch": 7.61, "grad_norm": 0.5686588883399963, "learning_rate": 2.857456386158014e-06, "loss": 0.0027, "step": 4001 }, { "epoch": 7.61, "grad_norm": 0.5929925441741943, "learning_rate": 2.853148141567824e-06, "loss": 0.0017, "step": 4002 }, { "epoch": 7.61, "grad_norm": 0.31308433413505554, "learning_rate": 2.8488426067549536e-06, "loss": 0.0012, "step": 4003 }, { "epoch": 7.61, "grad_norm": 0.28078702092170715, "learning_rate": 2.8445397833518694e-06, "loss": 0.0016, "step": 4004 }, { "epoch": 7.61, "grad_norm": 0.5006966590881348, "learning_rate": 2.840239672990026e-06, "loss": 0.0041, "step": 4005 }, { "epoch": 7.62, "grad_norm": 1.2470113039016724, "learning_rate": 2.835942277299828e-06, "loss": 0.0029, "step": 4006 }, { "epoch": 7.62, "grad_norm": 0.1130838617682457, "learning_rate": 2.8316475979106704e-06, "loss": 0.0006, "step": 4007 }, { "epoch": 7.62, "grad_norm": 0.5532752275466919, "learning_rate": 2.827355636450908e-06, "loss": 0.0012, "step": 4008 }, { "epoch": 7.62, "grad_norm": 0.22204698622226715, "learning_rate": 2.8230663945478664e-06, "loss": 0.001, "step": 4009 }, { "epoch": 7.62, "grad_norm": 0.41221383213996887, "learning_rate": 2.818779873827836e-06, "loss": 0.0012, "step": 4010 }, { "epoch": 7.63, "grad_norm": 0.5200076103210449, "learning_rate": 2.814496075916092e-06, "loss": 0.0053, "step": 4011 }, { "epoch": 7.63, "grad_norm": 0.29228073358535767, "learning_rate": 2.8102150024368524e-06, "loss": 0.0015, "step": 4012 }, { "epoch": 7.63, "grad_norm": 0.8863300681114197, "learning_rate": 2.8059366550133236e-06, "loss": 0.0036, "step": 4013 }, { "epoch": 7.63, "grad_norm": 0.5055689811706543, "learning_rate": 2.8016610352676675e-06, "loss": 0.0009, "step": 4014 }, { "epoch": 7.63, "grad_norm": 0.12251166999340057, "learning_rate": 2.797388144821015e-06, "loss": 0.0003, "step": 4015 }, { "epoch": 7.63, "grad_norm": 0.46353843808174133, "learning_rate": 2.79311798529346e-06, "loss": 0.0017, "step": 4016 }, { "epoch": 7.64, "grad_norm": 0.32137519121170044, "learning_rate": 2.7888505583040638e-06, "loss": 0.0027, "step": 4017 }, { "epoch": 7.64, "grad_norm": 0.6004387140274048, "learning_rate": 2.784585865470849e-06, "loss": 0.002, "step": 4018 }, { "epoch": 7.64, "grad_norm": 0.06157945841550827, "learning_rate": 2.7803239084108e-06, "loss": 0.0004, "step": 4019 }, { "epoch": 7.64, "grad_norm": 0.3596235513687134, "learning_rate": 2.776064688739878e-06, "loss": 0.0018, "step": 4020 }, { "epoch": 7.64, "grad_norm": 0.35733938217163086, "learning_rate": 2.771808208072979e-06, "loss": 0.0017, "step": 4021 }, { "epoch": 7.65, "grad_norm": 0.6593154668807983, "learning_rate": 2.7675544680239918e-06, "loss": 0.0027, "step": 4022 }, { "epoch": 7.65, "grad_norm": 0.3002084195613861, "learning_rate": 2.7633034702057383e-06, "loss": 0.001, "step": 4023 }, { "epoch": 7.65, "grad_norm": 0.10096346586942673, "learning_rate": 2.75905521623002e-06, "loss": 0.0007, "step": 4024 }, { "epoch": 7.65, "grad_norm": 0.3813326954841614, "learning_rate": 2.754809707707591e-06, "loss": 0.001, "step": 4025 }, { "epoch": 7.65, "grad_norm": 0.4346475899219513, "learning_rate": 2.750566946248162e-06, "loss": 0.0013, "step": 4026 }, { "epoch": 7.66, "grad_norm": 0.16106610000133514, "learning_rate": 2.746326933460406e-06, "loss": 0.0015, "step": 4027 }, { "epoch": 7.66, "grad_norm": 0.45590969920158386, "learning_rate": 2.742089670951954e-06, "loss": 0.0013, "step": 4028 }, { "epoch": 7.66, "grad_norm": 0.7928942441940308, "learning_rate": 2.73785516032939e-06, "loss": 0.0039, "step": 4029 }, { "epoch": 7.66, "grad_norm": 0.4989584684371948, "learning_rate": 2.733623403198257e-06, "loss": 0.0015, "step": 4030 }, { "epoch": 7.66, "grad_norm": 0.2273137867450714, "learning_rate": 2.7293944011630615e-06, "loss": 0.0013, "step": 4031 }, { "epoch": 7.67, "grad_norm": 0.3236488997936249, "learning_rate": 2.725168155827246e-06, "loss": 0.0024, "step": 4032 }, { "epoch": 7.67, "grad_norm": 0.35328999161720276, "learning_rate": 2.720944668793232e-06, "loss": 0.0013, "step": 4033 }, { "epoch": 7.67, "grad_norm": 1.2991479635238647, "learning_rate": 2.716723941662377e-06, "loss": 0.0025, "step": 4034 }, { "epoch": 7.67, "grad_norm": 0.4394667446613312, "learning_rate": 2.7125059760349993e-06, "loss": 0.002, "step": 4035 }, { "epoch": 7.67, "grad_norm": 0.7961496710777283, "learning_rate": 2.7082907735103702e-06, "loss": 0.0052, "step": 4036 }, { "epoch": 7.67, "grad_norm": 0.7116462588310242, "learning_rate": 2.7040783356867107e-06, "loss": 0.0017, "step": 4037 }, { "epoch": 7.68, "grad_norm": 0.17364901304244995, "learning_rate": 2.699868664161197e-06, "loss": 0.002, "step": 4038 }, { "epoch": 7.68, "grad_norm": 1.312577247619629, "learning_rate": 2.6956617605299505e-06, "loss": 0.0033, "step": 4039 }, { "epoch": 7.68, "grad_norm": 0.12170781195163727, "learning_rate": 2.6914576263880552e-06, "loss": 0.0006, "step": 4040 }, { "epoch": 7.68, "grad_norm": 0.14591512084007263, "learning_rate": 2.6872562633295275e-06, "loss": 0.0004, "step": 4041 }, { "epoch": 7.68, "grad_norm": 0.17642514407634735, "learning_rate": 2.683057672947349e-06, "loss": 0.0006, "step": 4042 }, { "epoch": 7.69, "grad_norm": 0.4838641881942749, "learning_rate": 2.6788618568334436e-06, "loss": 0.0064, "step": 4043 }, { "epoch": 7.69, "grad_norm": 0.4791133999824524, "learning_rate": 2.6746688165786806e-06, "loss": 0.002, "step": 4044 }, { "epoch": 7.69, "grad_norm": 0.4452683925628662, "learning_rate": 2.6704785537728783e-06, "loss": 0.0022, "step": 4045 }, { "epoch": 7.69, "grad_norm": 0.4623725414276123, "learning_rate": 2.6662910700048118e-06, "loss": 0.0017, "step": 4046 }, { "epoch": 7.69, "grad_norm": 0.702912449836731, "learning_rate": 2.6621063668621837e-06, "loss": 0.0042, "step": 4047 }, { "epoch": 7.7, "grad_norm": 0.7186940908432007, "learning_rate": 2.657924445931659e-06, "loss": 0.0033, "step": 4048 }, { "epoch": 7.7, "grad_norm": 0.11231043189764023, "learning_rate": 2.6537453087988373e-06, "loss": 0.0006, "step": 4049 }, { "epoch": 7.7, "grad_norm": 0.218244731426239, "learning_rate": 2.6495689570482664e-06, "loss": 0.0005, "step": 4050 }, { "epoch": 7.7, "grad_norm": 0.17099882662296295, "learning_rate": 2.6453953922634466e-06, "loss": 0.0008, "step": 4051 }, { "epoch": 7.7, "grad_norm": 0.09805145859718323, "learning_rate": 2.641224616026802e-06, "loss": 0.0005, "step": 4052 }, { "epoch": 7.71, "grad_norm": 0.22413946688175201, "learning_rate": 2.6370566299197175e-06, "loss": 0.0008, "step": 4053 }, { "epoch": 7.71, "grad_norm": 0.1892581284046173, "learning_rate": 2.6328914355225132e-06, "loss": 0.0014, "step": 4054 }, { "epoch": 7.71, "grad_norm": 1.0411722660064697, "learning_rate": 2.6287290344144477e-06, "loss": 0.0057, "step": 4055 }, { "epoch": 7.71, "grad_norm": 0.14195391535758972, "learning_rate": 2.6245694281737234e-06, "loss": 0.001, "step": 4056 }, { "epoch": 7.71, "grad_norm": 0.47662901878356934, "learning_rate": 2.6204126183774914e-06, "loss": 0.002, "step": 4057 }, { "epoch": 7.71, "grad_norm": 0.17065240442752838, "learning_rate": 2.6162586066018214e-06, "loss": 0.0004, "step": 4058 }, { "epoch": 7.72, "grad_norm": 0.41789510846138, "learning_rate": 2.6121073944217435e-06, "loss": 0.0017, "step": 4059 }, { "epoch": 7.72, "grad_norm": 0.31016674637794495, "learning_rate": 2.6079589834112174e-06, "loss": 0.001, "step": 4060 }, { "epoch": 7.72, "grad_norm": 0.4301970899105072, "learning_rate": 2.6038133751431395e-06, "loss": 0.0024, "step": 4061 }, { "epoch": 7.72, "grad_norm": 0.23627401888370514, "learning_rate": 2.5996705711893457e-06, "loss": 0.003, "step": 4062 }, { "epoch": 7.72, "grad_norm": 0.5571985840797424, "learning_rate": 2.595530573120608e-06, "loss": 0.0011, "step": 4063 }, { "epoch": 7.73, "grad_norm": 0.12601293623447418, "learning_rate": 2.591393382506635e-06, "loss": 0.0009, "step": 4064 }, { "epoch": 7.73, "grad_norm": 0.16052044928073883, "learning_rate": 2.587259000916068e-06, "loss": 0.001, "step": 4065 }, { "epoch": 7.73, "grad_norm": 1.0191794633865356, "learning_rate": 2.583127429916493e-06, "loss": 0.0057, "step": 4066 }, { "epoch": 7.73, "grad_norm": 0.4067133069038391, "learning_rate": 2.578998671074414e-06, "loss": 0.0021, "step": 4067 }, { "epoch": 7.73, "grad_norm": 0.16658125817775726, "learning_rate": 2.5748727259552844e-06, "loss": 0.0007, "step": 4068 }, { "epoch": 7.74, "grad_norm": 0.4874809980392456, "learning_rate": 2.570749596123482e-06, "loss": 0.0046, "step": 4069 }, { "epoch": 7.74, "grad_norm": 0.16767781972885132, "learning_rate": 2.56662928314232e-06, "loss": 0.0009, "step": 4070 }, { "epoch": 7.74, "grad_norm": 0.3505946397781372, "learning_rate": 2.5625117885740423e-06, "loss": 0.0019, "step": 4071 }, { "epoch": 7.74, "grad_norm": 0.2238459289073944, "learning_rate": 2.5583971139798248e-06, "loss": 0.001, "step": 4072 }, { "epoch": 7.74, "grad_norm": 0.6623311638832092, "learning_rate": 2.5542852609197754e-06, "loss": 0.0035, "step": 4073 }, { "epoch": 7.75, "grad_norm": 0.38083600997924805, "learning_rate": 2.55017623095293e-06, "loss": 0.0019, "step": 4074 }, { "epoch": 7.75, "grad_norm": 0.8664324879646301, "learning_rate": 2.546070025637255e-06, "loss": 0.0059, "step": 4075 }, { "epoch": 7.75, "grad_norm": 0.7108994722366333, "learning_rate": 2.5419666465296432e-06, "loss": 0.0019, "step": 4076 }, { "epoch": 7.75, "grad_norm": 0.28856128454208374, "learning_rate": 2.537866095185929e-06, "loss": 0.0011, "step": 4077 }, { "epoch": 7.75, "grad_norm": 1.1790071725845337, "learning_rate": 2.5337683731608496e-06, "loss": 0.0009, "step": 4078 }, { "epoch": 7.75, "grad_norm": 0.4973403811454773, "learning_rate": 2.5296734820080958e-06, "loss": 0.0012, "step": 4079 }, { "epoch": 7.76, "grad_norm": 0.7464736104011536, "learning_rate": 2.5255814232802676e-06, "loss": 0.0027, "step": 4080 }, { "epoch": 7.76, "grad_norm": 0.25079235434532166, "learning_rate": 2.5214921985289e-06, "loss": 0.0012, "step": 4081 }, { "epoch": 7.76, "grad_norm": 0.7823596000671387, "learning_rate": 2.517405809304446e-06, "loss": 0.0015, "step": 4082 }, { "epoch": 7.76, "grad_norm": 0.14265064895153046, "learning_rate": 2.5133222571562923e-06, "loss": 0.0013, "step": 4083 }, { "epoch": 7.76, "grad_norm": 0.13686539232730865, "learning_rate": 2.5092415436327434e-06, "loss": 0.0008, "step": 4084 }, { "epoch": 7.77, "grad_norm": 0.6497392654418945, "learning_rate": 2.5051636702810255e-06, "loss": 0.0033, "step": 4085 }, { "epoch": 7.77, "grad_norm": 0.2821301221847534, "learning_rate": 2.5010886386473023e-06, "loss": 0.0014, "step": 4086 }, { "epoch": 7.77, "grad_norm": 0.5087966918945312, "learning_rate": 2.497016450276637e-06, "loss": 0.0025, "step": 4087 }, { "epoch": 7.77, "grad_norm": 0.038824938237667084, "learning_rate": 2.492947106713036e-06, "loss": 0.0002, "step": 4088 }, { "epoch": 7.77, "grad_norm": 0.1108316108584404, "learning_rate": 2.488880609499417e-06, "loss": 0.0008, "step": 4089 }, { "epoch": 7.78, "grad_norm": 0.44530320167541504, "learning_rate": 2.4848169601776186e-06, "loss": 0.002, "step": 4090 }, { "epoch": 7.78, "grad_norm": 0.2153443694114685, "learning_rate": 2.4807561602884023e-06, "loss": 0.0011, "step": 4091 }, { "epoch": 7.78, "grad_norm": 0.26869043707847595, "learning_rate": 2.4766982113714455e-06, "loss": 0.0021, "step": 4092 }, { "epoch": 7.78, "grad_norm": 0.433095246553421, "learning_rate": 2.4726431149653496e-06, "loss": 0.0019, "step": 4093 }, { "epoch": 7.78, "grad_norm": 1.0182493925094604, "learning_rate": 2.468590872607628e-06, "loss": 0.0021, "step": 4094 }, { "epoch": 7.79, "grad_norm": 0.10765278339385986, "learning_rate": 2.464541485834725e-06, "loss": 0.0005, "step": 4095 }, { "epoch": 7.79, "grad_norm": 0.12431513518095016, "learning_rate": 2.4604949561819803e-06, "loss": 0.0007, "step": 4096 }, { "epoch": 7.79, "grad_norm": 0.40708062052726746, "learning_rate": 2.456451285183675e-06, "loss": 0.0008, "step": 4097 }, { "epoch": 7.79, "grad_norm": 0.9769524931907654, "learning_rate": 2.452410474372984e-06, "loss": 0.0034, "step": 4098 }, { "epoch": 7.79, "grad_norm": 0.0798206627368927, "learning_rate": 2.4483725252820157e-06, "loss": 0.0004, "step": 4099 }, { "epoch": 7.79, "grad_norm": 0.5334871411323547, "learning_rate": 2.4443374394417827e-06, "loss": 0.0097, "step": 4100 }, { "epoch": 7.79, "eval_blimp_filtered_avg": 0.7374626865671642, "eval_blimp_filtered_std": 0.004841000775197215, "step": 4100 }, { "epoch": 7.79, "eval_blimp_supplement_avg": 0.7974137931034483, "eval_blimp_supplement_std": 0.017640361767009737, "step": 4100 }, { "epoch": 7.79, "eval_vqa_filtered_avg": 0.39, "eval_vqa_filtered_std": 0.04902071300001975, "step": 4100 }, { "epoch": 7.79, "eval_winoground_filtered_avg": 0.51, "eval_winoground_filtered_std": 0.05024183937956912, "step": 4100 }, { "epoch": 7.8, "grad_norm": 0.23645304143428802, "learning_rate": 2.4403052183822162e-06, "loss": 0.0021, "step": 4101 }, { "epoch": 7.8, "grad_norm": 0.2265121191740036, "learning_rate": 2.436275863632156e-06, "loss": 0.0019, "step": 4102 }, { "epoch": 7.8, "grad_norm": 0.1549706757068634, "learning_rate": 2.4322493767193687e-06, "loss": 0.0004, "step": 4103 }, { "epoch": 7.8, "grad_norm": 0.27405962347984314, "learning_rate": 2.4282257591705127e-06, "loss": 0.0014, "step": 4104 }, { "epoch": 7.8, "grad_norm": 0.5812873244285583, "learning_rate": 2.4242050125111784e-06, "loss": 0.0042, "step": 4105 }, { "epoch": 7.81, "grad_norm": 0.15776444971561432, "learning_rate": 2.420187138265856e-06, "loss": 0.0008, "step": 4106 }, { "epoch": 7.81, "grad_norm": 0.11223272979259491, "learning_rate": 2.4161721379579497e-06, "loss": 0.0007, "step": 4107 }, { "epoch": 7.81, "grad_norm": 0.11082633584737778, "learning_rate": 2.4121600131097734e-06, "loss": 0.0004, "step": 4108 }, { "epoch": 7.81, "grad_norm": 0.45469242334365845, "learning_rate": 2.408150765242554e-06, "loss": 0.0047, "step": 4109 }, { "epoch": 7.81, "grad_norm": 0.4977383315563202, "learning_rate": 2.4041443958764222e-06, "loss": 0.004, "step": 4110 }, { "epoch": 7.82, "grad_norm": 0.6040261387825012, "learning_rate": 2.400140906530418e-06, "loss": 0.003, "step": 4111 }, { "epoch": 7.82, "grad_norm": 0.11383076757192612, "learning_rate": 2.3961402987225014e-06, "loss": 0.0004, "step": 4112 }, { "epoch": 7.82, "grad_norm": 0.28944462537765503, "learning_rate": 2.3921425739695157e-06, "loss": 0.0012, "step": 4113 }, { "epoch": 7.82, "grad_norm": 0.1487365961074829, "learning_rate": 2.3881477337872374e-06, "loss": 0.0012, "step": 4114 }, { "epoch": 7.82, "grad_norm": 0.07985954731702805, "learning_rate": 2.3841557796903325e-06, "loss": 0.0004, "step": 4115 }, { "epoch": 7.83, "grad_norm": 0.17295973002910614, "learning_rate": 2.3801667131923776e-06, "loss": 0.0008, "step": 4116 }, { "epoch": 7.83, "grad_norm": 0.24860642850399017, "learning_rate": 2.3761805358058545e-06, "loss": 0.0008, "step": 4117 }, { "epoch": 7.83, "grad_norm": 0.2181258499622345, "learning_rate": 2.3721972490421484e-06, "loss": 0.0016, "step": 4118 }, { "epoch": 7.83, "grad_norm": 0.8474145531654358, "learning_rate": 2.3682168544115513e-06, "loss": 0.0081, "step": 4119 }, { "epoch": 7.83, "grad_norm": 0.2929604649543762, "learning_rate": 2.3642393534232544e-06, "loss": 0.0014, "step": 4120 }, { "epoch": 7.83, "grad_norm": 0.048342619091272354, "learning_rate": 2.3602647475853567e-06, "loss": 0.0003, "step": 4121 }, { "epoch": 7.84, "grad_norm": 0.2006116360425949, "learning_rate": 2.356293038404853e-06, "loss": 0.0014, "step": 4122 }, { "epoch": 7.84, "grad_norm": 0.4574824273586273, "learning_rate": 2.352324227387651e-06, "loss": 0.0014, "step": 4123 }, { "epoch": 7.84, "grad_norm": 0.5307914018630981, "learning_rate": 2.3483583160385436e-06, "loss": 0.0031, "step": 4124 }, { "epoch": 7.84, "grad_norm": 0.4117996394634247, "learning_rate": 2.34439530586124e-06, "loss": 0.002, "step": 4125 }, { "epoch": 7.84, "grad_norm": 0.2364974170923233, "learning_rate": 2.3404351983583386e-06, "loss": 0.001, "step": 4126 }, { "epoch": 7.85, "grad_norm": 0.5819973349571228, "learning_rate": 2.3364779950313422e-06, "loss": 0.0021, "step": 4127 }, { "epoch": 7.85, "grad_norm": 0.3491966426372528, "learning_rate": 2.332523697380652e-06, "loss": 0.0024, "step": 4128 }, { "epoch": 7.85, "grad_norm": 0.203857883810997, "learning_rate": 2.3285723069055644e-06, "loss": 0.0005, "step": 4129 }, { "epoch": 7.85, "grad_norm": 0.7147539258003235, "learning_rate": 2.3246238251042787e-06, "loss": 0.0032, "step": 4130 }, { "epoch": 7.85, "grad_norm": 0.07367242127656937, "learning_rate": 2.3206782534738826e-06, "loss": 0.0005, "step": 4131 }, { "epoch": 7.86, "grad_norm": 0.809346616268158, "learning_rate": 2.3167355935103776e-06, "loss": 0.0085, "step": 4132 }, { "epoch": 7.86, "grad_norm": 0.2118033915758133, "learning_rate": 2.3127958467086376e-06, "loss": 0.0056, "step": 4133 }, { "epoch": 7.86, "grad_norm": 1.064743161201477, "learning_rate": 2.308859014562452e-06, "loss": 0.0058, "step": 4134 }, { "epoch": 7.86, "grad_norm": 0.15334346890449524, "learning_rate": 2.3049250985644957e-06, "loss": 0.0012, "step": 4135 }, { "epoch": 7.86, "grad_norm": 0.3959849178791046, "learning_rate": 2.3009941002063383e-06, "loss": 0.0022, "step": 4136 }, { "epoch": 7.87, "grad_norm": 0.15864704549312592, "learning_rate": 2.2970660209784467e-06, "loss": 0.0005, "step": 4137 }, { "epoch": 7.87, "grad_norm": 0.5534282922744751, "learning_rate": 2.2931408623701766e-06, "loss": 0.0021, "step": 4138 }, { "epoch": 7.87, "grad_norm": 0.61335688829422, "learning_rate": 2.289218625869779e-06, "loss": 0.0021, "step": 4139 }, { "epoch": 7.87, "grad_norm": 0.17951370775699615, "learning_rate": 2.2852993129643953e-06, "loss": 0.0008, "step": 4140 }, { "epoch": 7.87, "grad_norm": 0.8793749213218689, "learning_rate": 2.2813829251400655e-06, "loss": 0.0018, "step": 4141 }, { "epoch": 7.87, "grad_norm": 0.43741875886917114, "learning_rate": 2.277469463881704e-06, "loss": 0.0009, "step": 4142 }, { "epoch": 7.88, "grad_norm": 0.15947312116622925, "learning_rate": 2.2735589306731354e-06, "loss": 0.0006, "step": 4143 }, { "epoch": 7.88, "grad_norm": 0.4101869761943817, "learning_rate": 2.2696513269970623e-06, "loss": 0.0025, "step": 4144 }, { "epoch": 7.88, "grad_norm": 0.30613407492637634, "learning_rate": 2.265746654335078e-06, "loss": 0.0006, "step": 4145 }, { "epoch": 7.88, "grad_norm": 0.517195999622345, "learning_rate": 2.261844914167667e-06, "loss": 0.0045, "step": 4146 }, { "epoch": 7.88, "grad_norm": 0.24119892716407776, "learning_rate": 2.257946107974198e-06, "loss": 0.0018, "step": 4147 }, { "epoch": 7.89, "grad_norm": 0.2084256112575531, "learning_rate": 2.2540502372329298e-06, "loss": 0.0007, "step": 4148 }, { "epoch": 7.89, "grad_norm": 0.6011621952056885, "learning_rate": 2.2501573034210155e-06, "loss": 0.0042, "step": 4149 }, { "epoch": 7.89, "grad_norm": 0.21197475492954254, "learning_rate": 2.246267308014475e-06, "loss": 0.0012, "step": 4150 }, { "epoch": 7.89, "grad_norm": 0.14635096490383148, "learning_rate": 2.2423802524882364e-06, "loss": 0.0007, "step": 4151 }, { "epoch": 7.89, "grad_norm": 0.07455818355083466, "learning_rate": 2.238496138316101e-06, "loss": 0.0005, "step": 4152 }, { "epoch": 7.9, "grad_norm": 1.336686372756958, "learning_rate": 2.234614966970754e-06, "loss": 0.0021, "step": 4153 }, { "epoch": 7.9, "grad_norm": 0.22149448096752167, "learning_rate": 2.2307367399237712e-06, "loss": 0.0012, "step": 4154 }, { "epoch": 7.9, "grad_norm": 0.27800849080085754, "learning_rate": 2.226861458645606e-06, "loss": 0.0015, "step": 4155 }, { "epoch": 7.9, "grad_norm": 0.18314380943775177, "learning_rate": 2.222989124605599e-06, "loss": 0.0018, "step": 4156 }, { "epoch": 7.9, "grad_norm": 0.1229643002152443, "learning_rate": 2.219119739271969e-06, "loss": 0.0005, "step": 4157 }, { "epoch": 7.9, "grad_norm": 0.059110596776008606, "learning_rate": 2.215253304111827e-06, "loss": 0.0003, "step": 4158 }, { "epoch": 7.91, "grad_norm": 0.14597366750240326, "learning_rate": 2.211389820591149e-06, "loss": 0.0008, "step": 4159 }, { "epoch": 7.91, "grad_norm": 0.6042234301567078, "learning_rate": 2.207529290174808e-06, "loss": 0.0022, "step": 4160 }, { "epoch": 7.91, "grad_norm": 0.39823344349861145, "learning_rate": 2.2036717143265476e-06, "loss": 0.0019, "step": 4161 }, { "epoch": 7.91, "grad_norm": 0.23891296982765198, "learning_rate": 2.1998170945089923e-06, "loss": 0.0008, "step": 4162 }, { "epoch": 7.91, "grad_norm": 0.3998035490512848, "learning_rate": 2.19596543218365e-06, "loss": 0.002, "step": 4163 }, { "epoch": 7.92, "grad_norm": 0.7066842913627625, "learning_rate": 2.1921167288109035e-06, "loss": 0.0094, "step": 4164 }, { "epoch": 7.92, "grad_norm": 0.1730414181947708, "learning_rate": 2.188270985850015e-06, "loss": 0.0012, "step": 4165 }, { "epoch": 7.92, "grad_norm": 0.392056941986084, "learning_rate": 2.1844282047591203e-06, "loss": 0.0015, "step": 4166 }, { "epoch": 7.92, "grad_norm": 0.38190051913261414, "learning_rate": 2.180588386995245e-06, "loss": 0.0023, "step": 4167 }, { "epoch": 7.92, "grad_norm": 0.6370426416397095, "learning_rate": 2.176751534014271e-06, "loss": 0.0018, "step": 4168 }, { "epoch": 7.93, "grad_norm": 0.04979255795478821, "learning_rate": 2.1729176472709766e-06, "loss": 0.0003, "step": 4169 }, { "epoch": 7.93, "grad_norm": 0.4385182857513428, "learning_rate": 2.1690867282189977e-06, "loss": 0.0016, "step": 4170 }, { "epoch": 7.93, "grad_norm": 0.7957499027252197, "learning_rate": 2.165258778310859e-06, "loss": 0.0036, "step": 4171 }, { "epoch": 7.93, "grad_norm": 0.08222474902868271, "learning_rate": 2.161433798997953e-06, "loss": 0.0006, "step": 4172 }, { "epoch": 7.93, "grad_norm": 0.5043036937713623, "learning_rate": 2.157611791730545e-06, "loss": 0.0031, "step": 4173 }, { "epoch": 7.94, "grad_norm": 0.07942578941583633, "learning_rate": 2.1537927579577756e-06, "loss": 0.0004, "step": 4174 }, { "epoch": 7.94, "grad_norm": 1.0929604768753052, "learning_rate": 2.149976699127657e-06, "loss": 0.0026, "step": 4175 }, { "epoch": 7.94, "grad_norm": 0.13503825664520264, "learning_rate": 2.1461636166870738e-06, "loss": 0.0004, "step": 4176 }, { "epoch": 7.94, "grad_norm": 0.18483228981494904, "learning_rate": 2.1423535120817797e-06, "loss": 0.0015, "step": 4177 }, { "epoch": 7.94, "grad_norm": 0.40784355998039246, "learning_rate": 2.138546386756409e-06, "loss": 0.0019, "step": 4178 }, { "epoch": 7.94, "grad_norm": 0.2677672207355499, "learning_rate": 2.1347422421544495e-06, "loss": 0.0013, "step": 4179 }, { "epoch": 7.95, "grad_norm": 0.380632609128952, "learning_rate": 2.130941079718275e-06, "loss": 0.0019, "step": 4180 }, { "epoch": 7.95, "grad_norm": 0.40480685234069824, "learning_rate": 2.127142900889121e-06, "loss": 0.0032, "step": 4181 }, { "epoch": 7.95, "grad_norm": 0.2046140730381012, "learning_rate": 2.1233477071070917e-06, "loss": 0.0007, "step": 4182 }, { "epoch": 7.95, "grad_norm": 0.5060771703720093, "learning_rate": 2.11955549981116e-06, "loss": 0.001, "step": 4183 }, { "epoch": 7.95, "grad_norm": 0.21773383021354675, "learning_rate": 2.1157662804391686e-06, "loss": 0.0007, "step": 4184 }, { "epoch": 7.96, "grad_norm": 0.584531307220459, "learning_rate": 2.111980050427824e-06, "loss": 0.002, "step": 4185 }, { "epoch": 7.96, "grad_norm": 0.5663981437683105, "learning_rate": 2.1081968112127005e-06, "loss": 0.0017, "step": 4186 }, { "epoch": 7.96, "grad_norm": 0.1504506915807724, "learning_rate": 2.1044165642282445e-06, "loss": 0.0006, "step": 4187 }, { "epoch": 7.96, "grad_norm": 0.04353781417012215, "learning_rate": 2.100639310907753e-06, "loss": 0.0003, "step": 4188 }, { "epoch": 7.96, "grad_norm": 0.532181978225708, "learning_rate": 2.0968650526834045e-06, "loss": 0.0012, "step": 4189 }, { "epoch": 7.97, "grad_norm": 0.6140087842941284, "learning_rate": 2.0930937909862314e-06, "loss": 0.003, "step": 4190 }, { "epoch": 7.97, "grad_norm": 0.19021640717983246, "learning_rate": 2.0893255272461355e-06, "loss": 0.0012, "step": 4191 }, { "epoch": 7.97, "grad_norm": 0.23830761015415192, "learning_rate": 2.085560262891875e-06, "loss": 0.001, "step": 4192 }, { "epoch": 7.97, "grad_norm": 0.09691085666418076, "learning_rate": 2.081797999351083e-06, "loss": 0.0006, "step": 4193 }, { "epoch": 7.97, "grad_norm": 0.6013554334640503, "learning_rate": 2.0780387380502375e-06, "loss": 0.0009, "step": 4194 }, { "epoch": 7.98, "grad_norm": 0.36455368995666504, "learning_rate": 2.0742824804146978e-06, "loss": 0.0018, "step": 4195 }, { "epoch": 7.98, "grad_norm": 0.1703290492296219, "learning_rate": 2.070529227868665e-06, "loss": 0.001, "step": 4196 }, { "epoch": 7.98, "grad_norm": 0.758212149143219, "learning_rate": 2.066778981835218e-06, "loss": 0.0014, "step": 4197 }, { "epoch": 7.98, "grad_norm": 0.12570799887180328, "learning_rate": 2.0630317437362834e-06, "loss": 0.0005, "step": 4198 }, { "epoch": 7.98, "grad_norm": 0.28541630506515503, "learning_rate": 2.059287514992655e-06, "loss": 0.0022, "step": 4199 }, { "epoch": 7.98, "grad_norm": 0.5371941328048706, "learning_rate": 2.0555462970239805e-06, "loss": 0.0017, "step": 4200 }, { "epoch": 7.98, "eval_blimp_filtered_avg": 0.7380597014925373, "eval_blimp_filtered_std": 0.004843974677145115, "step": 4200 }, { "epoch": 7.98, "eval_blimp_supplement_avg": 0.7952586206896551, "eval_blimp_supplement_std": 0.0177088511968717, "step": 4200 }, { "epoch": 7.98, "eval_vqa_filtered_avg": 0.38, "eval_vqa_filtered_std": 0.048783173121456316, "step": 4200 }, { "epoch": 7.98, "eval_winoground_filtered_avg": 0.51, "eval_winoground_filtered_std": 0.05024183937956912, "step": 4200 }, { "epoch": 7.99, "grad_norm": 0.9155452251434326, "learning_rate": 2.0518080912487683e-06, "loss": 0.0029, "step": 4201 }, { "epoch": 7.99, "grad_norm": 0.15230952203273773, "learning_rate": 2.0480728990843835e-06, "loss": 0.0005, "step": 4202 }, { "epoch": 7.99, "grad_norm": 0.6693857312202454, "learning_rate": 2.044340721947049e-06, "loss": 0.0013, "step": 4203 }, { "epoch": 7.99, "grad_norm": 0.8316721320152283, "learning_rate": 2.040611561251853e-06, "loss": 0.0022, "step": 4204 }, { "epoch": 7.99, "grad_norm": 0.5938078761100769, "learning_rate": 2.0368854184127187e-06, "loss": 0.003, "step": 4205 }, { "epoch": 8.0, "grad_norm": 0.23298950493335724, "learning_rate": 2.033162294842449e-06, "loss": 0.0009, "step": 4206 }, { "epoch": 8.0, "grad_norm": 0.2648637294769287, "learning_rate": 2.0294421919526874e-06, "loss": 0.0019, "step": 4207 }, { "epoch": 8.0, "grad_norm": 0.11472371220588684, "learning_rate": 2.0257251111539365e-06, "loss": 0.0007, "step": 4208 }, { "epoch": 8.0, "grad_norm": 0.10234756022691727, "learning_rate": 2.0220110538555537e-06, "loss": 0.0007, "step": 4209 }, { "epoch": 8.0, "grad_norm": 0.45360708236694336, "learning_rate": 2.0183000214657476e-06, "loss": 0.0015, "step": 4210 }, { "epoch": 8.01, "grad_norm": 0.2957366406917572, "learning_rate": 2.014592015391581e-06, "loss": 0.0019, "step": 4211 }, { "epoch": 8.01, "grad_norm": 0.196389839053154, "learning_rate": 2.010887037038969e-06, "loss": 0.0009, "step": 4212 }, { "epoch": 8.01, "grad_norm": 0.08568590879440308, "learning_rate": 2.0071850878126843e-06, "loss": 0.0005, "step": 4213 }, { "epoch": 8.01, "grad_norm": 0.04805300384759903, "learning_rate": 2.0034861691163375e-06, "loss": 0.0003, "step": 4214 }, { "epoch": 8.01, "grad_norm": 0.4556220769882202, "learning_rate": 1.9997902823524072e-06, "loss": 0.003, "step": 4215 }, { "epoch": 8.02, "grad_norm": 0.9039709568023682, "learning_rate": 1.996097428922209e-06, "loss": 0.0013, "step": 4216 }, { "epoch": 8.02, "grad_norm": 0.24127115309238434, "learning_rate": 1.9924076102259147e-06, "loss": 0.0029, "step": 4217 }, { "epoch": 8.02, "grad_norm": 0.13006451725959778, "learning_rate": 1.988720827662545e-06, "loss": 0.0006, "step": 4218 }, { "epoch": 8.02, "grad_norm": 0.055673304945230484, "learning_rate": 1.985037082629966e-06, "loss": 0.0002, "step": 4219 }, { "epoch": 8.02, "grad_norm": 0.40692007541656494, "learning_rate": 1.9813563765248957e-06, "loss": 0.0026, "step": 4220 }, { "epoch": 8.02, "grad_norm": 0.11515144258737564, "learning_rate": 1.9776787107428998e-06, "loss": 0.0003, "step": 4221 }, { "epoch": 8.03, "grad_norm": 0.15563245117664337, "learning_rate": 1.9740040866783894e-06, "loss": 0.0013, "step": 4222 }, { "epoch": 8.03, "grad_norm": 0.10078701376914978, "learning_rate": 1.9703325057246202e-06, "loss": 0.0005, "step": 4223 }, { "epoch": 8.03, "grad_norm": 0.5134672522544861, "learning_rate": 1.9666639692737054e-06, "loss": 0.0017, "step": 4224 }, { "epoch": 8.03, "grad_norm": 0.10198858380317688, "learning_rate": 1.9629984787165847e-06, "loss": 0.0006, "step": 4225 }, { "epoch": 8.03, "grad_norm": 0.07967215031385422, "learning_rate": 1.9593360354430613e-06, "loss": 0.0003, "step": 4226 }, { "epoch": 8.04, "grad_norm": 0.02821691520512104, "learning_rate": 1.955676640841774e-06, "loss": 0.0002, "step": 4227 }, { "epoch": 8.04, "grad_norm": 0.2487531453371048, "learning_rate": 1.952020296300204e-06, "loss": 0.0008, "step": 4228 }, { "epoch": 8.04, "grad_norm": 0.03697032108902931, "learning_rate": 1.948367003204683e-06, "loss": 0.0002, "step": 4229 }, { "epoch": 8.04, "grad_norm": 0.3415977656841278, "learning_rate": 1.944716762940381e-06, "loss": 0.0011, "step": 4230 }, { "epoch": 8.04, "grad_norm": 0.06481347233057022, "learning_rate": 1.941069576891308e-06, "loss": 0.0005, "step": 4231 }, { "epoch": 8.05, "grad_norm": 0.6871536374092102, "learning_rate": 1.937425446440321e-06, "loss": 0.0048, "step": 4232 }, { "epoch": 8.05, "grad_norm": 0.8403608202934265, "learning_rate": 1.933784372969122e-06, "loss": 0.0047, "step": 4233 }, { "epoch": 8.05, "grad_norm": 0.053559429943561554, "learning_rate": 1.9301463578582382e-06, "loss": 0.0002, "step": 4234 }, { "epoch": 8.05, "grad_norm": 0.34228959679603577, "learning_rate": 1.926511402487057e-06, "loss": 0.0009, "step": 4235 }, { "epoch": 8.05, "grad_norm": 1.2879056930541992, "learning_rate": 1.9228795082337926e-06, "loss": 0.0037, "step": 4236 }, { "epoch": 8.06, "grad_norm": 1.0514554977416992, "learning_rate": 1.9192506764755038e-06, "loss": 0.0047, "step": 4237 }, { "epoch": 8.06, "grad_norm": 0.08836644142866135, "learning_rate": 1.915624908588082e-06, "loss": 0.0004, "step": 4238 }, { "epoch": 8.06, "grad_norm": 0.13499538600444794, "learning_rate": 1.912002205946271e-06, "loss": 0.0011, "step": 4239 }, { "epoch": 8.06, "grad_norm": 1.7648677825927734, "learning_rate": 1.9083825699236327e-06, "loss": 0.0028, "step": 4240 }, { "epoch": 8.06, "grad_norm": 0.07435586303472519, "learning_rate": 1.904766001892583e-06, "loss": 0.0003, "step": 4241 }, { "epoch": 8.06, "grad_norm": 0.0795307457447052, "learning_rate": 1.901152503224366e-06, "loss": 0.0004, "step": 4242 }, { "epoch": 8.07, "grad_norm": 0.16997000575065613, "learning_rate": 1.8975420752890662e-06, "loss": 0.0006, "step": 4243 }, { "epoch": 8.07, "grad_norm": 0.3475034236907959, "learning_rate": 1.893934719455599e-06, "loss": 0.0011, "step": 4244 }, { "epoch": 8.07, "grad_norm": 0.027636487036943436, "learning_rate": 1.8903304370917209e-06, "loss": 0.0002, "step": 4245 }, { "epoch": 8.07, "grad_norm": 0.5895635485649109, "learning_rate": 1.886729229564016e-06, "loss": 0.0016, "step": 4246 }, { "epoch": 8.07, "grad_norm": 0.053408991545438766, "learning_rate": 1.8831310982379103e-06, "loss": 0.0004, "step": 4247 }, { "epoch": 8.08, "grad_norm": 0.9117750525474548, "learning_rate": 1.8795360444776578e-06, "loss": 0.0028, "step": 4248 }, { "epoch": 8.08, "grad_norm": 1.3120198249816895, "learning_rate": 1.875944069646346e-06, "loss": 0.0039, "step": 4249 }, { "epoch": 8.08, "grad_norm": 0.28347909450531006, "learning_rate": 1.8723551751059022e-06, "loss": 0.0018, "step": 4250 }, { "epoch": 8.08, "grad_norm": 0.9578442573547363, "learning_rate": 1.8687693622170721e-06, "loss": 0.0035, "step": 4251 }, { "epoch": 8.08, "grad_norm": 0.11759591102600098, "learning_rate": 1.8651866323394473e-06, "loss": 0.0007, "step": 4252 }, { "epoch": 8.09, "grad_norm": 0.5357439517974854, "learning_rate": 1.8616069868314412e-06, "loss": 0.0014, "step": 4253 }, { "epoch": 8.09, "grad_norm": 0.3873136639595032, "learning_rate": 1.8580304270503025e-06, "loss": 0.0014, "step": 4254 }, { "epoch": 8.09, "grad_norm": 0.3417164385318756, "learning_rate": 1.8544569543521062e-06, "loss": 0.0005, "step": 4255 }, { "epoch": 8.09, "grad_norm": 0.049027442932128906, "learning_rate": 1.85088657009176e-06, "loss": 0.0004, "step": 4256 }, { "epoch": 8.09, "grad_norm": 0.4861471652984619, "learning_rate": 1.8473192756229985e-06, "loss": 0.0017, "step": 4257 }, { "epoch": 8.1, "grad_norm": 0.7382941842079163, "learning_rate": 1.8437550722983831e-06, "loss": 0.0019, "step": 4258 }, { "epoch": 8.1, "grad_norm": 0.19919133186340332, "learning_rate": 1.8401939614693143e-06, "loss": 0.0009, "step": 4259 }, { "epoch": 8.1, "grad_norm": 0.4672450125217438, "learning_rate": 1.8366359444859994e-06, "loss": 0.0016, "step": 4260 }, { "epoch": 8.1, "grad_norm": 0.16449536383152008, "learning_rate": 1.8330810226974927e-06, "loss": 0.0005, "step": 4261 }, { "epoch": 8.1, "grad_norm": 0.1933366060256958, "learning_rate": 1.829529197451666e-06, "loss": 0.0007, "step": 4262 }, { "epoch": 8.1, "grad_norm": 0.22157613933086395, "learning_rate": 1.8259804700952165e-06, "loss": 0.0004, "step": 4263 }, { "epoch": 8.11, "grad_norm": 0.09092210233211517, "learning_rate": 1.8224348419736692e-06, "loss": 0.0004, "step": 4264 }, { "epoch": 8.11, "grad_norm": 0.19790326058864594, "learning_rate": 1.8188923144313707e-06, "loss": 0.0006, "step": 4265 }, { "epoch": 8.11, "grad_norm": 0.2499152272939682, "learning_rate": 1.815352888811498e-06, "loss": 0.001, "step": 4266 }, { "epoch": 8.11, "grad_norm": 0.6830257177352905, "learning_rate": 1.811816566456045e-06, "loss": 0.0046, "step": 4267 }, { "epoch": 8.11, "grad_norm": 0.05130018666386604, "learning_rate": 1.8082833487058337e-06, "loss": 0.0003, "step": 4268 }, { "epoch": 8.12, "grad_norm": 0.1408393532037735, "learning_rate": 1.804753236900505e-06, "loss": 0.0005, "step": 4269 }, { "epoch": 8.12, "grad_norm": 0.11475095897912979, "learning_rate": 1.8012262323785323e-06, "loss": 0.0005, "step": 4270 }, { "epoch": 8.12, "grad_norm": 0.12622125446796417, "learning_rate": 1.797702336477194e-06, "loss": 0.0006, "step": 4271 }, { "epoch": 8.12, "grad_norm": 0.3460841774940491, "learning_rate": 1.7941815505326043e-06, "loss": 0.0014, "step": 4272 }, { "epoch": 8.12, "grad_norm": 0.19518059492111206, "learning_rate": 1.7906638758796924e-06, "loss": 0.0015, "step": 4273 }, { "epoch": 8.13, "grad_norm": 0.7331743836402893, "learning_rate": 1.787149313852209e-06, "loss": 0.0012, "step": 4274 }, { "epoch": 8.13, "grad_norm": 0.0632692500948906, "learning_rate": 1.7836378657827224e-06, "loss": 0.0005, "step": 4275 }, { "epoch": 8.13, "grad_norm": 0.4482622444629669, "learning_rate": 1.7801295330026224e-06, "loss": 0.0018, "step": 4276 }, { "epoch": 8.13, "grad_norm": 0.23787641525268555, "learning_rate": 1.7766243168421182e-06, "loss": 0.0011, "step": 4277 }, { "epoch": 8.13, "grad_norm": 0.1322604864835739, "learning_rate": 1.7731222186302323e-06, "loss": 0.0006, "step": 4278 }, { "epoch": 8.13, "grad_norm": 0.12761956453323364, "learning_rate": 1.7696232396948176e-06, "loss": 0.0005, "step": 4279 }, { "epoch": 8.14, "grad_norm": 0.8518974781036377, "learning_rate": 1.7661273813625257e-06, "loss": 0.0024, "step": 4280 }, { "epoch": 8.14, "grad_norm": 0.5713862180709839, "learning_rate": 1.7626346449588417e-06, "loss": 0.0009, "step": 4281 }, { "epoch": 8.14, "grad_norm": 0.24629074335098267, "learning_rate": 1.7591450318080572e-06, "loss": 0.0009, "step": 4282 }, { "epoch": 8.14, "grad_norm": 0.4884274899959564, "learning_rate": 1.7556585432332852e-06, "loss": 0.0032, "step": 4283 }, { "epoch": 8.14, "grad_norm": 0.27398550510406494, "learning_rate": 1.752175180556447e-06, "loss": 0.0007, "step": 4284 }, { "epoch": 8.15, "grad_norm": 0.09768766909837723, "learning_rate": 1.7486949450982904e-06, "loss": 0.0006, "step": 4285 }, { "epoch": 8.15, "grad_norm": 0.16016952693462372, "learning_rate": 1.7452178381783614e-06, "loss": 0.0015, "step": 4286 }, { "epoch": 8.15, "grad_norm": 0.439543753862381, "learning_rate": 1.741743861115035e-06, "loss": 0.0019, "step": 4287 }, { "epoch": 8.15, "grad_norm": 0.2162570059299469, "learning_rate": 1.7382730152254922e-06, "loss": 0.001, "step": 4288 }, { "epoch": 8.15, "grad_norm": 0.9666628241539001, "learning_rate": 1.734805301825726e-06, "loss": 0.0082, "step": 4289 }, { "epoch": 8.16, "grad_norm": 0.3153863549232483, "learning_rate": 1.7313407222305456e-06, "loss": 0.001, "step": 4290 }, { "epoch": 8.16, "grad_norm": 0.16550429165363312, "learning_rate": 1.727879277753568e-06, "loss": 0.0016, "step": 4291 }, { "epoch": 8.16, "grad_norm": 0.06080075725913048, "learning_rate": 1.7244209697072235e-06, "loss": 0.0003, "step": 4292 }, { "epoch": 8.16, "grad_norm": 0.11664901673793793, "learning_rate": 1.7209657994027528e-06, "loss": 0.0008, "step": 4293 }, { "epoch": 8.16, "grad_norm": 0.29918399453163147, "learning_rate": 1.7175137681502086e-06, "loss": 0.0014, "step": 4294 }, { "epoch": 8.17, "grad_norm": 0.08541471511125565, "learning_rate": 1.714064877258449e-06, "loss": 0.0006, "step": 4295 }, { "epoch": 8.17, "grad_norm": 0.38400793075561523, "learning_rate": 1.7106191280351503e-06, "loss": 0.001, "step": 4296 }, { "epoch": 8.17, "grad_norm": 0.047119591385126114, "learning_rate": 1.7071765217867841e-06, "loss": 0.0003, "step": 4297 }, { "epoch": 8.17, "grad_norm": 0.22920486330986023, "learning_rate": 1.7037370598186431e-06, "loss": 0.0007, "step": 4298 }, { "epoch": 8.17, "grad_norm": 0.4138776957988739, "learning_rate": 1.7003007434348217e-06, "loss": 0.0062, "step": 4299 }, { "epoch": 8.17, "grad_norm": 0.19765767455101013, "learning_rate": 1.6968675739382211e-06, "loss": 0.0011, "step": 4300 }, { "epoch": 8.17, "eval_blimp_filtered_avg": 0.7368656716417911, "eval_blimp_filtered_std": 0.00484634319873661, "step": 4300 }, { "epoch": 8.17, "eval_blimp_supplement_avg": 0.7995689655172413, "eval_blimp_supplement_std": 0.01761239156606601, "step": 4300 }, { "epoch": 8.17, "eval_vqa_filtered_avg": 0.37, "eval_vqa_filtered_std": 0.048523658709391, "step": 4300 }, { "epoch": 8.17, "eval_winoground_filtered_avg": 0.5, "eval_winoground_filtered_std": 0.050251890762960605, "step": 4300 }, { "epoch": 8.18, "grad_norm": 0.21077926456928253, "learning_rate": 1.6934375526305525e-06, "loss": 0.0007, "step": 4301 }, { "epoch": 8.18, "grad_norm": 0.5761735439300537, "learning_rate": 1.6900106808123308e-06, "loss": 0.0034, "step": 4302 }, { "epoch": 8.18, "grad_norm": 0.11215998232364655, "learning_rate": 1.6865869597828767e-06, "loss": 0.0003, "step": 4303 }, { "epoch": 8.18, "grad_norm": 0.07138937711715698, "learning_rate": 1.6831663908403173e-06, "loss": 0.0003, "step": 4304 }, { "epoch": 8.18, "grad_norm": 0.13921156525611877, "learning_rate": 1.6797489752815888e-06, "loss": 0.0013, "step": 4305 }, { "epoch": 8.19, "grad_norm": 0.46881988644599915, "learning_rate": 1.67633471440242e-06, "loss": 0.0008, "step": 4306 }, { "epoch": 8.19, "grad_norm": 0.5937180519104004, "learning_rate": 1.6729236094973577e-06, "loss": 0.0026, "step": 4307 }, { "epoch": 8.19, "grad_norm": 0.43786826729774475, "learning_rate": 1.669515661859743e-06, "loss": 0.0014, "step": 4308 }, { "epoch": 8.19, "grad_norm": 0.16921868920326233, "learning_rate": 1.6661108727817221e-06, "loss": 0.0014, "step": 4309 }, { "epoch": 8.19, "grad_norm": 0.29034996032714844, "learning_rate": 1.6627092435542447e-06, "loss": 0.0023, "step": 4310 }, { "epoch": 8.2, "grad_norm": 0.08478104323148727, "learning_rate": 1.65931077546706e-06, "loss": 0.0004, "step": 4311 }, { "epoch": 8.2, "grad_norm": 0.40402138233184814, "learning_rate": 1.655915469808722e-06, "loss": 0.002, "step": 4312 }, { "epoch": 8.2, "grad_norm": 0.11682812869548798, "learning_rate": 1.6525233278665798e-06, "loss": 0.0007, "step": 4313 }, { "epoch": 8.2, "grad_norm": 0.06720246374607086, "learning_rate": 1.6491343509267954e-06, "loss": 0.0002, "step": 4314 }, { "epoch": 8.2, "grad_norm": 0.8744116425514221, "learning_rate": 1.6457485402743113e-06, "loss": 0.0053, "step": 4315 }, { "epoch": 8.21, "grad_norm": 0.05676279589533806, "learning_rate": 1.6423658971928925e-06, "loss": 0.0003, "step": 4316 }, { "epoch": 8.21, "grad_norm": 0.4724164605140686, "learning_rate": 1.6389864229650799e-06, "loss": 0.0013, "step": 4317 }, { "epoch": 8.21, "grad_norm": 0.04479055851697922, "learning_rate": 1.6356101188722306e-06, "loss": 0.0003, "step": 4318 }, { "epoch": 8.21, "grad_norm": 0.15826593339443207, "learning_rate": 1.632236986194492e-06, "loss": 0.0004, "step": 4319 }, { "epoch": 8.21, "grad_norm": 0.2935536503791809, "learning_rate": 1.6288670262108108e-06, "loss": 0.0016, "step": 4320 }, { "epoch": 8.21, "grad_norm": 0.07833229005336761, "learning_rate": 1.6255002401989284e-06, "loss": 0.0004, "step": 4321 }, { "epoch": 8.22, "grad_norm": 0.2975424826145172, "learning_rate": 1.6221366294353858e-06, "loss": 0.0019, "step": 4322 }, { "epoch": 8.22, "grad_norm": 0.04407280683517456, "learning_rate": 1.6187761951955183e-06, "loss": 0.0002, "step": 4323 }, { "epoch": 8.22, "grad_norm": 0.13505704700946808, "learning_rate": 1.6154189387534548e-06, "loss": 0.0008, "step": 4324 }, { "epoch": 8.22, "grad_norm": 0.5422782897949219, "learning_rate": 1.61206486138213e-06, "loss": 0.0016, "step": 4325 }, { "epoch": 8.22, "grad_norm": 0.03133607655763626, "learning_rate": 1.6087139643532534e-06, "loss": 0.0002, "step": 4326 }, { "epoch": 8.23, "grad_norm": 0.5447821617126465, "learning_rate": 1.60536624893735e-06, "loss": 0.001, "step": 4327 }, { "epoch": 8.23, "grad_norm": 0.10035780817270279, "learning_rate": 1.6020217164037244e-06, "loss": 0.0005, "step": 4328 }, { "epoch": 8.23, "grad_norm": 0.10746712982654572, "learning_rate": 1.59868036802048e-06, "loss": 0.0007, "step": 4329 }, { "epoch": 8.23, "grad_norm": 0.22163891792297363, "learning_rate": 1.5953422050545098e-06, "loss": 0.0018, "step": 4330 }, { "epoch": 8.23, "grad_norm": 0.10385783761739731, "learning_rate": 1.5920072287715061e-06, "loss": 0.0006, "step": 4331 }, { "epoch": 8.24, "grad_norm": 0.3123694360256195, "learning_rate": 1.5886754404359395e-06, "loss": 0.0007, "step": 4332 }, { "epoch": 8.24, "grad_norm": 0.6945271492004395, "learning_rate": 1.5853468413110873e-06, "loss": 0.0036, "step": 4333 }, { "epoch": 8.24, "grad_norm": 1.0438213348388672, "learning_rate": 1.5820214326590068e-06, "loss": 0.0019, "step": 4334 }, { "epoch": 8.24, "grad_norm": 0.4835401475429535, "learning_rate": 1.5786992157405512e-06, "loss": 0.0015, "step": 4335 }, { "epoch": 8.24, "grad_norm": 0.36276188492774963, "learning_rate": 1.5753801918153611e-06, "loss": 0.0017, "step": 4336 }, { "epoch": 8.25, "grad_norm": 0.44003793597221375, "learning_rate": 1.5720643621418652e-06, "loss": 0.0014, "step": 4337 }, { "epoch": 8.25, "grad_norm": 0.7039808630943298, "learning_rate": 1.5687517279772846e-06, "loss": 0.002, "step": 4338 }, { "epoch": 8.25, "grad_norm": 0.39448657631874084, "learning_rate": 1.5654422905776235e-06, "loss": 0.0008, "step": 4339 }, { "epoch": 8.25, "grad_norm": 0.23121270537376404, "learning_rate": 1.5621360511976857e-06, "loss": 0.0017, "step": 4340 }, { "epoch": 8.25, "grad_norm": 0.2644481658935547, "learning_rate": 1.5588330110910444e-06, "loss": 0.0006, "step": 4341 }, { "epoch": 8.25, "grad_norm": 0.7098546624183655, "learning_rate": 1.5555331715100785e-06, "loss": 0.0034, "step": 4342 }, { "epoch": 8.26, "grad_norm": 0.06709442287683487, "learning_rate": 1.5522365337059363e-06, "loss": 0.0003, "step": 4343 }, { "epoch": 8.26, "grad_norm": 0.10433752089738846, "learning_rate": 1.548943098928566e-06, "loss": 0.0006, "step": 4344 }, { "epoch": 8.26, "grad_norm": 0.26751136779785156, "learning_rate": 1.5456528684266937e-06, "loss": 0.0008, "step": 4345 }, { "epoch": 8.26, "grad_norm": 0.12015445530414581, "learning_rate": 1.5423658434478339e-06, "loss": 0.0008, "step": 4346 }, { "epoch": 8.26, "grad_norm": 0.32606685161590576, "learning_rate": 1.5390820252382832e-06, "loss": 0.001, "step": 4347 }, { "epoch": 8.27, "grad_norm": 0.5173876285552979, "learning_rate": 1.5358014150431232e-06, "loss": 0.0028, "step": 4348 }, { "epoch": 8.27, "grad_norm": 0.04022819176316261, "learning_rate": 1.5325240141062203e-06, "loss": 0.0003, "step": 4349 }, { "epoch": 8.27, "grad_norm": 0.24858258664608002, "learning_rate": 1.5292498236702213e-06, "loss": 0.0008, "step": 4350 }, { "epoch": 8.27, "grad_norm": 0.22620272636413574, "learning_rate": 1.5259788449765633e-06, "loss": 0.0003, "step": 4351 }, { "epoch": 8.27, "grad_norm": 0.8551767468452454, "learning_rate": 1.522711079265452e-06, "loss": 0.0047, "step": 4352 }, { "epoch": 8.28, "grad_norm": 0.1315561980009079, "learning_rate": 1.5194465277758884e-06, "loss": 0.0009, "step": 4353 }, { "epoch": 8.28, "grad_norm": 0.19906537234783173, "learning_rate": 1.5161851917456494e-06, "loss": 0.0007, "step": 4354 }, { "epoch": 8.28, "grad_norm": 0.735843300819397, "learning_rate": 1.512927072411291e-06, "loss": 0.0016, "step": 4355 }, { "epoch": 8.28, "grad_norm": 0.15483583509922028, "learning_rate": 1.5096721710081509e-06, "loss": 0.0004, "step": 4356 }, { "epoch": 8.28, "grad_norm": 0.8799454569816589, "learning_rate": 1.5064204887703482e-06, "loss": 0.0007, "step": 4357 }, { "epoch": 8.29, "grad_norm": 0.38492655754089355, "learning_rate": 1.5031720269307793e-06, "loss": 0.0035, "step": 4358 }, { "epoch": 8.29, "grad_norm": 0.39851051568984985, "learning_rate": 1.4999267867211175e-06, "loss": 0.0023, "step": 4359 }, { "epoch": 8.29, "grad_norm": 0.0490901805460453, "learning_rate": 1.496684769371828e-06, "loss": 0.0003, "step": 4360 }, { "epoch": 8.29, "grad_norm": 0.5468082427978516, "learning_rate": 1.49344597611213e-06, "loss": 0.001, "step": 4361 }, { "epoch": 8.29, "grad_norm": 0.07242156565189362, "learning_rate": 1.4902104081700442e-06, "loss": 0.0003, "step": 4362 }, { "epoch": 8.29, "grad_norm": 0.04965168982744217, "learning_rate": 1.4869780667723532e-06, "loss": 0.0003, "step": 4363 }, { "epoch": 8.3, "grad_norm": 0.5703487396240234, "learning_rate": 1.4837489531446236e-06, "loss": 0.0024, "step": 4364 }, { "epoch": 8.3, "grad_norm": 0.11041479557752609, "learning_rate": 1.4805230685111938e-06, "loss": 0.0004, "step": 4365 }, { "epoch": 8.3, "grad_norm": 0.7891824245452881, "learning_rate": 1.4773004140951807e-06, "loss": 0.0027, "step": 4366 }, { "epoch": 8.3, "grad_norm": 0.11208539456129074, "learning_rate": 1.4740809911184762e-06, "loss": 0.0005, "step": 4367 }, { "epoch": 8.3, "grad_norm": 0.27440059185028076, "learning_rate": 1.4708648008017446e-06, "loss": 0.0009, "step": 4368 }, { "epoch": 8.31, "grad_norm": 0.3801560401916504, "learning_rate": 1.4676518443644283e-06, "loss": 0.0014, "step": 4369 }, { "epoch": 8.31, "grad_norm": 0.11204870045185089, "learning_rate": 1.4644421230247375e-06, "loss": 0.0005, "step": 4370 }, { "epoch": 8.31, "grad_norm": 0.20533894002437592, "learning_rate": 1.4612356379996673e-06, "loss": 0.0007, "step": 4371 }, { "epoch": 8.31, "grad_norm": 0.9157264232635498, "learning_rate": 1.4580323905049687e-06, "loss": 0.0028, "step": 4372 }, { "epoch": 8.31, "grad_norm": 0.1772078424692154, "learning_rate": 1.4548323817551825e-06, "loss": 0.0006, "step": 4373 }, { "epoch": 8.32, "grad_norm": 0.2816546857357025, "learning_rate": 1.4516356129636112e-06, "loss": 0.0013, "step": 4374 }, { "epoch": 8.32, "grad_norm": 0.18620052933692932, "learning_rate": 1.4484420853423297e-06, "loss": 0.0007, "step": 4375 }, { "epoch": 8.32, "grad_norm": 0.14583322405815125, "learning_rate": 1.4452518001021865e-06, "loss": 0.0005, "step": 4376 }, { "epoch": 8.32, "grad_norm": 0.6369453072547913, "learning_rate": 1.4420647584528037e-06, "loss": 0.0027, "step": 4377 }, { "epoch": 8.32, "grad_norm": 0.11710014939308167, "learning_rate": 1.4388809616025623e-06, "loss": 0.001, "step": 4378 }, { "epoch": 8.33, "grad_norm": 0.3676125407218933, "learning_rate": 1.4357004107586259e-06, "loss": 0.0025, "step": 4379 }, { "epoch": 8.33, "grad_norm": 0.1768130511045456, "learning_rate": 1.4325231071269219e-06, "loss": 0.001, "step": 4380 }, { "epoch": 8.33, "grad_norm": 0.5705807209014893, "learning_rate": 1.4293490519121433e-06, "loss": 0.003, "step": 4381 }, { "epoch": 8.33, "grad_norm": 0.620261013507843, "learning_rate": 1.4261782463177588e-06, "loss": 0.0019, "step": 4382 }, { "epoch": 8.33, "grad_norm": 0.8748486638069153, "learning_rate": 1.4230106915459963e-06, "loss": 0.0052, "step": 4383 }, { "epoch": 8.33, "grad_norm": 1.041046142578125, "learning_rate": 1.4198463887978598e-06, "loss": 0.002, "step": 4384 }, { "epoch": 8.34, "grad_norm": 0.09113771468400955, "learning_rate": 1.4166853392731105e-06, "loss": 0.0008, "step": 4385 }, { "epoch": 8.34, "grad_norm": 0.0860869288444519, "learning_rate": 1.4135275441702901e-06, "loss": 0.0005, "step": 4386 }, { "epoch": 8.34, "grad_norm": 0.04547056183218956, "learning_rate": 1.4103730046866892e-06, "loss": 0.0003, "step": 4387 }, { "epoch": 8.34, "grad_norm": 0.33408862352371216, "learning_rate": 1.4072217220183792e-06, "loss": 0.0018, "step": 4388 }, { "epoch": 8.34, "grad_norm": 0.29700860381126404, "learning_rate": 1.4040736973601887e-06, "loss": 0.0018, "step": 4389 }, { "epoch": 8.35, "grad_norm": 0.16371360421180725, "learning_rate": 1.4009289319057118e-06, "loss": 0.0008, "step": 4390 }, { "epoch": 8.35, "grad_norm": 0.15498442947864532, "learning_rate": 1.3977874268473069e-06, "loss": 0.0013, "step": 4391 }, { "epoch": 8.35, "grad_norm": 0.028248699381947517, "learning_rate": 1.3946491833760989e-06, "loss": 0.0003, "step": 4392 }, { "epoch": 8.35, "grad_norm": 0.6009773015975952, "learning_rate": 1.3915142026819716e-06, "loss": 0.0043, "step": 4393 }, { "epoch": 8.35, "grad_norm": 0.6134759187698364, "learning_rate": 1.3883824859535766e-06, "loss": 0.003, "step": 4394 }, { "epoch": 8.36, "grad_norm": 0.06113799661397934, "learning_rate": 1.385254034378325e-06, "loss": 0.0003, "step": 4395 }, { "epoch": 8.36, "grad_norm": 0.0937075987458229, "learning_rate": 1.3821288491423867e-06, "loss": 0.0007, "step": 4396 }, { "epoch": 8.36, "grad_norm": 0.1766739934682846, "learning_rate": 1.3790069314307053e-06, "loss": 0.0023, "step": 4397 }, { "epoch": 8.36, "grad_norm": 0.0684737041592598, "learning_rate": 1.3758882824269659e-06, "loss": 0.0004, "step": 4398 }, { "epoch": 8.36, "grad_norm": 0.028834203258156776, "learning_rate": 1.372772903313635e-06, "loss": 0.0002, "step": 4399 }, { "epoch": 8.37, "grad_norm": 0.5333136320114136, "learning_rate": 1.3696607952719255e-06, "loss": 0.0011, "step": 4400 }, { "epoch": 8.37, "eval_blimp_filtered_avg": 0.7388059701492538, "eval_blimp_filtered_std": 0.004837536664720376, "step": 4400 }, { "epoch": 8.37, "eval_blimp_supplement_avg": 0.7931034482758621, "eval_blimp_supplement_std": 0.01773402182283453, "step": 4400 }, { "epoch": 8.37, "eval_vqa_filtered_avg": 0.35, "eval_vqa_filtered_std": 0.047937248544110196, "step": 4400 }, { "epoch": 8.37, "eval_winoground_filtered_avg": 0.49, "eval_winoground_filtered_std": 0.05024183937956912, "step": 4400 }, { "epoch": 8.37, "grad_norm": 0.9994814991950989, "learning_rate": 1.3665519594818155e-06, "loss": 0.0008, "step": 4401 }, { "epoch": 8.37, "grad_norm": 0.4718815088272095, "learning_rate": 1.36344639712204e-06, "loss": 0.0016, "step": 4402 }, { "epoch": 8.37, "grad_norm": 0.04172009602189064, "learning_rate": 1.3603441093700943e-06, "loss": 0.0002, "step": 4403 }, { "epoch": 8.37, "grad_norm": 0.04627189412713051, "learning_rate": 1.3572450974022321e-06, "loss": 0.0003, "step": 4404 }, { "epoch": 8.37, "grad_norm": 1.1424204111099243, "learning_rate": 1.354149362393462e-06, "loss": 0.0039, "step": 4405 }, { "epoch": 8.38, "grad_norm": 0.12246687710285187, "learning_rate": 1.3510569055175603e-06, "loss": 0.0008, "step": 4406 }, { "epoch": 8.38, "grad_norm": 0.23921869695186615, "learning_rate": 1.3479677279470449e-06, "loss": 0.0008, "step": 4407 }, { "epoch": 8.38, "grad_norm": 0.18621748685836792, "learning_rate": 1.344881830853203e-06, "loss": 0.0003, "step": 4408 }, { "epoch": 8.38, "grad_norm": 0.03919734060764313, "learning_rate": 1.341799215406072e-06, "loss": 0.0003, "step": 4409 }, { "epoch": 8.38, "grad_norm": 0.6165281534194946, "learning_rate": 1.3387198827744474e-06, "loss": 0.0062, "step": 4410 }, { "epoch": 8.39, "grad_norm": 0.08674421161413193, "learning_rate": 1.335643834125876e-06, "loss": 0.0003, "step": 4411 }, { "epoch": 8.39, "grad_norm": 0.25230666995048523, "learning_rate": 1.3325710706266693e-06, "loss": 0.0008, "step": 4412 }, { "epoch": 8.39, "grad_norm": 0.2538583278656006, "learning_rate": 1.3295015934418788e-06, "loss": 0.0011, "step": 4413 }, { "epoch": 8.39, "grad_norm": 0.2968876361846924, "learning_rate": 1.326435403735321e-06, "loss": 0.0014, "step": 4414 }, { "epoch": 8.39, "grad_norm": 0.19089528918266296, "learning_rate": 1.323372502669562e-06, "loss": 0.001, "step": 4415 }, { "epoch": 8.4, "grad_norm": 0.07842905074357986, "learning_rate": 1.3203128914059193e-06, "loss": 0.0008, "step": 4416 }, { "epoch": 8.4, "grad_norm": 0.08292330801486969, "learning_rate": 1.3172565711044726e-06, "loss": 0.0005, "step": 4417 }, { "epoch": 8.4, "grad_norm": 0.05813704431056976, "learning_rate": 1.3142035429240373e-06, "loss": 0.0004, "step": 4418 }, { "epoch": 8.4, "grad_norm": 0.0639563500881195, "learning_rate": 1.3111538080221952e-06, "loss": 0.0003, "step": 4419 }, { "epoch": 8.4, "grad_norm": 0.11456341296434402, "learning_rate": 1.3081073675552735e-06, "loss": 0.0005, "step": 4420 }, { "epoch": 8.4, "grad_norm": 0.12778563797473907, "learning_rate": 1.3050642226783493e-06, "loss": 0.0007, "step": 4421 }, { "epoch": 8.41, "grad_norm": 0.3667040467262268, "learning_rate": 1.3020243745452498e-06, "loss": 0.0015, "step": 4422 }, { "epoch": 8.41, "grad_norm": 0.11472772806882858, "learning_rate": 1.2989878243085619e-06, "loss": 0.0007, "step": 4423 }, { "epoch": 8.41, "grad_norm": 0.3502579629421234, "learning_rate": 1.2959545731196032e-06, "loss": 0.0014, "step": 4424 }, { "epoch": 8.41, "grad_norm": 0.15330177545547485, "learning_rate": 1.2929246221284596e-06, "loss": 0.0008, "step": 4425 }, { "epoch": 8.41, "grad_norm": 0.2319496124982834, "learning_rate": 1.2898979724839544e-06, "loss": 0.0036, "step": 4426 }, { "epoch": 8.42, "grad_norm": 0.887284517288208, "learning_rate": 1.286874625333664e-06, "loss": 0.0016, "step": 4427 }, { "epoch": 8.42, "grad_norm": 0.5112677216529846, "learning_rate": 1.283854581823909e-06, "loss": 0.0007, "step": 4428 }, { "epoch": 8.42, "grad_norm": 0.08047492057085037, "learning_rate": 1.2808378430997591e-06, "loss": 0.0004, "step": 4429 }, { "epoch": 8.42, "grad_norm": 0.11754536628723145, "learning_rate": 1.277824410305034e-06, "loss": 0.0005, "step": 4430 }, { "epoch": 8.42, "grad_norm": 0.29325222969055176, "learning_rate": 1.274814284582293e-06, "loss": 0.0013, "step": 4431 }, { "epoch": 8.43, "grad_norm": 0.342746764421463, "learning_rate": 1.271807467072852e-06, "loss": 0.0015, "step": 4432 }, { "epoch": 8.43, "grad_norm": 0.12758590281009674, "learning_rate": 1.2688039589167577e-06, "loss": 0.0007, "step": 4433 }, { "epoch": 8.43, "grad_norm": 0.4534390866756439, "learning_rate": 1.2658037612528184e-06, "loss": 0.0012, "step": 4434 }, { "epoch": 8.43, "grad_norm": 0.3332512676715851, "learning_rate": 1.2628068752185752e-06, "loss": 0.0008, "step": 4435 }, { "epoch": 8.43, "grad_norm": 0.16436868906021118, "learning_rate": 1.259813301950319e-06, "loss": 0.0005, "step": 4436 }, { "epoch": 8.44, "grad_norm": 0.12231405079364777, "learning_rate": 1.256823042583084e-06, "loss": 0.0004, "step": 4437 }, { "epoch": 8.44, "grad_norm": 0.08669916540384293, "learning_rate": 1.253836098250646e-06, "loss": 0.0005, "step": 4438 }, { "epoch": 8.44, "grad_norm": 0.7070015668869019, "learning_rate": 1.250852470085525e-06, "loss": 0.0057, "step": 4439 }, { "epoch": 8.44, "grad_norm": 0.1552242934703827, "learning_rate": 1.2478721592189846e-06, "loss": 0.0005, "step": 4440 }, { "epoch": 8.44, "grad_norm": 0.055653464049100876, "learning_rate": 1.2448951667810304e-06, "loss": 0.0004, "step": 4441 }, { "epoch": 8.44, "grad_norm": 0.5051988959312439, "learning_rate": 1.2419214939004065e-06, "loss": 0.0025, "step": 4442 }, { "epoch": 8.45, "grad_norm": 0.8252220153808594, "learning_rate": 1.2389511417046073e-06, "loss": 0.0027, "step": 4443 }, { "epoch": 8.45, "grad_norm": 0.2991043031215668, "learning_rate": 1.2359841113198534e-06, "loss": 0.0008, "step": 4444 }, { "epoch": 8.45, "grad_norm": 0.03337952494621277, "learning_rate": 1.2330204038711214e-06, "loss": 0.0003, "step": 4445 }, { "epoch": 8.45, "grad_norm": 0.300611674785614, "learning_rate": 1.2300600204821177e-06, "loss": 0.0014, "step": 4446 }, { "epoch": 8.45, "grad_norm": 0.22231121361255646, "learning_rate": 1.227102962275294e-06, "loss": 0.0007, "step": 4447 }, { "epoch": 8.46, "grad_norm": 0.11662594974040985, "learning_rate": 1.2241492303718372e-06, "loss": 0.0005, "step": 4448 }, { "epoch": 8.46, "grad_norm": 0.06183187663555145, "learning_rate": 1.2211988258916751e-06, "loss": 0.0003, "step": 4449 }, { "epoch": 8.46, "grad_norm": 0.20178669691085815, "learning_rate": 1.218251749953473e-06, "loss": 0.0007, "step": 4450 }, { "epoch": 8.46, "grad_norm": 0.3866437077522278, "learning_rate": 1.2153080036746346e-06, "loss": 0.0019, "step": 4451 }, { "epoch": 8.46, "grad_norm": 0.06413784623146057, "learning_rate": 1.2123675881713048e-06, "loss": 0.0003, "step": 4452 }, { "epoch": 8.47, "grad_norm": 0.16483132541179657, "learning_rate": 1.2094305045583566e-06, "loss": 0.0008, "step": 4453 }, { "epoch": 8.47, "grad_norm": 0.9323352575302124, "learning_rate": 1.2064967539494087e-06, "loss": 0.002, "step": 4454 }, { "epoch": 8.47, "grad_norm": 0.10516464710235596, "learning_rate": 1.2035663374568118e-06, "loss": 0.0006, "step": 4455 }, { "epoch": 8.47, "grad_norm": 0.2017342895269394, "learning_rate": 1.2006392561916536e-06, "loss": 0.0007, "step": 4456 }, { "epoch": 8.47, "grad_norm": 0.041810542345047, "learning_rate": 1.197715511263754e-06, "loss": 0.0002, "step": 4457 }, { "epoch": 8.48, "grad_norm": 0.4320303201675415, "learning_rate": 1.1947951037816762e-06, "loss": 0.0021, "step": 4458 }, { "epoch": 8.48, "grad_norm": 0.26776719093322754, "learning_rate": 1.1918780348527082e-06, "loss": 0.0015, "step": 4459 }, { "epoch": 8.48, "grad_norm": 0.09656321257352829, "learning_rate": 1.188964305582876e-06, "loss": 0.0005, "step": 4460 }, { "epoch": 8.48, "grad_norm": 0.08663162589073181, "learning_rate": 1.1860539170769436e-06, "loss": 0.0006, "step": 4461 }, { "epoch": 8.48, "grad_norm": 0.5516363978385925, "learning_rate": 1.1831468704383996e-06, "loss": 0.0015, "step": 4462 }, { "epoch": 8.48, "grad_norm": 0.19870974123477936, "learning_rate": 1.180243166769477e-06, "loss": 0.0021, "step": 4463 }, { "epoch": 8.49, "grad_norm": 0.3300761580467224, "learning_rate": 1.1773428071711257e-06, "loss": 0.0015, "step": 4464 }, { "epoch": 8.49, "grad_norm": 0.3128100037574768, "learning_rate": 1.1744457927430442e-06, "loss": 0.0029, "step": 4465 }, { "epoch": 8.49, "grad_norm": 0.12104743719100952, "learning_rate": 1.1715521245836526e-06, "loss": 0.0008, "step": 4466 }, { "epoch": 8.49, "grad_norm": 0.44848230481147766, "learning_rate": 1.1686618037901053e-06, "loss": 0.0016, "step": 4467 }, { "epoch": 8.49, "grad_norm": 0.26341190934181213, "learning_rate": 1.165774831458285e-06, "loss": 0.0023, "step": 4468 }, { "epoch": 8.5, "grad_norm": 0.5359348058700562, "learning_rate": 1.1628912086828115e-06, "loss": 0.0009, "step": 4469 }, { "epoch": 8.5, "grad_norm": 0.10408496111631393, "learning_rate": 1.1600109365570235e-06, "loss": 0.0006, "step": 4470 }, { "epoch": 8.5, "grad_norm": 0.0594789981842041, "learning_rate": 1.1571340161730017e-06, "loss": 0.0004, "step": 4471 }, { "epoch": 8.5, "grad_norm": 0.230558842420578, "learning_rate": 1.1542604486215458e-06, "loss": 0.0008, "step": 4472 }, { "epoch": 8.5, "grad_norm": 0.14265170693397522, "learning_rate": 1.1513902349921914e-06, "loss": 0.0003, "step": 4473 }, { "epoch": 8.51, "grad_norm": 0.03244486078619957, "learning_rate": 1.1485233763731961e-06, "loss": 0.0002, "step": 4474 }, { "epoch": 8.51, "grad_norm": 0.13666561245918274, "learning_rate": 1.1456598738515522e-06, "loss": 0.0006, "step": 4475 }, { "epoch": 8.51, "grad_norm": 0.09099838882684708, "learning_rate": 1.1427997285129743e-06, "loss": 0.0004, "step": 4476 }, { "epoch": 8.51, "grad_norm": 0.1993841677904129, "learning_rate": 1.139942941441904e-06, "loss": 0.0007, "step": 4477 }, { "epoch": 8.51, "grad_norm": 0.3164008557796478, "learning_rate": 1.1370895137215178e-06, "loss": 0.0027, "step": 4478 }, { "epoch": 8.52, "grad_norm": 0.31119123101234436, "learning_rate": 1.1342394464337036e-06, "loss": 0.0008, "step": 4479 }, { "epoch": 8.52, "grad_norm": 0.6009032130241394, "learning_rate": 1.1313927406590907e-06, "loss": 0.0021, "step": 4480 }, { "epoch": 8.52, "grad_norm": 0.10203361511230469, "learning_rate": 1.1285493974770246e-06, "loss": 0.0004, "step": 4481 }, { "epoch": 8.52, "grad_norm": 0.570560097694397, "learning_rate": 1.125709417965577e-06, "loss": 0.0014, "step": 4482 }, { "epoch": 8.52, "grad_norm": 0.24506539106369019, "learning_rate": 1.1228728032015468e-06, "loss": 0.0004, "step": 4483 }, { "epoch": 8.52, "grad_norm": 0.13265973329544067, "learning_rate": 1.1200395542604548e-06, "loss": 0.0005, "step": 4484 }, { "epoch": 8.53, "grad_norm": 0.5596687197685242, "learning_rate": 1.1172096722165459e-06, "loss": 0.0013, "step": 4485 }, { "epoch": 8.53, "grad_norm": 0.3687857985496521, "learning_rate": 1.1143831581427888e-06, "loss": 0.0012, "step": 4486 }, { "epoch": 8.53, "grad_norm": 0.20373445749282837, "learning_rate": 1.1115600131108817e-06, "loss": 0.001, "step": 4487 }, { "epoch": 8.53, "grad_norm": 0.3150709569454193, "learning_rate": 1.1087402381912293e-06, "loss": 0.0017, "step": 4488 }, { "epoch": 8.53, "grad_norm": 0.22817692160606384, "learning_rate": 1.1059238344529766e-06, "loss": 0.0008, "step": 4489 }, { "epoch": 8.54, "grad_norm": 0.18378740549087524, "learning_rate": 1.1031108029639759e-06, "loss": 0.0005, "step": 4490 }, { "epoch": 8.54, "grad_norm": 0.4508519768714905, "learning_rate": 1.1003011447908107e-06, "loss": 0.0016, "step": 4491 }, { "epoch": 8.54, "grad_norm": 0.6321935057640076, "learning_rate": 1.0974948609987824e-06, "loss": 0.0016, "step": 4492 }, { "epoch": 8.54, "grad_norm": 0.6574958562850952, "learning_rate": 1.0946919526519118e-06, "loss": 0.0017, "step": 4493 }, { "epoch": 8.54, "grad_norm": 0.1922304481267929, "learning_rate": 1.091892420812939e-06, "loss": 0.0005, "step": 4494 }, { "epoch": 8.55, "grad_norm": 0.06049855425953865, "learning_rate": 1.0890962665433268e-06, "loss": 0.0004, "step": 4495 }, { "epoch": 8.55, "grad_norm": 0.4413755536079407, "learning_rate": 1.0863034909032565e-06, "loss": 0.0008, "step": 4496 }, { "epoch": 8.55, "grad_norm": 0.15745529532432556, "learning_rate": 1.0835140949516254e-06, "loss": 0.0011, "step": 4497 }, { "epoch": 8.55, "grad_norm": 0.2455398589372635, "learning_rate": 1.0807280797460573e-06, "loss": 0.0007, "step": 4498 }, { "epoch": 8.55, "grad_norm": 0.2593502700328827, "learning_rate": 1.0779454463428819e-06, "loss": 0.0006, "step": 4499 }, { "epoch": 8.56, "grad_norm": 0.7636024355888367, "learning_rate": 1.0751661957971581e-06, "loss": 0.0037, "step": 4500 }, { "epoch": 8.56, "eval_blimp_filtered_avg": 0.7376119402985075, "eval_blimp_filtered_std": 0.004832603583247732, "step": 4500 }, { "epoch": 8.56, "eval_blimp_supplement_avg": 0.7974137931034483, "eval_blimp_supplement_std": 0.017640361767009737, "step": 4500 }, { "epoch": 8.56, "eval_vqa_filtered_avg": 0.37, "eval_vqa_filtered_std": 0.048523658709391, "step": 4500 }, { "epoch": 8.56, "eval_winoground_filtered_avg": 0.49, "eval_winoground_filtered_std": 0.05024183937956912, "step": 4500 }, { "epoch": 8.56, "grad_norm": 0.18212753534317017, "learning_rate": 1.0723903291626569e-06, "loss": 0.0007, "step": 4501 }, { "epoch": 8.56, "grad_norm": 0.02559029497206211, "learning_rate": 1.069617847491866e-06, "loss": 0.0002, "step": 4502 }, { "epoch": 8.56, "grad_norm": 0.15272262692451477, "learning_rate": 1.0668487518359894e-06, "loss": 0.0006, "step": 4503 }, { "epoch": 8.56, "grad_norm": 0.121881403028965, "learning_rate": 1.0640830432449534e-06, "loss": 0.0005, "step": 4504 }, { "epoch": 8.56, "grad_norm": 0.5184833407402039, "learning_rate": 1.0613207227673906e-06, "loss": 0.0043, "step": 4505 }, { "epoch": 8.57, "grad_norm": 0.12964968383312225, "learning_rate": 1.058561791450653e-06, "loss": 0.0009, "step": 4506 }, { "epoch": 8.57, "grad_norm": 0.06927710771560669, "learning_rate": 1.055806250340813e-06, "loss": 0.0003, "step": 4507 }, { "epoch": 8.57, "grad_norm": 0.7998728156089783, "learning_rate": 1.0530541004826456e-06, "loss": 0.0038, "step": 4508 }, { "epoch": 8.57, "grad_norm": 0.3232382833957672, "learning_rate": 1.050305342919653e-06, "loss": 0.0016, "step": 4509 }, { "epoch": 8.57, "grad_norm": 0.019174546003341675, "learning_rate": 1.0475599786940438e-06, "loss": 0.0001, "step": 4510 }, { "epoch": 8.58, "grad_norm": 1.0784131288528442, "learning_rate": 1.0448180088467408e-06, "loss": 0.0033, "step": 4511 }, { "epoch": 8.58, "grad_norm": 0.024921895936131477, "learning_rate": 1.0420794344173813e-06, "loss": 0.0002, "step": 4512 }, { "epoch": 8.58, "grad_norm": 0.12627799808979034, "learning_rate": 1.0393442564443124e-06, "loss": 0.0003, "step": 4513 }, { "epoch": 8.58, "grad_norm": 0.1138378158211708, "learning_rate": 1.0366124759645956e-06, "loss": 0.0006, "step": 4514 }, { "epoch": 8.58, "grad_norm": 1.0394772291183472, "learning_rate": 1.0338840940140083e-06, "loss": 0.0087, "step": 4515 }, { "epoch": 8.59, "grad_norm": 0.23510602116584778, "learning_rate": 1.0311591116270282e-06, "loss": 0.0005, "step": 4516 }, { "epoch": 8.59, "grad_norm": 0.17184491455554962, "learning_rate": 1.0284375298368555e-06, "loss": 0.0017, "step": 4517 }, { "epoch": 8.59, "grad_norm": 0.744900643825531, "learning_rate": 1.0257193496753959e-06, "loss": 0.0026, "step": 4518 }, { "epoch": 8.59, "grad_norm": 0.7991925477981567, "learning_rate": 1.023004572173265e-06, "loss": 0.0011, "step": 4519 }, { "epoch": 8.59, "grad_norm": 0.648861825466156, "learning_rate": 1.0202931983597896e-06, "loss": 0.0012, "step": 4520 }, { "epoch": 8.6, "grad_norm": 0.13598337769508362, "learning_rate": 1.0175852292630051e-06, "loss": 0.0004, "step": 4521 }, { "epoch": 8.6, "grad_norm": 0.08375159651041031, "learning_rate": 1.0148806659096554e-06, "loss": 0.0003, "step": 4522 }, { "epoch": 8.6, "grad_norm": 0.11968885362148285, "learning_rate": 1.012179509325194e-06, "loss": 0.0007, "step": 4523 }, { "epoch": 8.6, "grad_norm": 0.2948761284351349, "learning_rate": 1.009481760533787e-06, "loss": 0.0007, "step": 4524 }, { "epoch": 8.6, "grad_norm": 0.39043667912483215, "learning_rate": 1.0067874205582974e-06, "loss": 0.001, "step": 4525 }, { "epoch": 8.6, "grad_norm": 0.36747318506240845, "learning_rate": 1.0040964904203077e-06, "loss": 0.0016, "step": 4526 }, { "epoch": 8.61, "grad_norm": 1.6253434419631958, "learning_rate": 1.0014089711400998e-06, "loss": 0.0024, "step": 4527 }, { "epoch": 8.61, "grad_norm": 0.4242497384548187, "learning_rate": 9.987248637366664e-07, "loss": 0.0009, "step": 4528 }, { "epoch": 8.61, "grad_norm": 0.11321121454238892, "learning_rate": 9.960441692277033e-07, "loss": 0.0005, "step": 4529 }, { "epoch": 8.61, "grad_norm": 0.25040385127067566, "learning_rate": 9.933668886296155e-07, "loss": 0.0008, "step": 4530 }, { "epoch": 8.61, "grad_norm": 0.4605788290500641, "learning_rate": 9.906930229575119e-07, "loss": 0.002, "step": 4531 }, { "epoch": 8.62, "grad_norm": 0.11621037125587463, "learning_rate": 9.880225732252036e-07, "loss": 0.0006, "step": 4532 }, { "epoch": 8.62, "grad_norm": 0.01262626051902771, "learning_rate": 9.853555404452164e-07, "loss": 0.0001, "step": 4533 }, { "epoch": 8.62, "grad_norm": 0.17559842765331268, "learning_rate": 9.826919256287659e-07, "loss": 0.0012, "step": 4534 }, { "epoch": 8.62, "grad_norm": 0.1831952929496765, "learning_rate": 9.800317297857865e-07, "loss": 0.001, "step": 4535 }, { "epoch": 8.62, "grad_norm": 0.36377131938934326, "learning_rate": 9.773749539249056e-07, "loss": 0.0009, "step": 4536 }, { "epoch": 8.63, "grad_norm": 0.31949159502983093, "learning_rate": 9.747215990534586e-07, "loss": 0.0013, "step": 4537 }, { "epoch": 8.63, "grad_norm": 0.9964385032653809, "learning_rate": 9.720716661774842e-07, "loss": 0.0023, "step": 4538 }, { "epoch": 8.63, "grad_norm": 1.1644556522369385, "learning_rate": 9.69425156301721e-07, "loss": 0.002, "step": 4539 }, { "epoch": 8.63, "grad_norm": 0.34877529740333557, "learning_rate": 9.667820704296115e-07, "loss": 0.0025, "step": 4540 }, { "epoch": 8.63, "grad_norm": 0.1472691297531128, "learning_rate": 9.641424095633e-07, "loss": 0.0007, "step": 4541 }, { "epoch": 8.63, "grad_norm": 0.09140437841415405, "learning_rate": 9.615061747036313e-07, "loss": 0.0003, "step": 4542 }, { "epoch": 8.64, "grad_norm": 0.058957137167453766, "learning_rate": 9.588733668501504e-07, "loss": 0.0003, "step": 4543 }, { "epoch": 8.64, "grad_norm": 0.19230695068836212, "learning_rate": 9.562439870011097e-07, "loss": 0.0013, "step": 4544 }, { "epoch": 8.64, "grad_norm": 0.047976940870285034, "learning_rate": 9.536180361534475e-07, "loss": 0.0002, "step": 4545 }, { "epoch": 8.64, "grad_norm": 0.3405960202217102, "learning_rate": 9.509955153028194e-07, "loss": 0.0012, "step": 4546 }, { "epoch": 8.64, "grad_norm": 0.30944743752479553, "learning_rate": 9.483764254435668e-07, "loss": 0.001, "step": 4547 }, { "epoch": 8.65, "grad_norm": 0.07354913651943207, "learning_rate": 9.457607675687375e-07, "loss": 0.0004, "step": 4548 }, { "epoch": 8.65, "grad_norm": 0.507630467414856, "learning_rate": 9.431485426700737e-07, "loss": 0.0011, "step": 4549 }, { "epoch": 8.65, "grad_norm": 0.06408625841140747, "learning_rate": 9.405397517380232e-07, "loss": 0.0003, "step": 4550 }, { "epoch": 8.65, "grad_norm": 0.11449747532606125, "learning_rate": 9.379343957617226e-07, "loss": 0.0005, "step": 4551 }, { "epoch": 8.65, "grad_norm": 1.056908369064331, "learning_rate": 9.353324757290084e-07, "loss": 0.0058, "step": 4552 }, { "epoch": 8.66, "grad_norm": 0.2556059956550598, "learning_rate": 9.327339926264223e-07, "loss": 0.0013, "step": 4553 }, { "epoch": 8.66, "grad_norm": 0.28089454770088196, "learning_rate": 9.301389474391898e-07, "loss": 0.0011, "step": 4554 }, { "epoch": 8.66, "grad_norm": 0.16147194802761078, "learning_rate": 9.275473411512448e-07, "loss": 0.0005, "step": 4555 }, { "epoch": 8.66, "grad_norm": 0.260076105594635, "learning_rate": 9.249591747452125e-07, "loss": 0.0009, "step": 4556 }, { "epoch": 8.66, "grad_norm": 0.07110165059566498, "learning_rate": 9.223744492024111e-07, "loss": 0.0004, "step": 4557 }, { "epoch": 8.67, "grad_norm": 0.17036981880664825, "learning_rate": 9.197931655028558e-07, "loss": 0.0015, "step": 4558 }, { "epoch": 8.67, "grad_norm": 0.43534567952156067, "learning_rate": 9.172153246252647e-07, "loss": 0.0007, "step": 4559 }, { "epoch": 8.67, "grad_norm": 0.10372278094291687, "learning_rate": 9.146409275470348e-07, "loss": 0.0003, "step": 4560 }, { "epoch": 8.67, "grad_norm": 0.1433970183134079, "learning_rate": 9.120699752442741e-07, "loss": 0.0002, "step": 4561 }, { "epoch": 8.67, "grad_norm": 0.2220367193222046, "learning_rate": 9.095024686917686e-07, "loss": 0.0024, "step": 4562 }, { "epoch": 8.67, "grad_norm": 0.5731876492500305, "learning_rate": 9.069384088630117e-07, "loss": 0.0025, "step": 4563 }, { "epoch": 8.68, "grad_norm": 0.8629439473152161, "learning_rate": 9.043777967301837e-07, "loss": 0.0016, "step": 4564 }, { "epoch": 8.68, "grad_norm": 0.2753264307975769, "learning_rate": 9.018206332641555e-07, "loss": 0.0007, "step": 4565 }, { "epoch": 8.68, "grad_norm": 0.30492717027664185, "learning_rate": 8.992669194344949e-07, "loss": 0.0029, "step": 4566 }, { "epoch": 8.68, "grad_norm": 0.06960004568099976, "learning_rate": 8.967166562094586e-07, "loss": 0.0003, "step": 4567 }, { "epoch": 8.68, "grad_norm": 0.3734361529350281, "learning_rate": 8.941698445559965e-07, "loss": 0.0017, "step": 4568 }, { "epoch": 8.69, "grad_norm": 0.25354427099227905, "learning_rate": 8.916264854397483e-07, "loss": 0.0005, "step": 4569 }, { "epoch": 8.69, "grad_norm": 0.2305360585451126, "learning_rate": 8.890865798250503e-07, "loss": 0.0008, "step": 4570 }, { "epoch": 8.69, "grad_norm": 0.11854898184537888, "learning_rate": 8.86550128674919e-07, "loss": 0.0007, "step": 4571 }, { "epoch": 8.69, "grad_norm": 0.12019939720630646, "learning_rate": 8.840171329510705e-07, "loss": 0.0005, "step": 4572 }, { "epoch": 8.69, "grad_norm": 0.3344084620475769, "learning_rate": 8.814875936139078e-07, "loss": 0.0007, "step": 4573 }, { "epoch": 8.7, "grad_norm": 0.29142096638679504, "learning_rate": 8.789615116225214e-07, "loss": 0.0008, "step": 4574 }, { "epoch": 8.7, "grad_norm": 0.11992885172367096, "learning_rate": 8.76438887934693e-07, "loss": 0.0004, "step": 4575 }, { "epoch": 8.7, "grad_norm": 0.6202839016914368, "learning_rate": 8.739197235068919e-07, "loss": 0.0016, "step": 4576 }, { "epoch": 8.7, "grad_norm": 0.024331066757440567, "learning_rate": 8.714040192942763e-07, "loss": 0.0002, "step": 4577 }, { "epoch": 8.7, "grad_norm": 0.1205788180232048, "learning_rate": 8.68891776250691e-07, "loss": 0.0004, "step": 4578 }, { "epoch": 8.71, "grad_norm": 0.07880747318267822, "learning_rate": 8.663829953286762e-07, "loss": 0.0004, "step": 4579 }, { "epoch": 8.71, "grad_norm": 0.02207680232822895, "learning_rate": 8.638776774794456e-07, "loss": 0.0002, "step": 4580 }, { "epoch": 8.71, "grad_norm": 0.2181522697210312, "learning_rate": 8.613758236529113e-07, "loss": 0.0005, "step": 4581 }, { "epoch": 8.71, "grad_norm": 1.0758181810379028, "learning_rate": 8.58877434797668e-07, "loss": 0.0032, "step": 4582 }, { "epoch": 8.71, "grad_norm": 0.29206693172454834, "learning_rate": 8.56382511860997e-07, "loss": 0.0011, "step": 4583 }, { "epoch": 8.71, "grad_norm": 0.10079033672809601, "learning_rate": 8.538910557888635e-07, "loss": 0.0003, "step": 4584 }, { "epoch": 8.72, "grad_norm": 0.01612163335084915, "learning_rate": 8.514030675259221e-07, "loss": 0.0001, "step": 4585 }, { "epoch": 8.72, "grad_norm": 0.8214495778083801, "learning_rate": 8.489185480155082e-07, "loss": 0.0022, "step": 4586 }, { "epoch": 8.72, "grad_norm": 0.06163203343749046, "learning_rate": 8.464374981996459e-07, "loss": 0.0004, "step": 4587 }, { "epoch": 8.72, "grad_norm": 0.39319565892219543, "learning_rate": 8.439599190190417e-07, "loss": 0.0016, "step": 4588 }, { "epoch": 8.72, "grad_norm": 0.4133671820163727, "learning_rate": 8.414858114130842e-07, "loss": 0.0012, "step": 4589 }, { "epoch": 8.73, "grad_norm": 0.41708609461784363, "learning_rate": 8.390151763198529e-07, "loss": 0.0013, "step": 4590 }, { "epoch": 8.73, "grad_norm": 0.06427033245563507, "learning_rate": 8.365480146761007e-07, "loss": 0.0003, "step": 4591 }, { "epoch": 8.73, "grad_norm": 0.6929601430892944, "learning_rate": 8.340843274172728e-07, "loss": 0.0024, "step": 4592 }, { "epoch": 8.73, "grad_norm": 0.06945943832397461, "learning_rate": 8.316241154774907e-07, "loss": 0.0002, "step": 4593 }, { "epoch": 8.73, "grad_norm": 0.2623699903488159, "learning_rate": 8.291673797895616e-07, "loss": 0.001, "step": 4594 }, { "epoch": 8.74, "grad_norm": 0.9821833968162537, "learning_rate": 8.267141212849705e-07, "loss": 0.0018, "step": 4595 }, { "epoch": 8.74, "grad_norm": 0.045104019343853, "learning_rate": 8.242643408938922e-07, "loss": 0.0003, "step": 4596 }, { "epoch": 8.74, "grad_norm": 0.7488456964492798, "learning_rate": 8.218180395451736e-07, "loss": 0.0047, "step": 4597 }, { "epoch": 8.74, "grad_norm": 0.16233707964420319, "learning_rate": 8.193752181663461e-07, "loss": 0.0013, "step": 4598 }, { "epoch": 8.74, "grad_norm": 0.5491921901702881, "learning_rate": 8.169358776836267e-07, "loss": 0.0009, "step": 4599 }, { "epoch": 8.75, "grad_norm": 0.22789433598518372, "learning_rate": 8.145000190218999e-07, "loss": 0.0005, "step": 4600 }, { "epoch": 8.75, "eval_blimp_filtered_avg": 0.7379104477611941, "eval_blimp_filtered_std": 0.004837862257192662, "step": 4600 }, { "epoch": 8.75, "eval_blimp_supplement_avg": 0.7952586206896551, "eval_blimp_supplement_std": 0.0177088511968717, "step": 4600 }, { "epoch": 8.75, "eval_vqa_filtered_avg": 0.35, "eval_vqa_filtered_std": 0.047937248544110196, "step": 4600 }, { "epoch": 8.75, "eval_winoground_filtered_avg": 0.5, "eval_winoground_filtered_std": 0.050251890762960605, "step": 4600 }, { "epoch": 8.75, "grad_norm": 0.18191704154014587, "learning_rate": 8.120676431047459e-07, "loss": 0.0014, "step": 4601 }, { "epoch": 8.75, "grad_norm": 0.24610891938209534, "learning_rate": 8.096387508544123e-07, "loss": 0.0008, "step": 4602 }, { "epoch": 8.75, "grad_norm": 0.3966616690158844, "learning_rate": 8.072133431918317e-07, "loss": 0.0044, "step": 4603 }, { "epoch": 8.75, "grad_norm": 0.05350183695554733, "learning_rate": 8.047914210366104e-07, "loss": 0.0002, "step": 4604 }, { "epoch": 8.75, "grad_norm": 0.17934012413024902, "learning_rate": 8.023729853070439e-07, "loss": 0.0004, "step": 4605 }, { "epoch": 8.76, "grad_norm": 0.8028424978256226, "learning_rate": 7.999580369200899e-07, "loss": 0.0018, "step": 4606 }, { "epoch": 8.76, "grad_norm": 0.2593037784099579, "learning_rate": 7.975465767913981e-07, "loss": 0.0017, "step": 4607 }, { "epoch": 8.76, "grad_norm": 1.0279871225357056, "learning_rate": 7.951386058352895e-07, "loss": 0.0026, "step": 4608 }, { "epoch": 8.76, "grad_norm": 0.1598934829235077, "learning_rate": 7.927341249647602e-07, "loss": 0.0006, "step": 4609 }, { "epoch": 8.76, "grad_norm": 0.035165440291166306, "learning_rate": 7.903331350914867e-07, "loss": 0.0001, "step": 4610 }, { "epoch": 8.77, "grad_norm": 0.7998136878013611, "learning_rate": 7.879356371258218e-07, "loss": 0.0065, "step": 4611 }, { "epoch": 8.77, "grad_norm": 0.4842442274093628, "learning_rate": 7.855416319767905e-07, "loss": 0.0019, "step": 4612 }, { "epoch": 8.77, "grad_norm": 0.3124077618122101, "learning_rate": 7.831511205520981e-07, "loss": 0.0013, "step": 4613 }, { "epoch": 8.77, "grad_norm": 0.08975119888782501, "learning_rate": 7.807641037581226e-07, "loss": 0.0006, "step": 4614 }, { "epoch": 8.77, "grad_norm": 0.11143851280212402, "learning_rate": 7.783805824999158e-07, "loss": 0.0003, "step": 4615 }, { "epoch": 8.78, "grad_norm": 0.08092871308326721, "learning_rate": 7.760005576812113e-07, "loss": 0.0003, "step": 4616 }, { "epoch": 8.78, "grad_norm": 0.41683584451675415, "learning_rate": 7.73624030204404e-07, "loss": 0.0027, "step": 4617 }, { "epoch": 8.78, "grad_norm": 1.2435330152511597, "learning_rate": 7.712510009705765e-07, "loss": 0.0025, "step": 4618 }, { "epoch": 8.78, "grad_norm": 0.5179740786552429, "learning_rate": 7.688814708794767e-07, "loss": 0.0012, "step": 4619 }, { "epoch": 8.78, "grad_norm": 0.08107495307922363, "learning_rate": 7.665154408295284e-07, "loss": 0.0004, "step": 4620 }, { "epoch": 8.79, "grad_norm": 0.07895413041114807, "learning_rate": 7.641529117178271e-07, "loss": 0.0004, "step": 4621 }, { "epoch": 8.79, "grad_norm": 0.15111252665519714, "learning_rate": 7.617938844401429e-07, "loss": 0.0006, "step": 4622 }, { "epoch": 8.79, "grad_norm": 0.4766933023929596, "learning_rate": 7.59438359890916e-07, "loss": 0.0013, "step": 4623 }, { "epoch": 8.79, "grad_norm": 0.7709715962409973, "learning_rate": 7.570863389632588e-07, "loss": 0.0035, "step": 4624 }, { "epoch": 8.79, "grad_norm": 0.163069486618042, "learning_rate": 7.547378225489599e-07, "loss": 0.0006, "step": 4625 }, { "epoch": 8.79, "grad_norm": 0.09499335289001465, "learning_rate": 7.523928115384682e-07, "loss": 0.0006, "step": 4626 }, { "epoch": 8.8, "grad_norm": 0.20496493577957153, "learning_rate": 7.500513068209181e-07, "loss": 0.0009, "step": 4627 }, { "epoch": 8.8, "grad_norm": 0.13346628844738007, "learning_rate": 7.477133092841026e-07, "loss": 0.0007, "step": 4628 }, { "epoch": 8.8, "grad_norm": 0.0731731653213501, "learning_rate": 7.453788198144895e-07, "loss": 0.0005, "step": 4629 }, { "epoch": 8.8, "grad_norm": 0.3514834940433502, "learning_rate": 7.430478392972185e-07, "loss": 0.0008, "step": 4630 }, { "epoch": 8.8, "grad_norm": 0.2886366844177246, "learning_rate": 7.407203686160946e-07, "loss": 0.0004, "step": 4631 }, { "epoch": 8.81, "grad_norm": 0.30236223340034485, "learning_rate": 7.383964086535944e-07, "loss": 0.0033, "step": 4632 }, { "epoch": 8.81, "grad_norm": 0.10698659718036652, "learning_rate": 7.360759602908618e-07, "loss": 0.0005, "step": 4633 }, { "epoch": 8.81, "grad_norm": 0.35370081663131714, "learning_rate": 7.337590244077164e-07, "loss": 0.0007, "step": 4634 }, { "epoch": 8.81, "grad_norm": 0.6895363330841064, "learning_rate": 7.314456018826322e-07, "loss": 0.0013, "step": 4635 }, { "epoch": 8.81, "grad_norm": 0.22137831151485443, "learning_rate": 7.291356935927651e-07, "loss": 0.0006, "step": 4636 }, { "epoch": 8.82, "grad_norm": 0.11479948461055756, "learning_rate": 7.268293004139271e-07, "loss": 0.0005, "step": 4637 }, { "epoch": 8.82, "grad_norm": 0.1419840157032013, "learning_rate": 7.245264232206073e-07, "loss": 0.0008, "step": 4638 }, { "epoch": 8.82, "grad_norm": 0.1574755758047104, "learning_rate": 7.222270628859562e-07, "loss": 0.0004, "step": 4639 }, { "epoch": 8.82, "grad_norm": 0.582170307636261, "learning_rate": 7.19931220281791e-07, "loss": 0.0009, "step": 4640 }, { "epoch": 8.82, "grad_norm": 0.05214209482073784, "learning_rate": 7.176388962785951e-07, "loss": 0.0002, "step": 4641 }, { "epoch": 8.83, "grad_norm": 0.06942916661500931, "learning_rate": 7.153500917455225e-07, "loss": 0.0004, "step": 4642 }, { "epoch": 8.83, "grad_norm": 0.12052622437477112, "learning_rate": 7.130648075503844e-07, "loss": 0.0007, "step": 4643 }, { "epoch": 8.83, "grad_norm": 0.900768518447876, "learning_rate": 7.107830445596631e-07, "loss": 0.0029, "step": 4644 }, { "epoch": 8.83, "grad_norm": 0.08490339666604996, "learning_rate": 7.085048036385079e-07, "loss": 0.0005, "step": 4645 }, { "epoch": 8.83, "grad_norm": 0.2959882616996765, "learning_rate": 7.062300856507232e-07, "loss": 0.0007, "step": 4646 }, { "epoch": 8.83, "grad_norm": 0.8497399091720581, "learning_rate": 7.039588914587892e-07, "loss": 0.0033, "step": 4647 }, { "epoch": 8.84, "grad_norm": 0.3419683277606964, "learning_rate": 7.016912219238414e-07, "loss": 0.0007, "step": 4648 }, { "epoch": 8.84, "grad_norm": 0.22435231506824493, "learning_rate": 6.994270779056833e-07, "loss": 0.0005, "step": 4649 }, { "epoch": 8.84, "grad_norm": 0.6575555205345154, "learning_rate": 6.971664602627792e-07, "loss": 0.0023, "step": 4650 }, { "epoch": 8.84, "grad_norm": 0.6127040386199951, "learning_rate": 6.949093698522613e-07, "loss": 0.0058, "step": 4651 }, { "epoch": 8.84, "grad_norm": 0.15414728224277496, "learning_rate": 6.92655807529915e-07, "loss": 0.0004, "step": 4652 }, { "epoch": 8.85, "grad_norm": 0.04841246455907822, "learning_rate": 6.90405774150199e-07, "loss": 0.0002, "step": 4653 }, { "epoch": 8.85, "grad_norm": 0.19958943128585815, "learning_rate": 6.881592705662266e-07, "loss": 0.0012, "step": 4654 }, { "epoch": 8.85, "grad_norm": 1.1625386476516724, "learning_rate": 6.85916297629774e-07, "loss": 0.0051, "step": 4655 }, { "epoch": 8.85, "grad_norm": 0.6148843169212341, "learning_rate": 6.836768561912799e-07, "loss": 0.0021, "step": 4656 }, { "epoch": 8.85, "grad_norm": 0.12476001679897308, "learning_rate": 6.814409470998451e-07, "loss": 0.0011, "step": 4657 }, { "epoch": 8.86, "grad_norm": 0.12169189751148224, "learning_rate": 6.792085712032281e-07, "loss": 0.0003, "step": 4658 }, { "epoch": 8.86, "grad_norm": 0.2607518434524536, "learning_rate": 6.769797293478486e-07, "loss": 0.0019, "step": 4659 }, { "epoch": 8.86, "grad_norm": 0.20198293030261993, "learning_rate": 6.747544223787917e-07, "loss": 0.0017, "step": 4660 }, { "epoch": 8.86, "grad_norm": 0.112950898706913, "learning_rate": 6.725326511397923e-07, "loss": 0.0004, "step": 4661 }, { "epoch": 8.86, "grad_norm": 0.29652926325798035, "learning_rate": 6.703144164732534e-07, "loss": 0.0004, "step": 4662 }, { "epoch": 8.87, "grad_norm": 0.22313876450061798, "learning_rate": 6.680997192202299e-07, "loss": 0.0012, "step": 4663 }, { "epoch": 8.87, "grad_norm": 0.8335538506507874, "learning_rate": 6.658885602204446e-07, "loss": 0.0031, "step": 4664 }, { "epoch": 8.87, "grad_norm": 0.23714786767959595, "learning_rate": 6.636809403122691e-07, "loss": 0.0012, "step": 4665 }, { "epoch": 8.87, "grad_norm": 0.026527730748057365, "learning_rate": 6.614768603327393e-07, "loss": 0.0001, "step": 4666 }, { "epoch": 8.87, "grad_norm": 0.22717155516147614, "learning_rate": 6.592763211175468e-07, "loss": 0.001, "step": 4667 }, { "epoch": 8.87, "grad_norm": 0.10708031803369522, "learning_rate": 6.570793235010408e-07, "loss": 0.0005, "step": 4668 }, { "epoch": 8.88, "grad_norm": 0.029907915741205215, "learning_rate": 6.548858683162285e-07, "loss": 0.0002, "step": 4669 }, { "epoch": 8.88, "grad_norm": 0.06958063691854477, "learning_rate": 6.52695956394771e-07, "loss": 0.0003, "step": 4670 }, { "epoch": 8.88, "grad_norm": 0.21922291815280914, "learning_rate": 6.505095885669921e-07, "loss": 0.002, "step": 4671 }, { "epoch": 8.88, "grad_norm": 1.0751022100448608, "learning_rate": 6.483267656618641e-07, "loss": 0.0022, "step": 4672 }, { "epoch": 8.88, "grad_norm": 0.24681003391742706, "learning_rate": 6.461474885070218e-07, "loss": 0.0012, "step": 4673 }, { "epoch": 8.89, "grad_norm": 0.7218507528305054, "learning_rate": 6.439717579287519e-07, "loss": 0.0009, "step": 4674 }, { "epoch": 8.89, "grad_norm": 0.48566678166389465, "learning_rate": 6.417995747519967e-07, "loss": 0.0006, "step": 4675 }, { "epoch": 8.89, "grad_norm": 0.42712560296058655, "learning_rate": 6.396309398003564e-07, "loss": 0.0014, "step": 4676 }, { "epoch": 8.89, "grad_norm": 0.2083093822002411, "learning_rate": 6.37465853896081e-07, "loss": 0.0006, "step": 4677 }, { "epoch": 8.89, "grad_norm": 0.16028940677642822, "learning_rate": 6.353043178600793e-07, "loss": 0.0004, "step": 4678 }, { "epoch": 8.9, "grad_norm": 0.4976688027381897, "learning_rate": 6.331463325119092e-07, "loss": 0.0071, "step": 4679 }, { "epoch": 8.9, "grad_norm": 1.6826565265655518, "learning_rate": 6.309918986697916e-07, "loss": 0.0018, "step": 4680 }, { "epoch": 8.9, "grad_norm": 0.8925759792327881, "learning_rate": 6.288410171505887e-07, "loss": 0.0058, "step": 4681 }, { "epoch": 8.9, "grad_norm": 0.1434478908777237, "learning_rate": 6.26693688769825e-07, "loss": 0.0006, "step": 4682 }, { "epoch": 8.9, "grad_norm": 0.188623309135437, "learning_rate": 6.245499143416733e-07, "loss": 0.0007, "step": 4683 }, { "epoch": 8.9, "grad_norm": 0.04064309597015381, "learning_rate": 6.224096946789615e-07, "loss": 0.0003, "step": 4684 }, { "epoch": 8.91, "grad_norm": 0.2179836481809616, "learning_rate": 6.20273030593167e-07, "loss": 0.0007, "step": 4685 }, { "epoch": 8.91, "grad_norm": 0.1944364309310913, "learning_rate": 6.181399228944218e-07, "loss": 0.001, "step": 4686 }, { "epoch": 8.91, "grad_norm": 0.07959677278995514, "learning_rate": 6.160103723915067e-07, "loss": 0.0004, "step": 4687 }, { "epoch": 8.91, "grad_norm": 0.8050590753555298, "learning_rate": 6.138843798918559e-07, "loss": 0.0036, "step": 4688 }, { "epoch": 8.91, "grad_norm": 0.04145471751689911, "learning_rate": 6.117619462015533e-07, "loss": 0.0003, "step": 4689 }, { "epoch": 8.92, "grad_norm": 0.39414000511169434, "learning_rate": 6.096430721253343e-07, "loss": 0.001, "step": 4690 }, { "epoch": 8.92, "grad_norm": 0.06256009638309479, "learning_rate": 6.075277584665862e-07, "loss": 0.0004, "step": 4691 }, { "epoch": 8.92, "grad_norm": 0.12218333780765533, "learning_rate": 6.054160060273406e-07, "loss": 0.0004, "step": 4692 }, { "epoch": 8.92, "grad_norm": 0.05890003591775894, "learning_rate": 6.033078156082873e-07, "loss": 0.0003, "step": 4693 }, { "epoch": 8.92, "grad_norm": 0.19066080451011658, "learning_rate": 6.012031880087577e-07, "loss": 0.0011, "step": 4694 }, { "epoch": 8.93, "grad_norm": 0.22817151248455048, "learning_rate": 5.99102124026737e-07, "loss": 0.0015, "step": 4695 }, { "epoch": 8.93, "grad_norm": 0.1525169461965561, "learning_rate": 5.970046244588556e-07, "loss": 0.0008, "step": 4696 }, { "epoch": 8.93, "grad_norm": 0.6713542938232422, "learning_rate": 5.94910690100401e-07, "loss": 0.0007, "step": 4697 }, { "epoch": 8.93, "grad_norm": 0.36184167861938477, "learning_rate": 5.928203217452943e-07, "loss": 0.0007, "step": 4698 }, { "epoch": 8.93, "grad_norm": 0.2491466999053955, "learning_rate": 5.907335201861176e-07, "loss": 0.0007, "step": 4699 }, { "epoch": 8.94, "grad_norm": 0.07820779085159302, "learning_rate": 5.886502862140953e-07, "loss": 0.0003, "step": 4700 }, { "epoch": 8.94, "eval_blimp_filtered_avg": 0.7376119402985075, "eval_blimp_filtered_std": 0.004840187280759051, "step": 4700 }, { "epoch": 8.94, "eval_blimp_supplement_avg": 0.7974137931034483, "eval_blimp_supplement_std": 0.017640361767009737, "step": 4700 }, { "epoch": 8.94, "eval_vqa_filtered_avg": 0.37, "eval_vqa_filtered_std": 0.048523658709391, "step": 4700 }, { "epoch": 8.94, "eval_winoground_filtered_avg": 0.49, "eval_winoground_filtered_std": 0.05024183937956912, "step": 4700 }, { "epoch": 8.94, "grad_norm": 0.033070534467697144, "learning_rate": 5.865706206191002e-07, "loss": 0.0002, "step": 4701 }, { "epoch": 8.94, "grad_norm": 0.2175975739955902, "learning_rate": 5.844945241896505e-07, "loss": 0.0016, "step": 4702 }, { "epoch": 8.94, "grad_norm": 0.2185663878917694, "learning_rate": 5.824219977129119e-07, "loss": 0.0014, "step": 4703 }, { "epoch": 8.94, "grad_norm": 0.03569607436656952, "learning_rate": 5.803530419746972e-07, "loss": 0.0002, "step": 4704 }, { "epoch": 8.94, "grad_norm": 0.14501990377902985, "learning_rate": 5.782876577594643e-07, "loss": 0.0003, "step": 4705 }, { "epoch": 8.95, "grad_norm": 1.0810548067092896, "learning_rate": 5.762258458503222e-07, "loss": 0.0014, "step": 4706 }, { "epoch": 8.95, "grad_norm": 0.15709036588668823, "learning_rate": 5.741676070290136e-07, "loss": 0.0002, "step": 4707 }, { "epoch": 8.95, "grad_norm": 0.24237218499183655, "learning_rate": 5.721129420759386e-07, "loss": 0.0004, "step": 4708 }, { "epoch": 8.95, "grad_norm": 0.14375025033950806, "learning_rate": 5.700618517701362e-07, "loss": 0.0007, "step": 4709 }, { "epoch": 8.95, "grad_norm": 0.8035158514976501, "learning_rate": 5.68014336889291e-07, "loss": 0.0022, "step": 4710 }, { "epoch": 8.96, "grad_norm": 0.2330581247806549, "learning_rate": 5.65970398209732e-07, "loss": 0.0009, "step": 4711 }, { "epoch": 8.96, "grad_norm": 0.3193734884262085, "learning_rate": 5.639300365064337e-07, "loss": 0.0008, "step": 4712 }, { "epoch": 8.96, "grad_norm": 0.09596104174852371, "learning_rate": 5.618932525530107e-07, "loss": 0.0008, "step": 4713 }, { "epoch": 8.96, "grad_norm": 0.39466169476509094, "learning_rate": 5.598600471217252e-07, "loss": 0.0018, "step": 4714 }, { "epoch": 8.96, "grad_norm": 0.40141069889068604, "learning_rate": 5.578304209834806e-07, "loss": 0.0008, "step": 4715 }, { "epoch": 8.97, "grad_norm": 0.16131314635276794, "learning_rate": 5.558043749078212e-07, "loss": 0.0003, "step": 4716 }, { "epoch": 8.97, "grad_norm": 0.22766496241092682, "learning_rate": 5.537819096629416e-07, "loss": 0.0027, "step": 4717 }, { "epoch": 8.97, "grad_norm": 0.06325778365135193, "learning_rate": 5.517630260156659e-07, "loss": 0.0004, "step": 4718 }, { "epoch": 8.97, "grad_norm": 0.6099585890769958, "learning_rate": 5.497477247314731e-07, "loss": 0.0013, "step": 4719 }, { "epoch": 8.97, "grad_norm": 0.27264246344566345, "learning_rate": 5.477360065744763e-07, "loss": 0.0008, "step": 4720 }, { "epoch": 8.98, "grad_norm": 0.5155649781227112, "learning_rate": 5.457278723074333e-07, "loss": 0.0025, "step": 4721 }, { "epoch": 8.98, "grad_norm": 0.043737947940826416, "learning_rate": 5.437233226917393e-07, "loss": 0.0002, "step": 4722 }, { "epoch": 8.98, "grad_norm": 0.042599089443683624, "learning_rate": 5.417223584874354e-07, "loss": 0.0002, "step": 4723 }, { "epoch": 8.98, "grad_norm": 0.2669163644313812, "learning_rate": 5.39724980453199e-07, "loss": 0.0006, "step": 4724 }, { "epoch": 8.98, "grad_norm": 0.3438108563423157, "learning_rate": 5.3773118934635e-07, "loss": 0.0033, "step": 4725 }, { "epoch": 8.98, "grad_norm": 0.41516774892807007, "learning_rate": 5.357409859228491e-07, "loss": 0.0006, "step": 4726 }, { "epoch": 8.99, "grad_norm": 0.2813287675380707, "learning_rate": 5.337543709372927e-07, "loss": 0.0014, "step": 4727 }, { "epoch": 8.99, "grad_norm": 0.4087371826171875, "learning_rate": 5.317713451429218e-07, "loss": 0.001, "step": 4728 }, { "epoch": 8.99, "grad_norm": 0.2123159021139145, "learning_rate": 5.297919092916137e-07, "loss": 0.0006, "step": 4729 }, { "epoch": 8.99, "grad_norm": 0.424864262342453, "learning_rate": 5.278160641338848e-07, "loss": 0.002, "step": 4730 }, { "epoch": 8.99, "grad_norm": 0.8081618547439575, "learning_rate": 5.258438104188879e-07, "loss": 0.0031, "step": 4731 }, { "epoch": 9.0, "grad_norm": 0.20172899961471558, "learning_rate": 5.238751488944204e-07, "loss": 0.0006, "step": 4732 }, { "epoch": 9.0, "grad_norm": 0.14716079831123352, "learning_rate": 5.21910080306911e-07, "loss": 0.0005, "step": 4733 }, { "epoch": 9.0, "grad_norm": 0.1154310330748558, "learning_rate": 5.199486054014292e-07, "loss": 0.0002, "step": 4734 }, { "epoch": 9.0, "grad_norm": 0.1374177485704422, "learning_rate": 5.17990724921682e-07, "loss": 0.0004, "step": 4735 }, { "epoch": 9.0, "grad_norm": 0.6093526482582092, "learning_rate": 5.160364396100115e-07, "loss": 0.0012, "step": 4736 }, { "epoch": 9.01, "grad_norm": 0.11674202978610992, "learning_rate": 5.140857502074015e-07, "loss": 0.0004, "step": 4737 }, { "epoch": 9.01, "grad_norm": 0.1107538565993309, "learning_rate": 5.121386574534648e-07, "loss": 0.0006, "step": 4738 }, { "epoch": 9.01, "grad_norm": 1.4657249450683594, "learning_rate": 5.101951620864576e-07, "loss": 0.0045, "step": 4739 }, { "epoch": 9.01, "grad_norm": 0.4170711636543274, "learning_rate": 5.082552648432693e-07, "loss": 0.0018, "step": 4740 }, { "epoch": 9.01, "grad_norm": 0.2963908612728119, "learning_rate": 5.063189664594247e-07, "loss": 0.0008, "step": 4741 }, { "epoch": 9.02, "grad_norm": 0.047530338168144226, "learning_rate": 5.043862676690825e-07, "loss": 0.0002, "step": 4742 }, { "epoch": 9.02, "grad_norm": 0.1637575626373291, "learning_rate": 5.024571692050439e-07, "loss": 0.0013, "step": 4743 }, { "epoch": 9.02, "grad_norm": 0.03090512938797474, "learning_rate": 5.005316717987329e-07, "loss": 0.0002, "step": 4744 }, { "epoch": 9.02, "grad_norm": 0.05894993245601654, "learning_rate": 4.986097761802189e-07, "loss": 0.0003, "step": 4745 }, { "epoch": 9.02, "grad_norm": 0.814836323261261, "learning_rate": 4.96691483078201e-07, "loss": 0.0009, "step": 4746 }, { "epoch": 9.02, "grad_norm": 0.2811616361141205, "learning_rate": 4.947767932200143e-07, "loss": 0.0008, "step": 4747 }, { "epoch": 9.03, "grad_norm": 0.10812044143676758, "learning_rate": 4.928657073316234e-07, "loss": 0.0003, "step": 4748 }, { "epoch": 9.03, "grad_norm": 0.1869424283504486, "learning_rate": 4.909582261376322e-07, "loss": 0.0019, "step": 4749 }, { "epoch": 9.03, "grad_norm": 0.26636803150177, "learning_rate": 4.890543503612732e-07, "loss": 0.0011, "step": 4750 }, { "epoch": 9.03, "grad_norm": 0.24469436705112457, "learning_rate": 4.871540807244135e-07, "loss": 0.0004, "step": 4751 }, { "epoch": 9.03, "grad_norm": 0.9295972585678101, "learning_rate": 4.852574179475566e-07, "loss": 0.0017, "step": 4752 }, { "epoch": 9.04, "grad_norm": 0.5984472632408142, "learning_rate": 4.833643627498308e-07, "loss": 0.0009, "step": 4753 }, { "epoch": 9.04, "grad_norm": 0.44968485832214355, "learning_rate": 4.81474915849004e-07, "loss": 0.0009, "step": 4754 }, { "epoch": 9.04, "grad_norm": 0.15897396206855774, "learning_rate": 4.79589077961472e-07, "loss": 0.0004, "step": 4755 }, { "epoch": 9.04, "grad_norm": 0.29436323046684265, "learning_rate": 4.777068498022619e-07, "loss": 0.001, "step": 4756 }, { "epoch": 9.04, "grad_norm": 0.04751139134168625, "learning_rate": 4.7582823208503383e-07, "loss": 0.0003, "step": 4757 }, { "epoch": 9.05, "grad_norm": 0.3479086756706238, "learning_rate": 4.739532255220791e-07, "loss": 0.0024, "step": 4758 }, { "epoch": 9.05, "grad_norm": 0.0961330309510231, "learning_rate": 4.720818308243191e-07, "loss": 0.0003, "step": 4759 }, { "epoch": 9.05, "grad_norm": 0.062515489757061, "learning_rate": 4.7021404870130405e-07, "loss": 0.0003, "step": 4760 }, { "epoch": 9.05, "grad_norm": 0.06373703479766846, "learning_rate": 4.683498798612185e-07, "loss": 0.0003, "step": 4761 }, { "epoch": 9.05, "grad_norm": 0.13560113310813904, "learning_rate": 4.6648932501087154e-07, "loss": 0.0006, "step": 4762 }, { "epoch": 9.06, "grad_norm": 0.04583212733268738, "learning_rate": 4.646323848557099e-07, "loss": 0.0003, "step": 4763 }, { "epoch": 9.06, "grad_norm": 0.10344265401363373, "learning_rate": 4.6277906009980054e-07, "loss": 0.0004, "step": 4764 }, { "epoch": 9.06, "grad_norm": 0.6083114743232727, "learning_rate": 4.609293514458468e-07, "loss": 0.0011, "step": 4765 }, { "epoch": 9.06, "grad_norm": 0.10993566364049911, "learning_rate": 4.5908325959517773e-07, "loss": 0.0005, "step": 4766 }, { "epoch": 9.06, "grad_norm": 0.45908382534980774, "learning_rate": 4.572407852477512e-07, "loss": 0.0025, "step": 4767 }, { "epoch": 9.06, "grad_norm": 0.4133104681968689, "learning_rate": 4.5540192910215384e-07, "loss": 0.0022, "step": 4768 }, { "epoch": 9.07, "grad_norm": 0.030157769098877907, "learning_rate": 4.5356669185560033e-07, "loss": 0.0002, "step": 4769 }, { "epoch": 9.07, "grad_norm": 0.3052297830581665, "learning_rate": 4.5173507420393395e-07, "loss": 0.0025, "step": 4770 }, { "epoch": 9.07, "grad_norm": 0.04687649384140968, "learning_rate": 4.499070768416225e-07, "loss": 0.0003, "step": 4771 }, { "epoch": 9.07, "grad_norm": 0.20124702155590057, "learning_rate": 4.480827004617683e-07, "loss": 0.001, "step": 4772 }, { "epoch": 9.07, "grad_norm": 1.0045729875564575, "learning_rate": 4.462619457560913e-07, "loss": 0.003, "step": 4773 }, { "epoch": 9.08, "grad_norm": 0.08372555673122406, "learning_rate": 4.4444481341494595e-07, "loss": 0.0004, "step": 4774 }, { "epoch": 9.08, "grad_norm": 0.09396915137767792, "learning_rate": 4.426313041273089e-07, "loss": 0.0003, "step": 4775 }, { "epoch": 9.08, "grad_norm": 0.2856135666370392, "learning_rate": 4.4082141858078467e-07, "loss": 0.0006, "step": 4776 }, { "epoch": 9.08, "grad_norm": 0.14638899266719818, "learning_rate": 4.390151574616031e-07, "loss": 0.001, "step": 4777 }, { "epoch": 9.08, "grad_norm": 0.03694518655538559, "learning_rate": 4.37212521454623e-07, "loss": 0.0002, "step": 4778 }, { "epoch": 9.09, "grad_norm": 0.10376763343811035, "learning_rate": 4.3541351124332333e-07, "loss": 0.0004, "step": 4779 }, { "epoch": 9.09, "grad_norm": 0.12169143557548523, "learning_rate": 4.3361812750981057e-07, "loss": 0.0005, "step": 4780 }, { "epoch": 9.09, "grad_norm": 0.21008816361427307, "learning_rate": 4.3182637093482027e-07, "loss": 0.0004, "step": 4781 }, { "epoch": 9.09, "grad_norm": 0.4995923638343811, "learning_rate": 4.300382421977034e-07, "loss": 0.0019, "step": 4782 }, { "epoch": 9.09, "grad_norm": 0.1286098062992096, "learning_rate": 4.282537419764476e-07, "loss": 0.0004, "step": 4783 }, { "epoch": 9.1, "grad_norm": 0.03470555320382118, "learning_rate": 4.264728709476529e-07, "loss": 0.0001, "step": 4784 }, { "epoch": 9.1, "grad_norm": 0.12940680980682373, "learning_rate": 4.246956297865512e-07, "loss": 0.0003, "step": 4785 }, { "epoch": 9.1, "grad_norm": 0.8406385779380798, "learning_rate": 4.229220191669947e-07, "loss": 0.0011, "step": 4786 }, { "epoch": 9.1, "grad_norm": 0.17592158913612366, "learning_rate": 4.2115203976146104e-07, "loss": 0.0005, "step": 4787 }, { "epoch": 9.1, "grad_norm": 0.7135826349258423, "learning_rate": 4.193856922410466e-07, "loss": 0.0017, "step": 4788 }, { "epoch": 9.1, "grad_norm": 0.1462121605873108, "learning_rate": 4.176229772754803e-07, "loss": 0.0005, "step": 4789 }, { "epoch": 9.11, "grad_norm": 0.6011044383049011, "learning_rate": 4.158638955331007e-07, "loss": 0.0017, "step": 4790 }, { "epoch": 9.11, "grad_norm": 0.1288672834634781, "learning_rate": 4.141084476808799e-07, "loss": 0.0006, "step": 4791 }, { "epoch": 9.11, "grad_norm": 0.044399525970220566, "learning_rate": 4.1235663438440546e-07, "loss": 0.0002, "step": 4792 }, { "epoch": 9.11, "grad_norm": 0.2784285843372345, "learning_rate": 4.106084563078916e-07, "loss": 0.0012, "step": 4793 }, { "epoch": 9.11, "grad_norm": 0.46917930245399475, "learning_rate": 4.088639141141693e-07, "loss": 0.001, "step": 4794 }, { "epoch": 9.12, "grad_norm": 0.35472846031188965, "learning_rate": 4.071230084646949e-07, "loss": 0.0005, "step": 4795 }, { "epoch": 9.12, "grad_norm": 0.22464287281036377, "learning_rate": 4.0538574001954487e-07, "loss": 0.0008, "step": 4796 }, { "epoch": 9.12, "grad_norm": 0.055204931646585464, "learning_rate": 4.036521094374146e-07, "loss": 0.0003, "step": 4797 }, { "epoch": 9.12, "grad_norm": 1.476424217224121, "learning_rate": 4.01922117375626e-07, "loss": 0.0043, "step": 4798 }, { "epoch": 9.12, "grad_norm": 0.3631926476955414, "learning_rate": 4.001957644901122e-07, "loss": 0.0018, "step": 4799 }, { "epoch": 9.13, "grad_norm": 0.4112536907196045, "learning_rate": 3.98473051435434e-07, "loss": 0.0009, "step": 4800 }, { "epoch": 9.13, "eval_blimp_filtered_avg": 0.7368656716417911, "eval_blimp_filtered_std": 0.004842069731463618, "step": 4800 }, { "epoch": 9.13, "eval_blimp_supplement_avg": 0.7952586206896551, "eval_blimp_supplement_std": 0.0177088511968717, "step": 4800 }, { "epoch": 9.13, "eval_vqa_filtered_avg": 0.37, "eval_vqa_filtered_std": 0.04852365870939099, "step": 4800 }, { "epoch": 9.13, "eval_winoground_filtered_avg": 0.5, "eval_winoground_filtered_std": 0.050251890762960605, "step": 4800 }, { "epoch": 9.13, "grad_norm": 0.11939458549022675, "learning_rate": 3.9675397886477006e-07, "loss": 0.0008, "step": 4801 }, { "epoch": 9.13, "grad_norm": 0.3650364279747009, "learning_rate": 3.95038547429919e-07, "loss": 0.0014, "step": 4802 }, { "epoch": 9.13, "grad_norm": 0.40528562664985657, "learning_rate": 3.93326757781296e-07, "loss": 0.0016, "step": 4803 }, { "epoch": 9.13, "grad_norm": 0.2951503098011017, "learning_rate": 3.916186105679387e-07, "loss": 0.0007, "step": 4804 }, { "epoch": 9.13, "grad_norm": 0.1606767624616623, "learning_rate": 3.8991410643750336e-07, "loss": 0.0006, "step": 4805 }, { "epoch": 9.14, "grad_norm": 0.12389999628067017, "learning_rate": 3.882132460362631e-07, "loss": 0.0005, "step": 4806 }, { "epoch": 9.14, "grad_norm": 0.0819864571094513, "learning_rate": 3.8651603000911306e-07, "loss": 0.0004, "step": 4807 }, { "epoch": 9.14, "grad_norm": 1.3900731801986694, "learning_rate": 3.848224589995608e-07, "loss": 0.0021, "step": 4808 }, { "epoch": 9.14, "grad_norm": 0.25638318061828613, "learning_rate": 3.8313253364973933e-07, "loss": 0.0006, "step": 4809 }, { "epoch": 9.14, "grad_norm": 0.12796814739704132, "learning_rate": 3.8144625460038943e-07, "loss": 0.0005, "step": 4810 }, { "epoch": 9.15, "grad_norm": 0.4239066541194916, "learning_rate": 3.797636224908807e-07, "loss": 0.0007, "step": 4811 }, { "epoch": 9.15, "grad_norm": 0.030171750113368034, "learning_rate": 3.780846379591929e-07, "loss": 0.0002, "step": 4812 }, { "epoch": 9.15, "grad_norm": 0.10083284229040146, "learning_rate": 3.7640930164192347e-07, "loss": 0.0005, "step": 4813 }, { "epoch": 9.15, "grad_norm": 0.3766883313655853, "learning_rate": 3.747376141742887e-07, "loss": 0.001, "step": 4814 }, { "epoch": 9.15, "grad_norm": 0.06539294868707657, "learning_rate": 3.730695761901193e-07, "loss": 0.0003, "step": 4815 }, { "epoch": 9.16, "grad_norm": 0.024400778114795685, "learning_rate": 3.714051883218628e-07, "loss": 0.0002, "step": 4816 }, { "epoch": 9.16, "grad_norm": 0.45957061648368835, "learning_rate": 3.697444512005832e-07, "loss": 0.0012, "step": 4817 }, { "epoch": 9.16, "grad_norm": 0.25360867381095886, "learning_rate": 3.680873654559636e-07, "loss": 0.0004, "step": 4818 }, { "epoch": 9.16, "grad_norm": 0.04249800369143486, "learning_rate": 3.6643393171629346e-07, "loss": 0.0002, "step": 4819 }, { "epoch": 9.16, "grad_norm": 0.5276291370391846, "learning_rate": 3.647841506084882e-07, "loss": 0.0015, "step": 4820 }, { "epoch": 9.17, "grad_norm": 0.15056949853897095, "learning_rate": 3.6313802275807165e-07, "loss": 0.0005, "step": 4821 }, { "epoch": 9.17, "grad_norm": 0.6331552863121033, "learning_rate": 3.6149554878918467e-07, "loss": 0.0009, "step": 4822 }, { "epoch": 9.17, "grad_norm": 0.11855363100767136, "learning_rate": 3.5985672932458137e-07, "loss": 0.0006, "step": 4823 }, { "epoch": 9.17, "grad_norm": 0.31978169083595276, "learning_rate": 3.5822156498563353e-07, "loss": 0.0017, "step": 4824 }, { "epoch": 9.17, "grad_norm": 0.3419336676597595, "learning_rate": 3.5659005639232304e-07, "loss": 0.0032, "step": 4825 }, { "epoch": 9.17, "grad_norm": 0.06932321935892105, "learning_rate": 3.549622041632461e-07, "loss": 0.0005, "step": 4826 }, { "epoch": 9.18, "grad_norm": 0.09828192740678787, "learning_rate": 3.53338008915618e-07, "loss": 0.0005, "step": 4827 }, { "epoch": 9.18, "grad_norm": 0.019068971276283264, "learning_rate": 3.5171747126525823e-07, "loss": 0.0002, "step": 4828 }, { "epoch": 9.18, "grad_norm": 0.8605539798736572, "learning_rate": 3.501005918266087e-07, "loss": 0.0049, "step": 4829 }, { "epoch": 9.18, "grad_norm": 0.08573441952466965, "learning_rate": 3.4848737121271905e-07, "loss": 0.0004, "step": 4830 }, { "epoch": 9.18, "grad_norm": 0.5405794382095337, "learning_rate": 3.4687781003525324e-07, "loss": 0.0018, "step": 4831 }, { "epoch": 9.19, "grad_norm": 0.20041535794734955, "learning_rate": 3.4527190890448535e-07, "loss": 0.0007, "step": 4832 }, { "epoch": 9.19, "grad_norm": 0.20412397384643555, "learning_rate": 3.436696684293062e-07, "loss": 0.0009, "step": 4833 }, { "epoch": 9.19, "grad_norm": 0.1215098649263382, "learning_rate": 3.42071089217213e-07, "loss": 0.0004, "step": 4834 }, { "epoch": 9.19, "grad_norm": 0.27023738622665405, "learning_rate": 3.4047617187432215e-07, "loss": 0.0018, "step": 4835 }, { "epoch": 9.19, "grad_norm": 0.23515267670154572, "learning_rate": 3.388849170053532e-07, "loss": 0.0008, "step": 4836 }, { "epoch": 9.2, "grad_norm": 0.03670227900147438, "learning_rate": 3.3729732521364354e-07, "loss": 0.0002, "step": 4837 }, { "epoch": 9.2, "grad_norm": 0.4014851450920105, "learning_rate": 3.3571339710113946e-07, "loss": 0.0007, "step": 4838 }, { "epoch": 9.2, "grad_norm": 0.04638369381427765, "learning_rate": 3.3413313326839724e-07, "loss": 0.0002, "step": 4839 }, { "epoch": 9.2, "grad_norm": 0.13420003652572632, "learning_rate": 3.325565343145853e-07, "loss": 0.0007, "step": 4840 }, { "epoch": 9.2, "grad_norm": 0.7567545175552368, "learning_rate": 3.3098360083748006e-07, "loss": 0.0029, "step": 4841 }, { "epoch": 9.21, "grad_norm": 0.3449312746524811, "learning_rate": 3.2941433343347205e-07, "loss": 0.001, "step": 4842 }, { "epoch": 9.21, "grad_norm": 0.06116902083158493, "learning_rate": 3.278487326975577e-07, "loss": 0.0003, "step": 4843 }, { "epoch": 9.21, "grad_norm": 0.051961127668619156, "learning_rate": 3.262867992233487e-07, "loss": 0.0002, "step": 4844 }, { "epoch": 9.21, "grad_norm": 0.044418204575777054, "learning_rate": 3.247285336030581e-07, "loss": 0.0003, "step": 4845 }, { "epoch": 9.21, "grad_norm": 0.17713084816932678, "learning_rate": 3.231739364275155e-07, "loss": 0.0007, "step": 4846 }, { "epoch": 9.21, "grad_norm": 0.11800667643547058, "learning_rate": 3.21623008286156e-07, "loss": 0.0006, "step": 4847 }, { "epoch": 9.22, "grad_norm": 0.05970320850610733, "learning_rate": 3.2007574976702595e-07, "loss": 0.0002, "step": 4848 }, { "epoch": 9.22, "grad_norm": 0.04656514525413513, "learning_rate": 3.185321614567782e-07, "loss": 0.0004, "step": 4849 }, { "epoch": 9.22, "grad_norm": 0.07862792909145355, "learning_rate": 3.169922439406736e-07, "loss": 0.0004, "step": 4850 }, { "epoch": 9.22, "grad_norm": 0.07760387659072876, "learning_rate": 3.1545599780258397e-07, "loss": 0.0003, "step": 4851 }, { "epoch": 9.22, "grad_norm": 0.09624210000038147, "learning_rate": 3.139234236249844e-07, "loss": 0.0002, "step": 4852 }, { "epoch": 9.23, "grad_norm": 0.04786939546465874, "learning_rate": 3.123945219889657e-07, "loss": 0.0002, "step": 4853 }, { "epoch": 9.23, "grad_norm": 0.27610379457473755, "learning_rate": 3.108692934742163e-07, "loss": 0.0019, "step": 4854 }, { "epoch": 9.23, "grad_norm": 0.09676630049943924, "learning_rate": 3.0934773865904156e-07, "loss": 0.0008, "step": 4855 }, { "epoch": 9.23, "grad_norm": 0.08709942549467087, "learning_rate": 3.0782985812034537e-07, "loss": 0.0004, "step": 4856 }, { "epoch": 9.23, "grad_norm": 0.38516151905059814, "learning_rate": 3.0631565243364525e-07, "loss": 0.0014, "step": 4857 }, { "epoch": 9.24, "grad_norm": 0.19888722896575928, "learning_rate": 3.048051221730597e-07, "loss": 0.0018, "step": 4858 }, { "epoch": 9.24, "grad_norm": 0.17435353994369507, "learning_rate": 3.0329826791131945e-07, "loss": 0.0013, "step": 4859 }, { "epoch": 9.24, "grad_norm": 0.4114125370979309, "learning_rate": 3.017950902197575e-07, "loss": 0.0013, "step": 4860 }, { "epoch": 9.24, "grad_norm": 1.035325527191162, "learning_rate": 3.002955896683124e-07, "loss": 0.0034, "step": 4861 }, { "epoch": 9.24, "grad_norm": 0.1044229194521904, "learning_rate": 2.987997668255316e-07, "loss": 0.0006, "step": 4862 }, { "epoch": 9.25, "grad_norm": 0.11128132790327072, "learning_rate": 2.973076222585647e-07, "loss": 0.0005, "step": 4863 }, { "epoch": 9.25, "grad_norm": 0.13179704546928406, "learning_rate": 2.958191565331725e-07, "loss": 0.0004, "step": 4864 }, { "epoch": 9.25, "grad_norm": 0.9637926816940308, "learning_rate": 2.943343702137114e-07, "loss": 0.0009, "step": 4865 }, { "epoch": 9.25, "grad_norm": 0.5061189532279968, "learning_rate": 2.9285326386315205e-07, "loss": 0.0014, "step": 4866 }, { "epoch": 9.25, "grad_norm": 0.3179650604724884, "learning_rate": 2.9137583804306426e-07, "loss": 0.0005, "step": 4867 }, { "epoch": 9.25, "grad_norm": 0.12213040143251419, "learning_rate": 2.899020933136254e-07, "loss": 0.0004, "step": 4868 }, { "epoch": 9.26, "grad_norm": 0.07619825750589371, "learning_rate": 2.88432030233613e-07, "loss": 0.0002, "step": 4869 }, { "epoch": 9.26, "grad_norm": 0.07547226548194885, "learning_rate": 2.869656493604156e-07, "loss": 0.0003, "step": 4870 }, { "epoch": 9.26, "grad_norm": 0.32433366775512695, "learning_rate": 2.8550295125001847e-07, "loss": 0.001, "step": 4871 }, { "epoch": 9.26, "grad_norm": 0.27348408102989197, "learning_rate": 2.840439364570124e-07, "loss": 0.0016, "step": 4872 }, { "epoch": 9.26, "grad_norm": 0.10329557210206985, "learning_rate": 2.8258860553459835e-07, "loss": 0.0005, "step": 4873 }, { "epoch": 9.27, "grad_norm": 0.46395841240882874, "learning_rate": 2.8113695903456807e-07, "loss": 0.0008, "step": 4874 }, { "epoch": 9.27, "grad_norm": 0.856888473033905, "learning_rate": 2.7968899750732694e-07, "loss": 0.0023, "step": 4875 }, { "epoch": 9.27, "grad_norm": 0.34871184825897217, "learning_rate": 2.782447215018791e-07, "loss": 0.001, "step": 4876 }, { "epoch": 9.27, "grad_norm": 0.1396360993385315, "learning_rate": 2.7680413156583097e-07, "loss": 0.0004, "step": 4877 }, { "epoch": 9.27, "grad_norm": 0.7234363555908203, "learning_rate": 2.753672282453912e-07, "loss": 0.0015, "step": 4878 }, { "epoch": 9.28, "grad_norm": 0.10725653171539307, "learning_rate": 2.7393401208537395e-07, "loss": 0.0003, "step": 4879 }, { "epoch": 9.28, "grad_norm": 0.2956854999065399, "learning_rate": 2.7250448362919015e-07, "loss": 0.0012, "step": 4880 }, { "epoch": 9.28, "grad_norm": 0.4073553681373596, "learning_rate": 2.710786434188573e-07, "loss": 0.0012, "step": 4881 }, { "epoch": 9.28, "grad_norm": 0.09090757369995117, "learning_rate": 2.696564919949907e-07, "loss": 0.0004, "step": 4882 }, { "epoch": 9.28, "grad_norm": 0.24881131947040558, "learning_rate": 2.68238029896809e-07, "loss": 0.0017, "step": 4883 }, { "epoch": 9.29, "grad_norm": 0.3042006194591522, "learning_rate": 2.6682325766213324e-07, "loss": 0.001, "step": 4884 }, { "epoch": 9.29, "grad_norm": 0.10589288920164108, "learning_rate": 2.65412175827382e-07, "loss": 0.0004, "step": 4885 }, { "epoch": 9.29, "grad_norm": 0.02347962185740471, "learning_rate": 2.640047849275784e-07, "loss": 0.0002, "step": 4886 }, { "epoch": 9.29, "grad_norm": 0.2601788640022278, "learning_rate": 2.6260108549634236e-07, "loss": 0.0005, "step": 4887 }, { "epoch": 9.29, "grad_norm": 0.09861357510089874, "learning_rate": 2.612010780658969e-07, "loss": 0.0006, "step": 4888 }, { "epoch": 9.29, "grad_norm": 0.7369228601455688, "learning_rate": 2.5980476316706306e-07, "loss": 0.0027, "step": 4889 }, { "epoch": 9.3, "grad_norm": 0.1067320853471756, "learning_rate": 2.584121413292673e-07, "loss": 0.0006, "step": 4890 }, { "epoch": 9.3, "grad_norm": 0.10896355658769608, "learning_rate": 2.57023213080525e-07, "loss": 0.0004, "step": 4891 }, { "epoch": 9.3, "grad_norm": 0.028980223461985588, "learning_rate": 2.556379789474639e-07, "loss": 0.0003, "step": 4892 }, { "epoch": 9.3, "grad_norm": 0.11757060885429382, "learning_rate": 2.5425643945530155e-07, "loss": 0.0003, "step": 4893 }, { "epoch": 9.3, "grad_norm": 0.11164820194244385, "learning_rate": 2.5287859512785895e-07, "loss": 0.0004, "step": 4894 }, { "epoch": 9.31, "grad_norm": 0.20442268252372742, "learning_rate": 2.515044464875549e-07, "loss": 0.0031, "step": 4895 }, { "epoch": 9.31, "grad_norm": 0.10639487206935883, "learning_rate": 2.501339940554071e-07, "loss": 0.0003, "step": 4896 }, { "epoch": 9.31, "grad_norm": 0.28189876675605774, "learning_rate": 2.48767238351032e-07, "loss": 0.001, "step": 4897 }, { "epoch": 9.31, "grad_norm": 0.3196962773799896, "learning_rate": 2.474041798926441e-07, "loss": 0.0009, "step": 4898 }, { "epoch": 9.31, "grad_norm": 1.308956265449524, "learning_rate": 2.4604481919705767e-07, "loss": 0.0032, "step": 4899 }, { "epoch": 9.32, "grad_norm": 0.028804948553442955, "learning_rate": 2.4468915677968055e-07, "loss": 0.0002, "step": 4900 }, { "epoch": 9.32, "eval_blimp_filtered_avg": 0.7367164179104477, "eval_blimp_filtered_std": 0.004842789981948876, "step": 4900 }, { "epoch": 9.32, "eval_blimp_supplement_avg": 0.7995689655172413, "eval_blimp_supplement_std": 0.01761239156606601, "step": 4900 }, { "epoch": 9.32, "eval_vqa_filtered_avg": 0.37, "eval_vqa_filtered_std": 0.048523658709391, "step": 4900 }, { "epoch": 9.32, "eval_winoground_filtered_avg": 0.5, "eval_winoground_filtered_std": 0.050251890762960605, "step": 4900 }, { "epoch": 9.32, "grad_norm": 0.04941131919622421, "learning_rate": 2.4333719315452363e-07, "loss": 0.0003, "step": 4901 }, { "epoch": 9.32, "grad_norm": 0.18446817994117737, "learning_rate": 2.4198892883419255e-07, "loss": 0.0004, "step": 4902 }, { "epoch": 9.32, "grad_norm": 0.07855930924415588, "learning_rate": 2.4064436432989166e-07, "loss": 0.0004, "step": 4903 }, { "epoch": 9.32, "grad_norm": 0.4641731381416321, "learning_rate": 2.3930350015142103e-07, "loss": 0.0028, "step": 4904 }, { "epoch": 9.33, "grad_norm": 0.12997794151306152, "learning_rate": 2.379663368071772e-07, "loss": 0.0004, "step": 4905 }, { "epoch": 9.33, "grad_norm": 0.17271223664283752, "learning_rate": 2.3663287480415687e-07, "loss": 0.0004, "step": 4906 }, { "epoch": 9.33, "grad_norm": 0.14109839498996735, "learning_rate": 2.3530311464794896e-07, "loss": 0.0003, "step": 4907 }, { "epoch": 9.33, "grad_norm": 0.3920232653617859, "learning_rate": 2.3397705684274352e-07, "loss": 0.0009, "step": 4908 }, { "epoch": 9.33, "grad_norm": 0.13901452720165253, "learning_rate": 2.3265470189132167e-07, "loss": 0.0006, "step": 4909 }, { "epoch": 9.33, "grad_norm": 0.16055715084075928, "learning_rate": 2.3133605029506567e-07, "loss": 0.0008, "step": 4910 }, { "epoch": 9.34, "grad_norm": 0.49193257093429565, "learning_rate": 2.3002110255394895e-07, "loss": 0.0012, "step": 4911 }, { "epoch": 9.34, "grad_norm": 0.1668964922428131, "learning_rate": 2.2870985916654487e-07, "loss": 0.0008, "step": 4912 }, { "epoch": 9.34, "grad_norm": 0.3418249487876892, "learning_rate": 2.274023206300202e-07, "loss": 0.0006, "step": 4913 }, { "epoch": 9.34, "grad_norm": 0.08606669306755066, "learning_rate": 2.2609848744013509e-07, "loss": 0.0004, "step": 4914 }, { "epoch": 9.34, "grad_norm": 0.05752231925725937, "learning_rate": 2.2479836009124735e-07, "loss": 0.0003, "step": 4915 }, { "epoch": 9.35, "grad_norm": 0.012540338560938835, "learning_rate": 2.2350193907631156e-07, "loss": 0.0001, "step": 4916 }, { "epoch": 9.35, "grad_norm": 0.06953978538513184, "learning_rate": 2.2220922488687237e-07, "loss": 0.0003, "step": 4917 }, { "epoch": 9.35, "grad_norm": 0.12820971012115479, "learning_rate": 2.2092021801306984e-07, "loss": 0.0005, "step": 4918 }, { "epoch": 9.35, "grad_norm": 0.06149531900882721, "learning_rate": 2.1963491894364308e-07, "loss": 0.0004, "step": 4919 }, { "epoch": 9.35, "grad_norm": 0.22426097095012665, "learning_rate": 2.1835332816591781e-07, "loss": 0.0007, "step": 4920 }, { "epoch": 9.36, "grad_norm": 0.2054009735584259, "learning_rate": 2.1707544616582198e-07, "loss": 0.0004, "step": 4921 }, { "epoch": 9.36, "grad_norm": 0.02436087466776371, "learning_rate": 2.1580127342787027e-07, "loss": 0.0002, "step": 4922 }, { "epoch": 9.36, "grad_norm": 0.11493802070617676, "learning_rate": 2.145308104351762e-07, "loss": 0.0006, "step": 4923 }, { "epoch": 9.36, "grad_norm": 0.3643447756767273, "learning_rate": 2.1326405766944113e-07, "loss": 0.001, "step": 4924 }, { "epoch": 9.36, "grad_norm": 0.18297529220581055, "learning_rate": 2.1200101561096753e-07, "loss": 0.001, "step": 4925 }, { "epoch": 9.37, "grad_norm": 0.06475614756345749, "learning_rate": 2.107416847386423e-07, "loss": 0.0005, "step": 4926 }, { "epoch": 9.37, "grad_norm": 0.21703964471817017, "learning_rate": 2.0948606552995132e-07, "loss": 0.0007, "step": 4927 }, { "epoch": 9.37, "grad_norm": 0.5365815162658691, "learning_rate": 2.082341584609704e-07, "loss": 0.0017, "step": 4928 }, { "epoch": 9.37, "grad_norm": 0.6256747841835022, "learning_rate": 2.0698596400636873e-07, "loss": 0.0015, "step": 4929 }, { "epoch": 9.37, "grad_norm": 0.04484088346362114, "learning_rate": 2.0574148263940775e-07, "loss": 0.0003, "step": 4930 }, { "epoch": 9.37, "grad_norm": 0.21233510971069336, "learning_rate": 2.0450071483194222e-07, "loss": 0.0003, "step": 4931 }, { "epoch": 9.38, "grad_norm": 0.1283808946609497, "learning_rate": 2.0326366105441586e-07, "loss": 0.0003, "step": 4932 }, { "epoch": 9.38, "grad_norm": 0.0881182998418808, "learning_rate": 2.0203032177586568e-07, "loss": 0.0006, "step": 4933 }, { "epoch": 9.38, "grad_norm": 0.029543550685048103, "learning_rate": 2.0080069746392316e-07, "loss": 0.0002, "step": 4934 }, { "epoch": 9.38, "grad_norm": 0.060124099254608154, "learning_rate": 1.995747885848054e-07, "loss": 0.0002, "step": 4935 }, { "epoch": 9.38, "grad_norm": 0.1273433268070221, "learning_rate": 1.983525956033272e-07, "loss": 0.0004, "step": 4936 }, { "epoch": 9.39, "grad_norm": 1.0504285097122192, "learning_rate": 1.9713411898288793e-07, "loss": 0.0005, "step": 4937 }, { "epoch": 9.39, "grad_norm": 0.23414656519889832, "learning_rate": 1.9591935918548465e-07, "loss": 0.0023, "step": 4938 }, { "epoch": 9.39, "grad_norm": 0.06086886301636696, "learning_rate": 1.947083166717001e-07, "loss": 0.0003, "step": 4939 }, { "epoch": 9.39, "grad_norm": 0.0462041050195694, "learning_rate": 1.9350099190071025e-07, "loss": 0.0004, "step": 4940 }, { "epoch": 9.39, "grad_norm": 0.07869857549667358, "learning_rate": 1.9229738533027896e-07, "loss": 0.0006, "step": 4941 }, { "epoch": 9.4, "grad_norm": 0.15693975985050201, "learning_rate": 1.9109749741676232e-07, "loss": 0.0005, "step": 4942 }, { "epoch": 9.4, "grad_norm": 0.11660808324813843, "learning_rate": 1.8990132861510635e-07, "loss": 0.0005, "step": 4943 }, { "epoch": 9.4, "grad_norm": 0.22951436042785645, "learning_rate": 1.8870887937884608e-07, "loss": 0.0007, "step": 4944 }, { "epoch": 9.4, "grad_norm": 0.12891459465026855, "learning_rate": 1.8752015016010872e-07, "loss": 0.0005, "step": 4945 }, { "epoch": 9.4, "grad_norm": 0.29401078820228577, "learning_rate": 1.8633514140960485e-07, "loss": 0.0014, "step": 4946 }, { "epoch": 9.4, "grad_norm": 0.14062289893627167, "learning_rate": 1.8515385357664284e-07, "loss": 0.0006, "step": 4947 }, { "epoch": 9.41, "grad_norm": 0.4205966591835022, "learning_rate": 1.8397628710911332e-07, "loss": 0.0015, "step": 4948 }, { "epoch": 9.41, "grad_norm": 0.32025283575057983, "learning_rate": 1.828024424535002e-07, "loss": 0.001, "step": 4949 }, { "epoch": 9.41, "grad_norm": 0.11593971401453018, "learning_rate": 1.816323200548742e-07, "loss": 0.0009, "step": 4950 }, { "epoch": 9.41, "grad_norm": 0.38141530752182007, "learning_rate": 1.804659203568937e-07, "loss": 0.001, "step": 4951 }, { "epoch": 9.41, "grad_norm": 0.08977726101875305, "learning_rate": 1.793032438018083e-07, "loss": 0.0008, "step": 4952 }, { "epoch": 9.42, "grad_norm": 0.7008116841316223, "learning_rate": 1.7814429083045427e-07, "loss": 0.0034, "step": 4953 }, { "epoch": 9.42, "grad_norm": 0.18366959691047668, "learning_rate": 1.7698906188225894e-07, "loss": 0.0023, "step": 4954 }, { "epoch": 9.42, "grad_norm": 0.16429439187049866, "learning_rate": 1.758375573952309e-07, "loss": 0.0008, "step": 4955 }, { "epoch": 9.42, "grad_norm": 0.25693878531455994, "learning_rate": 1.746897778059753e-07, "loss": 0.0004, "step": 4956 }, { "epoch": 9.42, "grad_norm": 0.3373255729675293, "learning_rate": 1.7354572354967513e-07, "loss": 0.0014, "step": 4957 }, { "epoch": 9.43, "grad_norm": 0.07281147688627243, "learning_rate": 1.7240539506011233e-07, "loss": 0.0002, "step": 4958 }, { "epoch": 9.43, "grad_norm": 0.40240487456321716, "learning_rate": 1.7126879276964549e-07, "loss": 0.0008, "step": 4959 }, { "epoch": 9.43, "grad_norm": 0.062207188457250595, "learning_rate": 1.7013591710922762e-07, "loss": 0.0005, "step": 4960 }, { "epoch": 9.43, "grad_norm": 0.24603794515132904, "learning_rate": 1.6900676850839514e-07, "loss": 0.0006, "step": 4961 }, { "epoch": 9.43, "grad_norm": 0.10829874873161316, "learning_rate": 1.678813473952745e-07, "loss": 0.0002, "step": 4962 }, { "epoch": 9.44, "grad_norm": 0.5788793563842773, "learning_rate": 1.6675965419657325e-07, "loss": 0.0005, "step": 4963 }, { "epoch": 9.44, "grad_norm": 0.02356104366481304, "learning_rate": 1.6564168933759007e-07, "loss": 0.0001, "step": 4964 }, { "epoch": 9.44, "grad_norm": 1.3504807949066162, "learning_rate": 1.6452745324221143e-07, "loss": 0.0021, "step": 4965 }, { "epoch": 9.44, "grad_norm": 0.1680411696434021, "learning_rate": 1.6341694633290494e-07, "loss": 0.0006, "step": 4966 }, { "epoch": 9.44, "grad_norm": 0.3109643757343292, "learning_rate": 1.6231016903072716e-07, "loss": 0.0005, "step": 4967 }, { "epoch": 9.44, "grad_norm": 0.10585866868495941, "learning_rate": 1.6120712175532126e-07, "loss": 0.0003, "step": 4968 }, { "epoch": 9.45, "grad_norm": 0.02079414762556553, "learning_rate": 1.6010780492491384e-07, "loss": 0.0002, "step": 4969 }, { "epoch": 9.45, "grad_norm": 0.09951137006282806, "learning_rate": 1.5901221895631814e-07, "loss": 0.0004, "step": 4970 }, { "epoch": 9.45, "grad_norm": 0.04816626384854317, "learning_rate": 1.5792036426493517e-07, "loss": 0.0003, "step": 4971 }, { "epoch": 9.45, "grad_norm": 0.2428828477859497, "learning_rate": 1.5683224126474606e-07, "loss": 0.003, "step": 4972 }, { "epoch": 9.45, "grad_norm": 0.10630444437265396, "learning_rate": 1.5574785036832297e-07, "loss": 0.0006, "step": 4973 }, { "epoch": 9.46, "grad_norm": 0.03220370039343834, "learning_rate": 1.5466719198681813e-07, "loss": 0.0002, "step": 4974 }, { "epoch": 9.46, "grad_norm": 0.10419854521751404, "learning_rate": 1.5359026652997044e-07, "loss": 0.0006, "step": 4975 }, { "epoch": 9.46, "grad_norm": 0.06577632576227188, "learning_rate": 1.5251707440610552e-07, "loss": 0.0006, "step": 4976 }, { "epoch": 9.46, "grad_norm": 0.1464771032333374, "learning_rate": 1.5144761602212898e-07, "loss": 0.0007, "step": 4977 }, { "epoch": 9.46, "grad_norm": 0.033389411866664886, "learning_rate": 1.5038189178353423e-07, "loss": 0.0001, "step": 4978 }, { "epoch": 9.47, "grad_norm": 0.15586581826210022, "learning_rate": 1.4931990209439807e-07, "loss": 0.0017, "step": 4979 }, { "epoch": 9.47, "grad_norm": 0.07032330334186554, "learning_rate": 1.482616473573806e-07, "loss": 0.0003, "step": 4980 }, { "epoch": 9.47, "grad_norm": 0.03514696657657623, "learning_rate": 1.4720712797372638e-07, "loss": 0.0002, "step": 4981 }, { "epoch": 9.47, "grad_norm": 0.29562053084373474, "learning_rate": 1.4615634434326453e-07, "loss": 0.0009, "step": 4982 }, { "epoch": 9.47, "grad_norm": 1.9191861152648926, "learning_rate": 1.451092968644041e-07, "loss": 0.0039, "step": 4983 }, { "epoch": 9.48, "grad_norm": 0.04435792937874794, "learning_rate": 1.4406598593414313e-07, "loss": 0.0003, "step": 4984 }, { "epoch": 9.48, "grad_norm": 0.05800731107592583, "learning_rate": 1.4302641194805955e-07, "loss": 0.0004, "step": 4985 }, { "epoch": 9.48, "grad_norm": 0.05331496521830559, "learning_rate": 1.419905753003137e-07, "loss": 0.0002, "step": 4986 }, { "epoch": 9.48, "grad_norm": 0.07517996430397034, "learning_rate": 1.4095847638365133e-07, "loss": 0.0003, "step": 4987 }, { "epoch": 9.48, "grad_norm": 0.05931700021028519, "learning_rate": 1.3993011558939952e-07, "loss": 0.0004, "step": 4988 }, { "epoch": 9.48, "grad_norm": 0.18355360627174377, "learning_rate": 1.389054933074663e-07, "loss": 0.0004, "step": 4989 }, { "epoch": 9.49, "grad_norm": 0.08368450403213501, "learning_rate": 1.3788460992634644e-07, "loss": 0.0005, "step": 4990 }, { "epoch": 9.49, "grad_norm": 0.1636076122522354, "learning_rate": 1.3686746583311484e-07, "loss": 0.0012, "step": 4991 }, { "epoch": 9.49, "grad_norm": 0.10198362916707993, "learning_rate": 1.3585406141342516e-07, "loss": 0.0005, "step": 4992 }, { "epoch": 9.49, "grad_norm": 0.03544921800494194, "learning_rate": 1.348443970515212e-07, "loss": 0.0002, "step": 4993 }, { "epoch": 9.49, "grad_norm": 0.5375022292137146, "learning_rate": 1.3383847313022224e-07, "loss": 0.0008, "step": 4994 }, { "epoch": 9.5, "grad_norm": 0.4975007474422455, "learning_rate": 1.3283629003092992e-07, "loss": 0.0009, "step": 4995 }, { "epoch": 9.5, "grad_norm": 0.08026143908500671, "learning_rate": 1.3183784813363022e-07, "loss": 0.0003, "step": 4996 }, { "epoch": 9.5, "grad_norm": 0.170439213514328, "learning_rate": 1.3084314781688922e-07, "loss": 0.0009, "step": 4997 }, { "epoch": 9.5, "grad_norm": 0.03482333570718765, "learning_rate": 1.2985218945785306e-07, "loss": 0.0002, "step": 4998 }, { "epoch": 9.5, "grad_norm": 0.09595148265361786, "learning_rate": 1.2886497343225e-07, "loss": 0.0006, "step": 4999 }, { "epoch": 9.51, "grad_norm": 0.44226107001304626, "learning_rate": 1.2788150011439293e-07, "loss": 0.0012, "step": 5000 }, { "epoch": 9.51, "eval_blimp_filtered_avg": 0.7377611940298507, "eval_blimp_filtered_std": 0.0048361642841713934, "step": 5000 }, { "epoch": 9.51, "eval_blimp_supplement_avg": 0.7952586206896551, "eval_blimp_supplement_std": 0.0177088511968717, "step": 5000 }, { "epoch": 9.51, "eval_vqa_filtered_avg": 0.38, "eval_vqa_filtered_std": 0.048783173121456316, "step": 5000 }, { "epoch": 9.51, "eval_winoground_filtered_avg": 0.49, "eval_winoground_filtered_std": 0.05024183937956912, "step": 5000 }, { "epoch": 9.51, "grad_norm": 0.13171954452991486, "learning_rate": 1.2690176987716907e-07, "loss": 0.0005, "step": 5001 }, { "epoch": 9.51, "grad_norm": 0.1672871708869934, "learning_rate": 1.2592578309205016e-07, "loss": 0.0005, "step": 5002 }, { "epoch": 9.51, "grad_norm": 0.03711386397480965, "learning_rate": 1.2495354012909022e-07, "loss": 0.0002, "step": 5003 }, { "epoch": 9.51, "grad_norm": 0.033128004521131516, "learning_rate": 1.239850413569199e-07, "loss": 0.0001, "step": 5004 }, { "epoch": 9.52, "grad_norm": 0.0713183656334877, "learning_rate": 1.2302028714275216e-07, "loss": 0.0004, "step": 5005 }, { "epoch": 9.52, "grad_norm": 0.26427263021469116, "learning_rate": 1.2205927785238214e-07, "loss": 0.0009, "step": 5006 }, { "epoch": 9.52, "grad_norm": 0.21677927672863007, "learning_rate": 1.211020138501795e-07, "loss": 0.0019, "step": 5007 }, { "epoch": 9.52, "grad_norm": 0.05995145067572594, "learning_rate": 1.2014849549910058e-07, "loss": 0.0002, "step": 5008 }, { "epoch": 9.52, "grad_norm": 0.08189782500267029, "learning_rate": 1.1919872316067726e-07, "loss": 0.0003, "step": 5009 }, { "epoch": 9.52, "grad_norm": 0.05967266112565994, "learning_rate": 1.1825269719502042e-07, "loss": 0.0004, "step": 5010 }, { "epoch": 9.53, "grad_norm": 0.34113919734954834, "learning_rate": 1.1731041796082531e-07, "loss": 0.0013, "step": 5011 }, { "epoch": 9.53, "grad_norm": 0.1860918402671814, "learning_rate": 1.1637188581536173e-07, "loss": 0.0014, "step": 5012 }, { "epoch": 9.53, "grad_norm": 0.914406418800354, "learning_rate": 1.1543710111448059e-07, "loss": 0.0022, "step": 5013 }, { "epoch": 9.53, "grad_norm": 0.07463357597589493, "learning_rate": 1.1450606421261168e-07, "loss": 0.0003, "step": 5014 }, { "epoch": 9.53, "grad_norm": 0.06445208191871643, "learning_rate": 1.1357877546276486e-07, "loss": 0.0005, "step": 5015 }, { "epoch": 9.54, "grad_norm": 0.11747375875711441, "learning_rate": 1.1265523521652667e-07, "loss": 0.0006, "step": 5016 }, { "epoch": 9.54, "grad_norm": 0.07873641699552536, "learning_rate": 1.1173544382406476e-07, "loss": 0.0005, "step": 5017 }, { "epoch": 9.54, "grad_norm": 0.0997672826051712, "learning_rate": 1.1081940163412352e-07, "loss": 0.0003, "step": 5018 }, { "epoch": 9.54, "grad_norm": 0.09713952243328094, "learning_rate": 1.0990710899402735e-07, "loss": 0.0005, "step": 5019 }, { "epoch": 9.54, "grad_norm": 0.06565051525831223, "learning_rate": 1.089985662496773e-07, "loss": 0.0004, "step": 5020 }, { "epoch": 9.55, "grad_norm": 0.26568925380706787, "learning_rate": 1.0809377374555341e-07, "loss": 0.0016, "step": 5021 }, { "epoch": 9.55, "grad_norm": 0.11774890869855881, "learning_rate": 1.0719273182471568e-07, "loss": 0.0003, "step": 5022 }, { "epoch": 9.55, "grad_norm": 0.3975834846496582, "learning_rate": 1.0629544082879861e-07, "loss": 0.0008, "step": 5023 }, { "epoch": 9.55, "grad_norm": 0.2063591480255127, "learning_rate": 1.0540190109801563e-07, "loss": 0.0003, "step": 5024 }, { "epoch": 9.55, "grad_norm": 1.6167798042297363, "learning_rate": 1.0451211297116015e-07, "loss": 0.0016, "step": 5025 }, { "epoch": 9.56, "grad_norm": 0.18613456189632416, "learning_rate": 1.0362607678560011e-07, "loss": 0.0014, "step": 5026 }, { "epoch": 9.56, "grad_norm": 0.44386231899261475, "learning_rate": 1.0274379287728232e-07, "loss": 0.0018, "step": 5027 }, { "epoch": 9.56, "grad_norm": 0.10689792037010193, "learning_rate": 1.0186526158073251e-07, "loss": 0.0006, "step": 5028 }, { "epoch": 9.56, "grad_norm": 0.17089210450649261, "learning_rate": 1.0099048322904869e-07, "loss": 0.0005, "step": 5029 }, { "epoch": 9.56, "grad_norm": 0.05171942338347435, "learning_rate": 1.0011945815391e-07, "loss": 0.0003, "step": 5030 }, { "epoch": 9.56, "grad_norm": 0.09965039044618607, "learning_rate": 9.925218668557335e-08, "loss": 0.0006, "step": 5031 }, { "epoch": 9.57, "grad_norm": 0.19695746898651123, "learning_rate": 9.838866915286904e-08, "loss": 0.0011, "step": 5032 }, { "epoch": 9.57, "grad_norm": 0.06751207262277603, "learning_rate": 9.752890588320518e-08, "loss": 0.0002, "step": 5033 }, { "epoch": 9.57, "grad_norm": 0.04172760620713234, "learning_rate": 9.667289720256767e-08, "loss": 0.0003, "step": 5034 }, { "epoch": 9.57, "grad_norm": 0.05527893453836441, "learning_rate": 9.5820643435518e-08, "loss": 0.0002, "step": 5035 }, { "epoch": 9.57, "grad_norm": 0.03076588734984398, "learning_rate": 9.497214490519213e-08, "loss": 0.0002, "step": 5036 }, { "epoch": 9.58, "grad_norm": 0.03303391858935356, "learning_rate": 9.412740193330827e-08, "loss": 0.0002, "step": 5037 }, { "epoch": 9.58, "grad_norm": 0.3549599051475525, "learning_rate": 9.328641484015244e-08, "loss": 0.0012, "step": 5038 }, { "epoch": 9.58, "grad_norm": 0.09555275738239288, "learning_rate": 9.244918394459179e-08, "loss": 0.0006, "step": 5039 }, { "epoch": 9.58, "grad_norm": 0.021579816937446594, "learning_rate": 9.161570956406907e-08, "loss": 0.0002, "step": 5040 }, { "epoch": 9.58, "grad_norm": 0.2106308937072754, "learning_rate": 9.078599201460036e-08, "loss": 0.0012, "step": 5041 }, { "epoch": 9.59, "grad_norm": 0.4135432243347168, "learning_rate": 8.99600316107796e-08, "loss": 0.001, "step": 5042 }, { "epoch": 9.59, "grad_norm": 0.3343641757965088, "learning_rate": 8.913782866577402e-08, "loss": 0.0012, "step": 5043 }, { "epoch": 9.59, "grad_norm": 0.7528327703475952, "learning_rate": 8.831938349132985e-08, "loss": 0.0018, "step": 5044 }, { "epoch": 9.59, "grad_norm": 0.3460502326488495, "learning_rate": 8.750469639776327e-08, "loss": 0.0004, "step": 5045 }, { "epoch": 9.59, "grad_norm": 0.059445809572935104, "learning_rate": 8.669376769397053e-08, "loss": 0.0003, "step": 5046 }, { "epoch": 9.6, "grad_norm": 0.3984399735927582, "learning_rate": 8.588659768741903e-08, "loss": 0.0008, "step": 5047 }, { "epoch": 9.6, "grad_norm": 0.035582851618528366, "learning_rate": 8.508318668415504e-08, "loss": 0.0002, "step": 5048 }, { "epoch": 9.6, "grad_norm": 0.04556626081466675, "learning_rate": 8.428353498879494e-08, "loss": 0.0002, "step": 5049 }, { "epoch": 9.6, "grad_norm": 0.08189944922924042, "learning_rate": 8.348764290453392e-08, "loss": 0.0006, "step": 5050 }, { "epoch": 9.6, "grad_norm": 0.15845291316509247, "learning_rate": 8.269551073313842e-08, "loss": 0.001, "step": 5051 }, { "epoch": 9.6, "grad_norm": 0.4765700697898865, "learning_rate": 8.19071387749526e-08, "loss": 0.0032, "step": 5052 }, { "epoch": 9.61, "grad_norm": 0.28507912158966064, "learning_rate": 8.112252732888959e-08, "loss": 0.0006, "step": 5053 }, { "epoch": 9.61, "grad_norm": 0.35881295800209045, "learning_rate": 8.034167669244474e-08, "loss": 0.0008, "step": 5054 }, { "epoch": 9.61, "grad_norm": 0.07623641192913055, "learning_rate": 7.956458716167902e-08, "loss": 0.0005, "step": 5055 }, { "epoch": 9.61, "grad_norm": 0.07599501311779022, "learning_rate": 7.879125903123231e-08, "loss": 0.0004, "step": 5056 }, { "epoch": 9.61, "grad_norm": 0.17923179268836975, "learning_rate": 7.802169259431891e-08, "loss": 0.001, "step": 5057 }, { "epoch": 9.62, "grad_norm": 0.4199851155281067, "learning_rate": 7.725588814272212e-08, "loss": 0.0009, "step": 5058 }, { "epoch": 9.62, "grad_norm": 0.023912325501441956, "learning_rate": 7.64938459668052e-08, "loss": 0.0001, "step": 5059 }, { "epoch": 9.62, "grad_norm": 0.1596629023551941, "learning_rate": 7.573556635549928e-08, "loss": 0.0013, "step": 5060 }, { "epoch": 9.62, "grad_norm": 0.07763338088989258, "learning_rate": 7.498104959631103e-08, "loss": 0.0004, "step": 5061 }, { "epoch": 9.62, "grad_norm": 0.26206138730049133, "learning_rate": 7.423029597532161e-08, "loss": 0.0007, "step": 5062 }, { "epoch": 9.63, "grad_norm": 0.05633801966905594, "learning_rate": 7.348330577718554e-08, "loss": 0.0002, "step": 5063 }, { "epoch": 9.63, "grad_norm": 0.01827426441013813, "learning_rate": 7.274007928512627e-08, "loss": 0.0001, "step": 5064 }, { "epoch": 9.63, "grad_norm": 0.3560895025730133, "learning_rate": 7.200061678094505e-08, "loss": 0.0022, "step": 5065 }, { "epoch": 9.63, "grad_norm": 0.020454850047826767, "learning_rate": 7.126491854501427e-08, "loss": 0.0001, "step": 5066 }, { "epoch": 9.63, "grad_norm": 0.04881764575839043, "learning_rate": 7.053298485627857e-08, "loss": 0.0003, "step": 5067 }, { "epoch": 9.63, "grad_norm": 0.22613990306854248, "learning_rate": 6.980481599225487e-08, "loss": 0.0012, "step": 5068 }, { "epoch": 9.64, "grad_norm": 0.14442333579063416, "learning_rate": 6.90804122290345e-08, "loss": 0.0006, "step": 5069 }, { "epoch": 9.64, "grad_norm": 0.2776709794998169, "learning_rate": 6.83597738412789e-08, "loss": 0.0006, "step": 5070 }, { "epoch": 9.64, "grad_norm": 0.06542372703552246, "learning_rate": 6.764290110222394e-08, "loss": 0.0003, "step": 5071 }, { "epoch": 9.64, "grad_norm": 0.07589323818683624, "learning_rate": 6.692979428367663e-08, "loss": 0.0003, "step": 5072 }, { "epoch": 9.64, "grad_norm": 0.07468652725219727, "learning_rate": 6.622045365601515e-08, "loss": 0.0004, "step": 5073 }, { "epoch": 9.65, "grad_norm": 0.14015181362628937, "learning_rate": 6.551487948819212e-08, "loss": 0.0005, "step": 5074 }, { "epoch": 9.65, "grad_norm": 0.2587446868419647, "learning_rate": 6.481307204773024e-08, "loss": 0.0014, "step": 5075 }, { "epoch": 9.65, "grad_norm": 0.4658990800380707, "learning_rate": 6.41150316007244e-08, "loss": 0.0023, "step": 5076 }, { "epoch": 9.65, "grad_norm": 0.3937667906284332, "learning_rate": 6.342075841184181e-08, "loss": 0.0007, "step": 5077 }, { "epoch": 9.65, "grad_norm": 0.034365441650152206, "learning_rate": 6.273025274431965e-08, "loss": 0.0002, "step": 5078 }, { "epoch": 9.66, "grad_norm": 0.05622144788503647, "learning_rate": 6.204351485997073e-08, "loss": 0.0003, "step": 5079 }, { "epoch": 9.66, "grad_norm": 0.032471004873514175, "learning_rate": 6.136054501917232e-08, "loss": 0.0002, "step": 5080 }, { "epoch": 9.66, "grad_norm": 0.031669214367866516, "learning_rate": 6.068134348088061e-08, "loss": 0.0002, "step": 5081 }, { "epoch": 9.66, "grad_norm": 0.5145406126976013, "learning_rate": 6.000591050261739e-08, "loss": 0.0027, "step": 5082 }, { "epoch": 9.66, "grad_norm": 0.08527063578367233, "learning_rate": 5.933424634047891e-08, "loss": 0.0003, "step": 5083 }, { "epoch": 9.67, "grad_norm": 0.05967738479375839, "learning_rate": 5.8666351249130336e-08, "loss": 0.0004, "step": 5084 }, { "epoch": 9.67, "grad_norm": 0.6261815428733826, "learning_rate": 5.8002225481808005e-08, "loss": 0.0024, "step": 5085 }, { "epoch": 9.67, "grad_norm": 0.019984712824225426, "learning_rate": 5.7341869290321594e-08, "loss": 0.0002, "step": 5086 }, { "epoch": 9.67, "grad_norm": 0.28990381956100464, "learning_rate": 5.6685282925048604e-08, "loss": 0.0009, "step": 5087 }, { "epoch": 9.67, "grad_norm": 0.2410230189561844, "learning_rate": 5.603246663493767e-08, "loss": 0.0006, "step": 5088 }, { "epoch": 9.67, "grad_norm": 0.07607830315828323, "learning_rate": 5.538342066750968e-08, "loss": 0.0003, "step": 5089 }, { "epoch": 9.68, "grad_norm": 0.3100145757198334, "learning_rate": 5.4738145268853346e-08, "loss": 0.0017, "step": 5090 }, { "epoch": 9.68, "grad_norm": 0.17272405326366425, "learning_rate": 5.4096640683629633e-08, "loss": 0.0005, "step": 5091 }, { "epoch": 9.68, "grad_norm": 0.09771198779344559, "learning_rate": 5.345890715507174e-08, "loss": 0.0004, "step": 5092 }, { "epoch": 9.68, "grad_norm": 0.33538904786109924, "learning_rate": 5.282494492497736e-08, "loss": 0.0007, "step": 5093 }, { "epoch": 9.68, "grad_norm": 0.3372056484222412, "learning_rate": 5.219475423371867e-08, "loss": 0.001, "step": 5094 }, { "epoch": 9.69, "grad_norm": 0.6791336536407471, "learning_rate": 5.1568335320236754e-08, "loss": 0.0015, "step": 5095 }, { "epoch": 9.69, "grad_norm": 0.0988527461886406, "learning_rate": 5.094568842204384e-08, "loss": 0.0007, "step": 5096 }, { "epoch": 9.69, "grad_norm": 0.034128740429878235, "learning_rate": 5.032681377521886e-08, "loss": 0.0002, "step": 5097 }, { "epoch": 9.69, "grad_norm": 0.13955718278884888, "learning_rate": 4.9711711614413016e-08, "loss": 0.0004, "step": 5098 }, { "epoch": 9.69, "grad_norm": 0.8727146983146667, "learning_rate": 4.9100382172847514e-08, "loss": 0.0009, "step": 5099 }, { "epoch": 9.7, "grad_norm": 0.3202883303165436, "learning_rate": 4.849282568231029e-08, "loss": 0.0022, "step": 5100 }, { "epoch": 9.7, "eval_blimp_filtered_avg": 0.7368656716417911, "eval_blimp_filtered_std": 0.004842441487026615, "step": 5100 }, { "epoch": 9.7, "eval_blimp_supplement_avg": 0.7952586206896551, "eval_blimp_supplement_std": 0.0177088511968717, "step": 5100 }, { "epoch": 9.7, "eval_vqa_filtered_avg": 0.37, "eval_vqa_filtered_std": 0.04852365870939099, "step": 5100 }, { "epoch": 9.7, "eval_winoground_filtered_avg": 0.5, "eval_winoground_filtered_std": 0.050251890762960605, "step": 5100 }, { "epoch": 9.7, "grad_norm": 0.019381700083613396, "learning_rate": 4.7889042373161496e-08, "loss": 0.0001, "step": 5101 }, { "epoch": 9.7, "grad_norm": 0.09676377475261688, "learning_rate": 4.7289032474329144e-08, "loss": 0.0005, "step": 5102 }, { "epoch": 9.7, "grad_norm": 0.08346100151538849, "learning_rate": 4.669279621331235e-08, "loss": 0.0004, "step": 5103 }, { "epoch": 9.7, "grad_norm": 0.1667742133140564, "learning_rate": 4.610033381617696e-08, "loss": 0.0004, "step": 5104 }, { "epoch": 9.71, "grad_norm": 0.25248682498931885, "learning_rate": 4.5511645507558825e-08, "loss": 0.0006, "step": 5105 }, { "epoch": 9.71, "grad_norm": 0.5562610626220703, "learning_rate": 4.492673151066385e-08, "loss": 0.0022, "step": 5106 }, { "epoch": 9.71, "grad_norm": 0.13554434478282928, "learning_rate": 4.4345592047265737e-08, "loss": 0.0006, "step": 5107 }, { "epoch": 9.71, "grad_norm": 0.38729602098464966, "learning_rate": 4.3768227337707134e-08, "loss": 0.0036, "step": 5108 }, { "epoch": 9.71, "grad_norm": 0.3621763586997986, "learning_rate": 4.319463760090181e-08, "loss": 0.0017, "step": 5109 }, { "epoch": 9.71, "grad_norm": 0.5772742033004761, "learning_rate": 4.2624823054328024e-08, "loss": 0.0021, "step": 5110 }, { "epoch": 9.72, "grad_norm": 0.17168575525283813, "learning_rate": 4.205878391403517e-08, "loss": 0.0015, "step": 5111 }, { "epoch": 9.72, "grad_norm": 0.09446343779563904, "learning_rate": 4.149652039464047e-08, "loss": 0.0005, "step": 5112 }, { "epoch": 9.72, "grad_norm": 0.07879792153835297, "learning_rate": 4.093803270933227e-08, "loss": 0.0003, "step": 5113 }, { "epoch": 9.72, "grad_norm": 0.39556246995925903, "learning_rate": 4.0383321069862316e-08, "loss": 0.001, "step": 5114 }, { "epoch": 9.72, "grad_norm": 0.44970467686653137, "learning_rate": 3.983238568655345e-08, "loss": 0.0013, "step": 5115 }, { "epoch": 9.73, "grad_norm": 0.05764764919877052, "learning_rate": 3.9285226768298604e-08, "loss": 0.0003, "step": 5116 }, { "epoch": 9.73, "grad_norm": 0.12775014340877533, "learning_rate": 3.874184452255514e-08, "loss": 0.0004, "step": 5117 }, { "epoch": 9.73, "grad_norm": 0.30707472562789917, "learning_rate": 3.820223915535048e-08, "loss": 0.0015, "step": 5118 }, { "epoch": 9.73, "grad_norm": 0.07284252345561981, "learning_rate": 3.766641087127987e-08, "loss": 0.0006, "step": 5119 }, { "epoch": 9.73, "grad_norm": 0.13589468598365784, "learning_rate": 3.713435987350522e-08, "loss": 0.0007, "step": 5120 }, { "epoch": 9.74, "grad_norm": 0.2595053017139435, "learning_rate": 3.660608636375962e-08, "loss": 0.001, "step": 5121 }, { "epoch": 9.74, "grad_norm": 0.15503214299678802, "learning_rate": 3.6081590542339506e-08, "loss": 0.0006, "step": 5122 }, { "epoch": 9.74, "grad_norm": 0.019765730947256088, "learning_rate": 3.5560872608111365e-08, "loss": 0.0001, "step": 5123 }, { "epoch": 9.74, "grad_norm": 0.06436411291360855, "learning_rate": 3.504393275850948e-08, "loss": 0.0003, "step": 5124 }, { "epoch": 9.74, "grad_norm": 0.16851328313350677, "learning_rate": 3.453077118953374e-08, "loss": 0.0012, "step": 5125 }, { "epoch": 9.75, "grad_norm": 0.5307997465133667, "learning_rate": 3.402138809575517e-08, "loss": 0.0018, "step": 5126 }, { "epoch": 9.75, "grad_norm": 0.04903549700975418, "learning_rate": 3.3515783670307054e-08, "loss": 0.0003, "step": 5127 }, { "epoch": 9.75, "grad_norm": 0.023877333849668503, "learning_rate": 3.301395810489494e-08, "loss": 0.0001, "step": 5128 }, { "epoch": 9.75, "grad_norm": 0.26175791025161743, "learning_rate": 3.251591158978884e-08, "loss": 0.002, "step": 5129 }, { "epoch": 9.75, "grad_norm": 0.11732929199934006, "learning_rate": 3.202164431382659e-08, "loss": 0.0003, "step": 5130 }, { "epoch": 9.75, "grad_norm": 0.09241407364606857, "learning_rate": 3.1531156464411624e-08, "loss": 0.0005, "step": 5131 }, { "epoch": 9.76, "grad_norm": 0.09402008354663849, "learning_rate": 3.10444482275174e-08, "loss": 0.0003, "step": 5132 }, { "epoch": 9.76, "grad_norm": 0.04634101688861847, "learning_rate": 3.0561519787681846e-08, "loss": 0.0002, "step": 5133 }, { "epoch": 9.76, "grad_norm": 0.1394444853067398, "learning_rate": 3.008237132801073e-08, "loss": 0.0011, "step": 5134 }, { "epoch": 9.76, "grad_norm": 0.11396057903766632, "learning_rate": 2.9607003030176494e-08, "loss": 0.0006, "step": 5135 }, { "epoch": 9.76, "grad_norm": 0.04981095716357231, "learning_rate": 2.9135415074418305e-08, "loss": 0.0002, "step": 5136 }, { "epoch": 9.77, "grad_norm": 0.057023800909519196, "learning_rate": 2.8667607639542016e-08, "loss": 0.0002, "step": 5137 }, { "epoch": 9.77, "grad_norm": 0.36986270546913147, "learning_rate": 2.820358090291908e-08, "loss": 0.0017, "step": 5138 }, { "epoch": 9.77, "grad_norm": 0.09466097503900528, "learning_rate": 2.774333504048876e-08, "loss": 0.0007, "step": 5139 }, { "epoch": 9.77, "grad_norm": 0.13003908097743988, "learning_rate": 2.7286870226758135e-08, "loss": 0.0008, "step": 5140 }, { "epoch": 9.77, "grad_norm": 0.10839512944221497, "learning_rate": 2.6834186634796534e-08, "loss": 0.0003, "step": 5141 }, { "epoch": 9.78, "grad_norm": 0.1647389829158783, "learning_rate": 2.638528443624333e-08, "loss": 0.001, "step": 5142 }, { "epoch": 9.78, "grad_norm": 0.03218585252761841, "learning_rate": 2.5940163801301266e-08, "loss": 0.0002, "step": 5143 }, { "epoch": 9.78, "grad_norm": 0.3535415232181549, "learning_rate": 2.5498824898744222e-08, "loss": 0.0009, "step": 5144 }, { "epoch": 9.78, "grad_norm": 0.11012395471334457, "learning_rate": 2.5061267895905015e-08, "loss": 0.0004, "step": 5145 }, { "epoch": 9.78, "grad_norm": 0.7872511744499207, "learning_rate": 2.4627492958688715e-08, "loss": 0.0018, "step": 5146 }, { "epoch": 9.79, "grad_norm": 0.20881770551204681, "learning_rate": 2.4197500251563755e-08, "loss": 0.0009, "step": 5147 }, { "epoch": 9.79, "grad_norm": 0.2629554569721222, "learning_rate": 2.377128993756306e-08, "loss": 0.0007, "step": 5148 }, { "epoch": 9.79, "grad_norm": 0.08822977542877197, "learning_rate": 2.3348862178289578e-08, "loss": 0.0007, "step": 5149 }, { "epoch": 9.79, "grad_norm": 0.05521848052740097, "learning_rate": 2.2930217133907416e-08, "loss": 0.0002, "step": 5150 }, { "epoch": 9.79, "grad_norm": 0.30267050862312317, "learning_rate": 2.2515354963150716e-08, "loss": 0.0018, "step": 5151 }, { "epoch": 9.79, "grad_norm": 0.09928404539823532, "learning_rate": 2.2104275823315868e-08, "loss": 0.0005, "step": 5152 }, { "epoch": 9.8, "grad_norm": 0.1032525971531868, "learning_rate": 2.1696979870267086e-08, "loss": 0.0002, "step": 5153 }, { "epoch": 9.8, "grad_norm": 0.0928661972284317, "learning_rate": 2.1293467258433065e-08, "loss": 0.0009, "step": 5154 }, { "epoch": 9.8, "grad_norm": 0.3731788992881775, "learning_rate": 2.0893738140808085e-08, "loss": 0.0013, "step": 5155 }, { "epoch": 9.8, "grad_norm": 0.2259458601474762, "learning_rate": 2.049779266895313e-08, "loss": 0.0014, "step": 5156 }, { "epoch": 9.8, "grad_norm": 0.21480314433574677, "learning_rate": 2.0105630992992566e-08, "loss": 0.0009, "step": 5157 }, { "epoch": 9.81, "grad_norm": 0.02685984969139099, "learning_rate": 1.9717253261617443e-08, "loss": 0.0001, "step": 5158 }, { "epoch": 9.81, "grad_norm": 0.2051694542169571, "learning_rate": 1.9332659622083306e-08, "loss": 0.0005, "step": 5159 }, { "epoch": 9.81, "grad_norm": 0.07398087531328201, "learning_rate": 1.8951850220213507e-08, "loss": 0.0003, "step": 5160 }, { "epoch": 9.81, "grad_norm": 0.7953336834907532, "learning_rate": 1.8574825200391445e-08, "loss": 0.0012, "step": 5161 }, { "epoch": 9.81, "grad_norm": 0.1640363335609436, "learning_rate": 1.8201584705571652e-08, "loss": 0.0006, "step": 5162 }, { "epoch": 9.82, "grad_norm": 0.08143129199743271, "learning_rate": 1.7832128877268706e-08, "loss": 0.0007, "step": 5163 }, { "epoch": 9.82, "grad_norm": 0.06358862668275833, "learning_rate": 1.7466457855565e-08, "loss": 0.0003, "step": 5164 }, { "epoch": 9.82, "grad_norm": 0.18295270204544067, "learning_rate": 1.7104571779107405e-08, "loss": 0.0005, "step": 5165 }, { "epoch": 9.82, "grad_norm": 1.2104531526565552, "learning_rate": 1.6746470785107273e-08, "loss": 0.0017, "step": 5166 }, { "epoch": 9.82, "grad_norm": 0.05473015457391739, "learning_rate": 1.639215500934044e-08, "loss": 0.0003, "step": 5167 }, { "epoch": 9.83, "grad_norm": 0.07983638346195221, "learning_rate": 1.604162458614944e-08, "loss": 0.0005, "step": 5168 }, { "epoch": 9.83, "grad_norm": 0.5181248188018799, "learning_rate": 1.5694879648439075e-08, "loss": 0.0017, "step": 5169 }, { "epoch": 9.83, "grad_norm": 0.03580164164304733, "learning_rate": 1.5351920327680848e-08, "loss": 0.0003, "step": 5170 }, { "epoch": 9.83, "grad_norm": 0.2495732456445694, "learning_rate": 1.5012746753909624e-08, "loss": 0.0009, "step": 5171 }, { "epoch": 9.83, "grad_norm": 0.8632474541664124, "learning_rate": 1.467735905572476e-08, "loss": 0.0004, "step": 5172 }, { "epoch": 9.83, "grad_norm": 0.1095060482621193, "learning_rate": 1.4345757360292312e-08, "loss": 0.0003, "step": 5173 }, { "epoch": 9.84, "grad_norm": 0.3221050798892975, "learning_rate": 1.4017941793340595e-08, "loss": 0.001, "step": 5174 }, { "epoch": 9.84, "grad_norm": 0.08804667741060257, "learning_rate": 1.3693912479162407e-08, "loss": 0.0004, "step": 5175 }, { "epoch": 9.84, "grad_norm": 0.34170395135879517, "learning_rate": 1.3373669540617251e-08, "loss": 0.0014, "step": 5176 }, { "epoch": 9.84, "grad_norm": 0.0896448865532875, "learning_rate": 1.3057213099125777e-08, "loss": 0.0003, "step": 5177 }, { "epoch": 9.84, "grad_norm": 0.027437830343842506, "learning_rate": 1.2744543274675335e-08, "loss": 0.0002, "step": 5178 }, { "epoch": 9.85, "grad_norm": 0.07113165408372879, "learning_rate": 1.2435660185816656e-08, "loss": 0.0005, "step": 5179 }, { "epoch": 9.85, "grad_norm": 0.6631986498832703, "learning_rate": 1.2130563949664943e-08, "loss": 0.0032, "step": 5180 }, { "epoch": 9.85, "grad_norm": 1.1519889831542969, "learning_rate": 1.1829254681898772e-08, "loss": 0.0011, "step": 5181 }, { "epoch": 9.85, "grad_norm": 0.22014819085597992, "learning_rate": 1.1531732496763425e-08, "loss": 0.0007, "step": 5182 }, { "epoch": 9.85, "grad_norm": 0.4270438849925995, "learning_rate": 1.1237997507064224e-08, "loss": 0.0021, "step": 5183 }, { "epoch": 9.86, "grad_norm": 0.255804181098938, "learning_rate": 1.0948049824174301e-08, "loss": 0.0007, "step": 5184 }, { "epoch": 9.86, "grad_norm": 0.04554520919919014, "learning_rate": 1.0661889558029049e-08, "loss": 0.0002, "step": 5185 }, { "epoch": 9.86, "grad_norm": 0.048948317766189575, "learning_rate": 1.0379516817128344e-08, "loss": 0.0003, "step": 5186 }, { "epoch": 9.86, "grad_norm": 0.05308644846081734, "learning_rate": 1.0100931708534322e-08, "loss": 0.0002, "step": 5187 }, { "epoch": 9.86, "grad_norm": 0.3177518844604492, "learning_rate": 9.826134337875826e-09, "loss": 0.0008, "step": 5188 }, { "epoch": 9.87, "grad_norm": 0.5380163192749023, "learning_rate": 9.555124809343952e-09, "loss": 0.0006, "step": 5189 }, { "epoch": 9.87, "grad_norm": 1.3997619152069092, "learning_rate": 9.287903225693174e-09, "loss": 0.0024, "step": 5190 }, { "epoch": 9.87, "grad_norm": 0.05072995647788048, "learning_rate": 9.024469688242443e-09, "loss": 0.0002, "step": 5191 }, { "epoch": 9.87, "grad_norm": 0.0591767281293869, "learning_rate": 8.764824296875196e-09, "loss": 0.0003, "step": 5192 }, { "epoch": 9.87, "grad_norm": 0.040956590324640274, "learning_rate": 8.508967150037128e-09, "loss": 0.0003, "step": 5193 }, { "epoch": 9.87, "grad_norm": 0.18656930327415466, "learning_rate": 8.256898344737307e-09, "loss": 0.0009, "step": 5194 }, { "epoch": 9.88, "grad_norm": 0.1588270366191864, "learning_rate": 8.008617976551502e-09, "loss": 0.001, "step": 5195 }, { "epoch": 9.88, "grad_norm": 0.11501819640398026, "learning_rate": 7.764126139615524e-09, "loss": 0.0007, "step": 5196 }, { "epoch": 9.88, "grad_norm": 0.07431716471910477, "learning_rate": 7.523422926629665e-09, "loss": 0.0004, "step": 5197 }, { "epoch": 9.88, "grad_norm": 0.1380661576986313, "learning_rate": 7.286508428858696e-09, "loss": 0.0004, "step": 5198 }, { "epoch": 9.88, "grad_norm": 0.3674444854259491, "learning_rate": 7.053382736130765e-09, "loss": 0.0018, "step": 5199 }, { "epoch": 9.89, "grad_norm": 0.07710067927837372, "learning_rate": 6.824045936836277e-09, "loss": 0.0003, "step": 5200 }, { "epoch": 9.89, "eval_blimp_filtered_avg": 0.7371641791044776, "eval_blimp_filtered_std": 0.00484188384297847, "step": 5200 }, { "epoch": 9.89, "eval_blimp_supplement_avg": 0.7952586206896551, "eval_blimp_supplement_std": 0.0177088511968717, "step": 5200 }, { "epoch": 9.89, "eval_vqa_filtered_avg": 0.37, "eval_vqa_filtered_std": 0.048523658709391, "step": 5200 }, { "epoch": 9.89, "eval_winoground_filtered_avg": 0.5, "eval_winoground_filtered_std": 0.050251890762960605, "step": 5200 }, { "epoch": 9.89, "grad_norm": 0.4718663990497589, "learning_rate": 6.598498117931229e-09, "loss": 0.0006, "step": 5201 }, { "epoch": 9.89, "grad_norm": 0.1719781905412674, "learning_rate": 6.376739364932772e-09, "loss": 0.0009, "step": 5202 }, { "epoch": 9.89, "grad_norm": 0.045540399849414825, "learning_rate": 6.158769761921424e-09, "loss": 0.0002, "step": 5203 }, { "epoch": 9.89, "grad_norm": 0.09470413625240326, "learning_rate": 5.9445893915421885e-09, "loss": 0.0005, "step": 5204 }, { "epoch": 9.9, "grad_norm": 0.024232104420661926, "learning_rate": 5.734198335004548e-09, "loss": 0.0002, "step": 5205 }, { "epoch": 9.9, "grad_norm": 0.2807256281375885, "learning_rate": 5.527596672078028e-09, "loss": 0.0015, "step": 5206 }, { "epoch": 9.9, "grad_norm": 0.39343810081481934, "learning_rate": 5.324784481096634e-09, "loss": 0.0008, "step": 5207 }, { "epoch": 9.9, "grad_norm": 0.05497434362769127, "learning_rate": 5.125761838959964e-09, "loss": 0.0003, "step": 5208 }, { "epoch": 9.9, "grad_norm": 0.2675904631614685, "learning_rate": 4.930528821126546e-09, "loss": 0.001, "step": 5209 }, { "epoch": 9.9, "grad_norm": 0.24515657126903534, "learning_rate": 4.73908550162272e-09, "loss": 0.0017, "step": 5210 }, { "epoch": 9.91, "grad_norm": 0.16634392738342285, "learning_rate": 4.551431953033758e-09, "loss": 0.0006, "step": 5211 }, { "epoch": 9.91, "grad_norm": 0.07630796730518341, "learning_rate": 4.367568246510523e-09, "loss": 0.0003, "step": 5212 }, { "epoch": 9.91, "grad_norm": 0.26775068044662476, "learning_rate": 4.187494451766138e-09, "loss": 0.0008, "step": 5213 }, { "epoch": 9.91, "grad_norm": 0.21088193356990814, "learning_rate": 4.011210637075991e-09, "loss": 0.002, "step": 5214 }, { "epoch": 9.91, "grad_norm": 0.08956579864025116, "learning_rate": 3.838716869279946e-09, "loss": 0.0002, "step": 5215 }, { "epoch": 9.92, "grad_norm": 0.08904320001602173, "learning_rate": 3.6700132137812426e-09, "loss": 0.0004, "step": 5216 }, { "epoch": 9.92, "grad_norm": 0.25460579991340637, "learning_rate": 3.50509973454316e-09, "loss": 0.0009, "step": 5217 }, { "epoch": 9.92, "grad_norm": 0.18310806155204773, "learning_rate": 3.3439764940934593e-09, "loss": 0.0011, "step": 5218 }, { "epoch": 9.92, "grad_norm": 0.23057261109352112, "learning_rate": 3.1866435535254925e-09, "loss": 0.0021, "step": 5219 }, { "epoch": 9.92, "grad_norm": 0.03760422393679619, "learning_rate": 3.0331009724915426e-09, "loss": 0.0003, "step": 5220 }, { "epoch": 9.93, "grad_norm": 0.29724618792533875, "learning_rate": 2.883348809208375e-09, "loss": 0.0009, "step": 5221 }, { "epoch": 9.93, "grad_norm": 0.07108108699321747, "learning_rate": 2.7373871204561254e-09, "loss": 0.0004, "step": 5222 }, { "epoch": 9.93, "grad_norm": 0.04423976689577103, "learning_rate": 2.595215961576081e-09, "loss": 0.0003, "step": 5223 }, { "epoch": 9.93, "grad_norm": 0.3950737714767456, "learning_rate": 2.4568353864751203e-09, "loss": 0.0003, "step": 5224 }, { "epoch": 9.93, "grad_norm": 0.044259827584028244, "learning_rate": 2.3222454476190538e-09, "loss": 0.0002, "step": 5225 }, { "epoch": 9.94, "grad_norm": 1.0665597915649414, "learning_rate": 2.1914461960403922e-09, "loss": 0.0021, "step": 5226 }, { "epoch": 9.94, "grad_norm": 0.46140047907829285, "learning_rate": 2.064437681331688e-09, "loss": 0.0014, "step": 5227 }, { "epoch": 9.94, "grad_norm": 0.46861112117767334, "learning_rate": 1.941219951648865e-09, "loss": 0.0025, "step": 5228 }, { "epoch": 9.94, "grad_norm": 0.08413451164960861, "learning_rate": 1.8217930537112183e-09, "loss": 0.0004, "step": 5229 }, { "epoch": 9.94, "grad_norm": 0.06641228497028351, "learning_rate": 1.7061570328003042e-09, "loss": 0.0002, "step": 5230 }, { "epoch": 9.94, "grad_norm": 0.012301451526582241, "learning_rate": 1.59431193275994e-09, "loss": 0.0001, "step": 5231 }, { "epoch": 9.95, "grad_norm": 0.1963719129562378, "learning_rate": 1.4862577959973145e-09, "loss": 0.0006, "step": 5232 }, { "epoch": 9.95, "grad_norm": 0.03907772898674011, "learning_rate": 1.3819946634818782e-09, "loss": 0.0003, "step": 5233 }, { "epoch": 9.95, "grad_norm": 0.13678279519081116, "learning_rate": 1.2815225747453418e-09, "loss": 0.0004, "step": 5234 }, { "epoch": 9.95, "grad_norm": 0.01693980023264885, "learning_rate": 1.1848415678827884e-09, "loss": 0.0001, "step": 5235 }, { "epoch": 9.95, "grad_norm": 0.1567397564649582, "learning_rate": 1.0919516795515616e-09, "loss": 0.0003, "step": 5236 }, { "epoch": 9.96, "grad_norm": 0.6244571208953857, "learning_rate": 1.0028529449701562e-09, "loss": 0.0015, "step": 5237 }, { "epoch": 9.96, "grad_norm": 0.057485442608594894, "learning_rate": 9.175453979226589e-10, "loss": 0.0002, "step": 5238 }, { "epoch": 9.96, "grad_norm": 0.07642792910337448, "learning_rate": 8.360290707543073e-10, "loss": 0.0003, "step": 5239 }, { "epoch": 9.96, "grad_norm": 0.040779221802949905, "learning_rate": 7.583039943703796e-10, "loss": 0.0003, "step": 5240 }, { "epoch": 9.96, "grad_norm": 0.06018037348985672, "learning_rate": 6.843701982428564e-10, "loss": 0.0003, "step": 5241 }, { "epoch": 9.97, "grad_norm": 0.13809074461460114, "learning_rate": 6.142277104026484e-10, "loss": 0.0011, "step": 5242 }, { "epoch": 9.97, "grad_norm": 0.5882775187492371, "learning_rate": 5.478765574462586e-10, "loss": 0.0092, "step": 5243 }, { "epoch": 9.97, "grad_norm": 0.16809655725955963, "learning_rate": 4.853167645302304e-10, "loss": 0.0003, "step": 5244 }, { "epoch": 9.97, "grad_norm": 0.21985819935798645, "learning_rate": 4.265483553755889e-10, "loss": 0.0009, "step": 5245 }, { "epoch": 9.97, "grad_norm": 0.11580268293619156, "learning_rate": 3.715713522622899e-10, "loss": 0.0012, "step": 5246 }, { "epoch": 9.98, "grad_norm": 0.15877658128738403, "learning_rate": 3.203857760381013e-10, "loss": 0.0006, "step": 5247 }, { "epoch": 9.98, "grad_norm": 0.11320873349905014, "learning_rate": 2.729916461097215e-10, "loss": 0.0003, "step": 5248 }, { "epoch": 9.98, "grad_norm": 0.3553732633590698, "learning_rate": 2.2938898044611024e-10, "loss": 0.0007, "step": 5249 }, { "epoch": 9.98, "grad_norm": 0.044037654995918274, "learning_rate": 1.8957779557959853e-10, "loss": 0.0002, "step": 5250 }, { "epoch": 9.98, "grad_norm": 0.3807389736175537, "learning_rate": 1.5355810660477864e-10, "loss": 0.0009, "step": 5251 }, { "epoch": 9.98, "grad_norm": 0.2613552212715149, "learning_rate": 1.2132992717961423e-10, "loss": 0.0019, "step": 5252 }, { "epoch": 9.99, "grad_norm": 0.0581224262714386, "learning_rate": 9.289326952321987e-11, "loss": 0.0003, "step": 5253 }, { "epoch": 9.99, "grad_norm": 0.10235191881656647, "learning_rate": 6.824814441808159e-11, "loss": 0.0026, "step": 5254 }, { "epoch": 9.99, "grad_norm": 0.25735583901405334, "learning_rate": 4.739456120672614e-11, "loss": 0.0005, "step": 5255 }, { "epoch": 9.99, "grad_norm": 0.08004412800073624, "learning_rate": 3.0332527798382363e-11, "loss": 0.0002, "step": 5256 }, { "epoch": 9.99, "grad_norm": 0.15413302183151245, "learning_rate": 1.706205066009936e-11, "loss": 0.0007, "step": 5257 }, { "epoch": 10.0, "grad_norm": 0.03276556730270386, "learning_rate": 7.583134824518113e-12, "loss": 0.0001, "step": 5258 }, { "epoch": 10.0, "grad_norm": 0.7509633302688599, "learning_rate": 1.8957838854305464e-12, "loss": 0.0011, "step": 5259 }, { "epoch": 10.0, "grad_norm": 0.14384357631206512, "learning_rate": 0.0, "loss": 0.0005, "step": 5260 }, { "epoch": 10.0, "step": 5260, "total_flos": 1.325353398408577e+18, "train_loss": 0.0462811390658704, "train_runtime": 12690.6804, "train_samples_per_second": 53.015, "train_steps_per_second": 0.414 } ], "logging_steps": 1.0, "max_steps": 5260, "num_input_tokens_seen": 0, "num_train_epochs": 10, "save_steps": 1000, "total_flos": 1.325353398408577e+18, "train_batch_size": 32, "trial_name": null, "trial_params": null }